diff options
Diffstat (limited to 'net/ipv6')
67 files changed, 2049 insertions, 1265 deletions
diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 2e8c06108ab9..0f3f1999719a 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -48,4 +48,5 @@ obj-$(subst m,y,$(CONFIG_IPV6)) += inet6_hashtables.o ifneq ($(CONFIG_IPV6),) obj-$(CONFIG_NET_UDP_TUNNEL) += ip6_udp_tunnel.o +obj-y += mcast_snoop.o endif diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index b6030025f411..21c2c818df3b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -46,6 +46,7 @@ #include <linux/socket.h> #include <linux/sockios.h> #include <linux/net.h> +#include <linux/inet.h> #include <linux/in6.h> #include <linux/netdevice.h> #include <linux/if_addr.h> @@ -102,6 +103,9 @@ #define INFINITY_LIFE_TIME 0xFFFFFFFF +#define IPV6_MAX_STRLEN \ + sizeof("ffff:ffff:ffff:ffff:ffff:ffff:255.255.255.255") + static inline u32 cstamp_delta(unsigned long cstamp) { return (cstamp - INITIAL_JIFFIES) * 100UL / HZ; @@ -127,6 +131,9 @@ static void ipv6_regen_rndid(unsigned long data); static int ipv6_generate_eui64(u8 *eui, struct net_device *dev); static int ipv6_count_addresses(struct inet6_dev *idev); +static int ipv6_generate_stable_address(struct in6_addr *addr, + u8 dad_count, + const struct inet6_dev *idev); /* * Configured unicast address hash table @@ -202,6 +209,9 @@ static struct ipv6_devconf ipv6_devconf __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + } }; static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { @@ -240,6 +250,9 @@ static struct ipv6_devconf ipv6_devconf_dflt __read_mostly = { .accept_dad = 1, .suppress_frag_ndisc = 1, .accept_ra_mtu = 1, + .stable_secret = { + .initialized = false, + }, }; /* Check if a valid qdisc is available */ @@ -321,7 +334,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) return ERR_PTR(-EINVAL); ndev = kzalloc(sizeof(struct inet6_dev), GFP_KERNEL); - if (ndev == NULL) + if (!ndev) return ERR_PTR(err); rwlock_init(&ndev->lock); @@ -333,7 +346,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev) ndev->cnf.mtu6 = dev->mtu; ndev->cnf.sysctl = NULL; ndev->nd_parms = neigh_parms_alloc(dev, &nd_tbl); - if (ndev->nd_parms == NULL) { + if (!ndev->nd_parms) { kfree(ndev); return ERR_PTR(err); } @@ -468,7 +481,7 @@ static int inet6_netconf_fill_devconf(struct sk_buff *skb, int ifindex, nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct netconfmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; ncm = nlmsg_data(nlh); @@ -506,7 +519,7 @@ void inet6_netconf_notify_devconf(struct net *net, int type, int ifindex, int err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(type), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, 0, 0, @@ -561,10 +574,10 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, break; default: dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) + if (!dev) goto errout; in6_dev = __in6_dev_get(dev); - if (in6_dev == NULL) + if (!in6_dev) goto errout; devconf = &in6_dev->cnf; break; @@ -572,7 +585,7 @@ static int inet6_netconf_get_devconf(struct sk_buff *in_skb, err = -ENOBUFS; skb = nlmsg_new(inet6_netconf_msgsize_devconf(-1), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_netconf_fill_devconf(skb, ifindex, devconf, @@ -841,7 +854,7 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa = kzalloc(sizeof(struct inet6_ifaddr), GFP_ATOMIC); - if (ifa == NULL) { + if (!ifa) { ADBG("ipv6_add_addr: malloc failed\n"); err = -ENOBUFS; goto out; @@ -860,7 +873,6 @@ ipv6_add_addr(struct inet6_dev *idev, const struct in6_addr *addr, ifa->peer_addr = *peer_addr; spin_lock_init(&ifa->lock); - spin_lock_init(&ifa->state_lock); INIT_DELAYED_WORK(&ifa->dad_work, addrconf_dad_work); INIT_HLIST_NODE(&ifa->addr_lst); ifa->scope = scope; @@ -1003,10 +1015,10 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) ASSERT_RTNL(); - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); state = ifp->state; ifp->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (state == INET6_IFADDR_STATE_DEAD) goto out; @@ -1546,7 +1558,7 @@ int ipv6_chk_addr_and_flags(struct net *net, const struct in6_addr *addr, : ifp->flags; if (ipv6_addr_equal(&ifp->addr, addr) && !(ifp_flags&banned_flags) && - (dev == NULL || ifp->idev->dev == dev || + (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict))) { rcu_read_unlock_bh(); return 1; @@ -1568,7 +1580,7 @@ static bool ipv6_chk_same_addr(struct net *net, const struct in6_addr *addr, if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev) + if (!dev || ifp->idev->dev == dev) return true; } } @@ -1637,7 +1649,7 @@ struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, const struct in6_addr *add if (!net_eq(dev_net(ifp->idev->dev), net)) continue; if (ipv6_addr_equal(&ifp->addr, addr)) { - if (dev == NULL || ifp->idev->dev == dev || + if (!dev || ifp->idev->dev == dev || !(ifp->scope&(IFA_LINK|IFA_HOST) || strict)) { result = ifp; in6_ifa_hold(ifp); @@ -1686,19 +1698,21 @@ static int addrconf_dad_end(struct inet6_ifaddr *ifp) { int err = -ENOENT; - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_DAD) { ifp->state = INET6_IFADDR_STATE_POSTDAD; err = 0; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); return err; } void addrconf_dad_failure(struct inet6_ifaddr *ifp) { + struct in6_addr addr; struct inet6_dev *idev = ifp->idev; + struct net *net = dev_net(ifp->idev->dev); if (addrconf_dad_end(ifp)) { in6_ifa_put(ifp); @@ -1708,9 +1722,57 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) net_info_ratelimited("%s: IPv6 duplicate address %pI6c detected!\n", ifp->idev->dev->name, &ifp->addr); - if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { - struct in6_addr addr; + spin_lock_bh(&ifp->lock); + + if (ifp->flags & IFA_F_STABLE_PRIVACY) { + int scope = ifp->scope; + u32 flags = ifp->flags; + struct in6_addr new_addr; + struct inet6_ifaddr *ifp2; + u32 valid_lft, preferred_lft; + int pfxlen = ifp->prefix_len; + int retries = ifp->stable_privacy_retry + 1; + + if (retries > net->ipv6.sysctl.idgen_retries) { + net_info_ratelimited("%s: privacy stable address generation failed because of DAD conflicts!\n", + ifp->idev->dev->name); + goto errdad; + } + + new_addr = ifp->addr; + if (ipv6_generate_stable_address(&new_addr, retries, + idev)) + goto errdad; + + valid_lft = ifp->valid_lft; + preferred_lft = ifp->prefered_lft; + + spin_unlock_bh(&ifp->lock); + + if (idev->cnf.max_addresses && + ipv6_count_addresses(idev) >= + idev->cnf.max_addresses) + goto lock_errdad; + + net_info_ratelimited("%s: generating new stable privacy address because of DAD conflict\n", + ifp->idev->dev->name); + + ifp2 = ipv6_add_addr(idev, &new_addr, NULL, pfxlen, + scope, flags, valid_lft, + preferred_lft); + if (IS_ERR(ifp2)) + goto lock_errdad; + + spin_lock_bh(&ifp2->lock); + ifp2->stable_privacy_retry = retries; + ifp2->state = INET6_IFADDR_STATE_PREDAD; + spin_unlock_bh(&ifp2->lock); + addrconf_mod_dad_work(ifp2, net->ipv6.sysctl.idgen_delay); + in6_ifa_put(ifp2); +lock_errdad: + spin_lock_bh(&ifp->lock); + } else if (idev->cnf.accept_dad > 1 && !idev->cnf.disable_ipv6) { addr.s6_addr32[0] = htonl(0xfe800000); addr.s6_addr32[1] = 0; @@ -1724,10 +1786,10 @@ void addrconf_dad_failure(struct inet6_ifaddr *ifp) } } - spin_lock_bh(&ifp->state_lock); +errdad: /* transition from _POSTDAD to _ERRDAD */ ifp->state = INET6_IFADDR_STATE_ERRDAD; - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); addrconf_mod_dad_work(ifp, 0); } @@ -2052,13 +2114,15 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, struct fib6_table *table; table = fib6_get_table(dev_net(dev), RT6_TABLE_PREFIX); - if (table == NULL) + if (!table) return NULL; read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; + + noflags |= RTF_CACHE; for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { if (rt->dst.dev->ifindex != dev->ifindex) continue; @@ -2186,6 +2250,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) __u32 valid_lft; __u32 prefered_lft; int addr_type; + u32 addr_flags = 0; struct inet6_dev *in6_dev; struct net *net = dev_net(dev); @@ -2215,7 +2280,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev = in6_dev_get(dev); - if (in6_dev == NULL) { + if (!in6_dev) { net_dbg_ratelimited("addrconf: device %s not configured\n", dev->name); return; @@ -2292,6 +2357,12 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao) in6_dev->token.s6_addr + 8, 8); read_unlock_bh(&in6_dev->lock); tokenized = true; + } else if (in6_dev->addr_gen_mode == + IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !ipv6_generate_stable_address(&addr, 0, + in6_dev)) { + addr_flags |= IFA_F_STABLE_PRIVACY; + goto ok; } else if (ipv6_generate_eui64(addr.s6_addr + 8, dev) && ipv6_inherit_eui64(addr.s6_addr + 8, in6_dev)) { in6_dev_put(in6_dev); @@ -2308,9 +2379,8 @@ ok: ifp = ipv6_get_ifaddr(net, &addr, dev, 1); - if (ifp == NULL && valid_lft) { + if (!ifp && valid_lft) { int max_addresses = in6_dev->cnf.max_addresses; - u32 addr_flags = 0; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (in6_dev->cnf.optimistic_dad && @@ -2350,7 +2420,7 @@ ok: u32 stored_lft; /* update lifetime (RFC2462 5.5.3 e) */ - spin_lock(&ifp->lock); + spin_lock_bh(&ifp->lock); now = jiffies; if (ifp->valid_lft > (now - ifp->tstamp) / HZ) stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ; @@ -2380,12 +2450,12 @@ ok: ifp->tstamp = now; flags = ifp->flags; ifp->flags &= ~IFA_F_DEPRECATED; - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); if (!(flags&IFA_F_TENTATIVE)) ipv6_ifa_notify(0, ifp); } else - spin_unlock(&ifp->lock); + spin_unlock_bh(&ifp->lock); manage_tempaddrs(in6_dev, ifp, valid_lft, prefered_lft, create, now); @@ -2418,7 +2488,7 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg) dev = __dev_get_by_index(net, ireq.ifr6_ifindex); err = -ENODEV; - if (dev == NULL) + if (!dev) goto err_exit; #if IS_ENABLED(CONFIG_IPV6_SIT) @@ -2464,6 +2534,23 @@ err_exit: return err; } +static int ipv6_mc_config(struct sock *sk, bool join, + const struct in6_addr *addr, int ifindex) +{ + int ret; + + ASSERT_RTNL(); + + lock_sock(sk); + if (join) + ret = ipv6_sock_mc_join(sk, ifindex, addr); + else + ret = ipv6_sock_mc_drop(sk, ifindex, addr); + release_sock(sk); + + return ret; +} + /* * Manual configuration of address on an interface */ @@ -2476,10 +2563,10 @@ static int inet6_addr_add(struct net *net, int ifindex, struct inet6_ifaddr *ifp; struct inet6_dev *idev; struct net_device *dev; + unsigned long timeout; + clock_t expires; int scope; u32 flags; - clock_t expires; - unsigned long timeout; ASSERT_RTNL(); @@ -2501,6 +2588,14 @@ static int inet6_addr_add(struct net *net, int ifindex, if (IS_ERR(idev)) return PTR_ERR(idev); + if (ifa_flags & IFA_F_MCAUTOJOIN) { + int ret = ipv6_mc_config(net->ipv6.mc_autojoin_sk, + true, pfx, ifindex); + + if (ret < 0) + return ret; + } + scope = ipv6_addr_scope(pfx); timeout = addrconf_timeout_fixup(valid_lft, HZ); @@ -2542,6 +2637,9 @@ static int inet6_addr_add(struct net *net, int ifindex, in6_ifa_put(ifp); addrconf_verify_rtnl(); return 0; + } else if (ifa_flags & IFA_F_MCAUTOJOIN) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, ifindex); } return PTR_ERR(ifp); @@ -2562,7 +2660,7 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, return -ENODEV; idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENXIO; read_lock_bh(&idev->lock); @@ -2578,6 +2676,10 @@ static int inet6_addr_del(struct net *net, int ifindex, u32 ifa_flags, jiffies); ipv6_del_addr(ifp); addrconf_verify_rtnl(); + if (ipv6_addr_is_multicast(pfx)) { + ipv6_mc_config(net->ipv6.mc_autojoin_sk, + false, pfx, dev->ifindex); + } return 0; } } @@ -2710,7 +2812,7 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2757,10 +2859,11 @@ static void init_loopback(struct net_device *dev) } } -static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr *addr) +static void addrconf_add_linklocal(struct inet6_dev *idev, + const struct in6_addr *addr, u32 flags) { struct inet6_ifaddr *ifp; - u32 addr_flags = IFA_F_PERMANENT; + u32 addr_flags = flags | IFA_F_PERMANENT; #ifdef CONFIG_IPV6_OPTIMISTIC_DAD if (idev->cnf.optimistic_dad && @@ -2768,7 +2871,6 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr addr_flags |= IFA_F_OPTIMISTIC; #endif - ifp = ipv6_add_addr(idev, addr, NULL, 64, IFA_LINK, addr_flags, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME); if (!IS_ERR(ifp)) { @@ -2778,18 +2880,103 @@ static void addrconf_add_linklocal(struct inet6_dev *idev, const struct in6_addr } } +static bool ipv6_reserved_interfaceid(struct in6_addr address) +{ + if ((address.s6_addr32[2] | address.s6_addr32[3]) == 0) + return true; + + if (address.s6_addr32[2] == htonl(0x02005eff) && + ((address.s6_addr32[3] & htonl(0xfe000000)) == htonl(0xfe000000))) + return true; + + if (address.s6_addr32[2] == htonl(0xfdffffff) && + ((address.s6_addr32[3] & htonl(0xffffff80)) == htonl(0xffffff80))) + return true; + + return false; +} + +static int ipv6_generate_stable_address(struct in6_addr *address, + u8 dad_count, + const struct inet6_dev *idev) +{ + static DEFINE_SPINLOCK(lock); + static __u32 digest[SHA_DIGEST_WORDS]; + static __u32 workspace[SHA_WORKSPACE_WORDS]; + + static union { + char __data[SHA_MESSAGE_BYTES]; + struct { + struct in6_addr secret; + __be32 prefix[2]; + unsigned char hwaddr[MAX_ADDR_LEN]; + u8 dad_count; + } __packed; + } data; + + struct in6_addr secret; + struct in6_addr temp; + struct net *net = dev_net(idev->dev); + + BUILD_BUG_ON(sizeof(data.__data) != sizeof(data)); + + if (idev->cnf.stable_secret.initialized) + secret = idev->cnf.stable_secret.secret; + else if (net->ipv6.devconf_dflt->stable_secret.initialized) + secret = net->ipv6.devconf_dflt->stable_secret.secret; + else + return -1; + +retry: + spin_lock_bh(&lock); + + sha_init(digest); + memset(&data, 0, sizeof(data)); + memset(workspace, 0, sizeof(workspace)); + memcpy(data.hwaddr, idev->dev->perm_addr, idev->dev->addr_len); + data.prefix[0] = address->s6_addr32[0]; + data.prefix[1] = address->s6_addr32[1]; + data.secret = secret; + data.dad_count = dad_count; + + sha_transform(digest, data.__data, workspace); + + temp = *address; + temp.s6_addr32[2] = (__force __be32)digest[0]; + temp.s6_addr32[3] = (__force __be32)digest[1]; + + spin_unlock_bh(&lock); + + if (ipv6_reserved_interfaceid(temp)) { + dad_count++; + if (dad_count > dev_net(idev->dev)->ipv6.sysctl.idgen_retries) + return -1; + goto retry; + } + + *address = temp; + return 0; +} + static void addrconf_addr_gen(struct inet6_dev *idev, bool prefix_route) { - if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { - struct in6_addr addr; + struct in6_addr addr; - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); + + if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY) { + if (!ipv6_generate_stable_address(&addr, 0, idev)) + addrconf_add_linklocal(idev, &addr, + IFA_F_STABLE_PRIVACY); + else if (prefix_route) + addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); + } else if (idev->addr_gen_mode == IN6_ADDR_GEN_MODE_EUI64) { /* addrconf_add_linklocal also adds a prefix_route and we * only need to care about prefix routes if ipv6_generate_eui64 * couldn't generate one. */ if (ipv6_generate_eui64(addr.s6_addr + 8, idev->dev) == 0) - addrconf_add_linklocal(idev, &addr); + addrconf_add_linklocal(idev, &addr, 0); else if (prefix_route) addrconf_prefix_route(&addr, 64, idev->dev, 0, 0); } @@ -2834,7 +3021,7 @@ static void addrconf_sit_config(struct net_device *dev) */ idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -2859,7 +3046,7 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (idev == NULL) { + if (!idev) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3056,7 +3243,7 @@ static int addrconf_ifdown(struct net_device *dev, int how) neigh_ifdown(&nd_tbl, dev); idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENODEV; /* @@ -3127,10 +3314,10 @@ restart: write_unlock_bh(&idev->lock); - spin_lock_bh(&ifa->state_lock); + spin_lock_bh(&ifa->lock); state = ifa->state; ifa->state = INET6_IFADDR_STATE_DEAD; - spin_unlock_bh(&ifa->state_lock); + spin_unlock_bh(&ifa->lock); if (state != INET6_IFADDR_STATE_DEAD) { __ipv6_ifa_notify(RTM_DELADDR, ifa); @@ -3288,12 +3475,12 @@ static void addrconf_dad_start(struct inet6_ifaddr *ifp) { bool begin_dad = false; - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state != INET6_IFADDR_STATE_DEAD) { ifp->state = INET6_IFADDR_STATE_PREDAD; begin_dad = true; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (begin_dad) addrconf_mod_dad_work(ifp, 0); @@ -3315,7 +3502,7 @@ static void addrconf_dad_work(struct work_struct *w) rtnl_lock(); - spin_lock_bh(&ifp->state_lock); + spin_lock_bh(&ifp->lock); if (ifp->state == INET6_IFADDR_STATE_PREDAD) { action = DAD_BEGIN; ifp->state = INET6_IFADDR_STATE_DAD; @@ -3323,7 +3510,7 @@ static void addrconf_dad_work(struct work_struct *w) action = DAD_ABORT; ifp->state = INET6_IFADDR_STATE_POSTDAD; } - spin_unlock_bh(&ifp->state_lock); + spin_unlock_bh(&ifp->lock); if (action == DAD_BEGIN) { addrconf_dad_begin(ifp); @@ -3811,7 +3998,7 @@ inet6_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); - if (pfx == NULL) + if (!pfx) return -EINVAL; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; @@ -3923,7 +4110,7 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) ifm = nlmsg_data(nlh); pfx = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer_pfx); - if (pfx == NULL) + if (!pfx) return -EINVAL; if (tb[IFA_CACHEINFO]) { @@ -3938,17 +4125,17 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh) } dev = __dev_get_by_index(net, ifm->ifa_index); - if (dev == NULL) + if (!dev) return -ENODEV; ifa_flags = tb[IFA_FLAGS] ? nla_get_u32(tb[IFA_FLAGS]) : ifm->ifa_flags; /* We ignore other flags so far. */ ifa_flags &= IFA_F_NODAD | IFA_F_HOMEADDRESS | IFA_F_MANAGETEMPADDR | - IFA_F_NOPREFIXROUTE; + IFA_F_NOPREFIXROUTE | IFA_F_MCAUTOJOIN; ifa = ipv6_get_ifaddr(net, pfx, dev, 1); - if (ifa == NULL) { + if (!ifa) { /* * It would be best to check for !NLM_F_CREATE here but * userspace already relies on not having to provide this. @@ -4023,7 +4210,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, u32 preferred, valid; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, ifa->prefix_len, ifa->flags, rt_scope(ifa->scope), @@ -4052,11 +4239,11 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } if (!ipv6_addr_any(&ifa->peer_addr)) { - if (nla_put(skb, IFA_LOCAL, 16, &ifa->addr) < 0 || - nla_put(skb, IFA_ADDRESS, 16, &ifa->peer_addr) < 0) + if (nla_put_in6_addr(skb, IFA_LOCAL, &ifa->addr) < 0 || + nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->peer_addr) < 0) goto error; } else - if (nla_put(skb, IFA_ADDRESS, 16, &ifa->addr) < 0) + if (nla_put_in6_addr(skb, IFA_ADDRESS, &ifa->addr) < 0) goto error; if (put_cacheinfo(skb, ifa->cstamp, ifa->tstamp, preferred, valid) < 0) @@ -4084,11 +4271,11 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_MULTICAST, 16, &ifmca->mca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_MULTICAST, &ifmca->mca_addr) < 0 || put_cacheinfo(skb, ifmca->mca_cstamp, ifmca->mca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4110,11 +4297,11 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, scope = RT_SCOPE_SITE; nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; put_ifaddrmsg(nlh, 128, IFA_F_PERMANENT, scope, ifindex); - if (nla_put(skb, IFA_ANYCAST, 16, &ifaca->aca_addr) < 0 || + if (nla_put_in6_addr(skb, IFA_ANYCAST, &ifaca->aca_addr) < 0 || put_cacheinfo(skb, ifaca->aca_cstamp, ifaca->aca_tstamp, INFINITY_LIFE_TIME, INFINITY_LIFE_TIME) < 0) { nlmsg_cancel(skb, nlh); @@ -4283,7 +4470,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh) goto errout; addr = extract_addr(tb[IFA_ADDRESS], tb[IFA_LOCAL], &peer); - if (addr == NULL) { + if (!addr) { err = -EINVAL; goto errout; } @@ -4326,7 +4513,7 @@ static void inet6_ifa_notify(int event, struct inet6_ifaddr *ifa) int err = -ENOBUFS; skb = nlmsg_new(inet6_ifaddr_msgsize(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_ifaddr(skb, ifa, 0, 0, event, 0); @@ -4398,6 +4585,7 @@ static inline void ipv6_store_devconf(struct ipv6_devconf *cnf, array[DEVCONF_SUPPRESS_FRAG_NDISC] = cnf->suppress_frag_ndisc; array[DEVCONF_ACCEPT_RA_FROM_LOCAL] = cnf->accept_ra_from_local; array[DEVCONF_ACCEPT_RA_MTU] = cnf->accept_ra_mtu; + /* we omit DEVCONF_STABLE_SECRET for now */ } static inline size_t inet6_ifla6_size(void) @@ -4478,24 +4666,24 @@ static int inet6_fill_ifla6_attrs(struct sk_buff *skb, struct inet6_dev *idev) if (nla_put(skb, IFLA_INET6_CACHEINFO, sizeof(ci), &ci)) goto nla_put_failure; nla = nla_reserve(skb, IFLA_INET6_CONF, DEVCONF_MAX * sizeof(s32)); - if (nla == NULL) + if (!nla) goto nla_put_failure; ipv6_store_devconf(&idev->cnf, nla_data(nla), nla_len(nla)); /* XXX - MC not implemented */ nla = nla_reserve(skb, IFLA_INET6_STATS, IPSTATS_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_ICMP6STATS, ICMP6_MIB_MAX * sizeof(u64)); - if (nla == NULL) + if (!nla) goto nla_put_failure; snmp6_fill_stats(nla_data(nla), idev, IFLA_INET6_ICMP6STATS, nla_len(nla)); nla = nla_reserve(skb, IFLA_INET6_TOKEN, sizeof(struct in6_addr)); - if (nla == NULL) + if (!nla) goto nla_put_failure; if (nla_put_u8(skb, IFLA_INET6_ADDR_GEN_MODE, idev->addr_gen_mode)) @@ -4541,7 +4729,7 @@ static int inet6_set_iftoken(struct inet6_dev *idev, struct in6_addr *token) ASSERT_RTNL(); - if (token == NULL) + if (!token) return -EINVAL; if (ipv6_addr_any(token)) return -EINVAL; @@ -4632,8 +4820,15 @@ static int inet6_set_link_af(struct net_device *dev, const struct nlattr *nla) u8 mode = nla_get_u8(tb[IFLA_INET6_ADDR_GEN_MODE]); if (mode != IN6_ADDR_GEN_MODE_EUI64 && - mode != IN6_ADDR_GEN_MODE_NONE) + mode != IN6_ADDR_GEN_MODE_NONE && + mode != IN6_ADDR_GEN_MODE_STABLE_PRIVACY) + return -EINVAL; + + if (mode == IN6_ADDR_GEN_MODE_STABLE_PRIVACY && + !idev->cnf.stable_secret.initialized && + !dev_net(dev)->ipv6.devconf_dflt->stable_secret.initialized) return -EINVAL; + idev->addr_gen_mode = mode; err = 0; } @@ -4650,7 +4845,7 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, void *protoinfo; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; hdr = nlmsg_data(nlh); @@ -4665,11 +4860,11 @@ static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, (dev->addr_len && nla_put(skb, IFLA_ADDRESS, dev->addr_len, dev->dev_addr)) || nla_put_u32(skb, IFLA_MTU, dev->mtu) || - (dev->ifindex != dev->iflink && - nla_put_u32(skb, IFLA_LINK, dev->iflink))) + (dev->ifindex != dev_get_iflink(dev) && + nla_put_u32(skb, IFLA_LINK, dev_get_iflink(dev)))) goto nla_put_failure; protoinfo = nla_nest_start(skb, IFLA_PROTINFO); - if (protoinfo == NULL) + if (!protoinfo) goto nla_put_failure; if (inet6_fill_ifla6_attrs(skb, idev) < 0) @@ -4730,7 +4925,7 @@ void inet6_ifinfo_notify(int event, struct inet6_dev *idev) int err = -ENOBUFS; skb = nlmsg_new(inet6_if_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_ifinfo(skb, idev, 0, 0, event, 0); @@ -4763,7 +4958,7 @@ static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, struct prefix_cacheinfo ci; nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; pmsg = nlmsg_data(nlh); @@ -4802,7 +4997,7 @@ static void inet6_prefix_notify(int event, struct inet6_dev *idev, int err = -ENOBUFS; skb = nlmsg_new(inet6_prefix_nlmsg_size(), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = inet6_fill_prefix(skb, idev, pinfo, 0, 0, event, 0); @@ -5042,6 +5237,74 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, return ret; } +static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + int err; + struct in6_addr addr; + char str[IPV6_MAX_STRLEN]; + struct ctl_table lctl = *ctl; + struct net *net = ctl->extra2; + struct ipv6_stable_secret *secret = ctl->data; + + if (&net->ipv6.devconf_all->stable_secret == ctl->data) + return -EIO; + + lctl.maxlen = IPV6_MAX_STRLEN; + lctl.data = str; + + if (!rtnl_trylock()) + return restart_syscall(); + + if (!write && !secret->initialized) { + err = -EIO; + goto out; + } + + if (!write) { + err = snprintf(str, sizeof(str), "%pI6", + &secret->secret); + if (err >= sizeof(str)) { + err = -EIO; + goto out; + } + } + + err = proc_dostring(&lctl, write, buffer, lenp, ppos); + if (err || !write) + goto out; + + if (in6_pton(str, -1, addr.in6_u.u6_addr8, -1, NULL) != 1) { + err = -EIO; + goto out; + } + + secret->initialized = true; + secret->secret = addr; + + if (&net->ipv6.devconf_dflt->stable_secret == ctl->data) { + struct net_device *dev; + + for_each_netdev(net, dev) { + struct inet6_dev *idev = __in6_dev_get(dev); + + if (idev) { + idev->addr_gen_mode = + IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + } + } else { + struct inet6_dev *idev = ctl->extra1; + + idev->addr_gen_mode = IN6_ADDR_GEN_MODE_STABLE_PRIVACY; + } + +out: + rtnl_unlock(); + + return err; +} static struct addrconf_sysctl_table { @@ -5315,6 +5578,13 @@ static struct addrconf_sysctl_table .proc_handler = proc_dointvec, }, { + .procname = "stable_secret", + .data = &ipv6_devconf.stable_secret, + .maxlen = IPV6_MAX_STRLEN, + .mode = 0600, + .proc_handler = addrconf_sysctl_stable_secret, + }, + { /* sentinel */ } }, @@ -5328,7 +5598,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, char path[sizeof("net/ipv6/conf/") + IFNAMSIZ]; t = kmemdup(&addrconf_sysctl, sizeof(*t), GFP_KERNEL); - if (t == NULL) + if (!t) goto out; for (i = 0; t->addrconf_vars[i].data; i++) { @@ -5340,7 +5610,7 @@ static int __addrconf_sysctl_register(struct net *net, char *dev_name, snprintf(path, sizeof(path), "net/ipv6/conf/%s", dev_name); t->sysctl_header = register_net_sysctl(net, path, t->addrconf_vars); - if (t->sysctl_header == NULL) + if (!t->sysctl_header) goto free; p->sysctl = t; @@ -5356,7 +5626,7 @@ static void __addrconf_sysctl_unregister(struct ipv6_devconf *p) { struct addrconf_sysctl_table *t; - if (p->sysctl == NULL) + if (!p->sysctl) return; t = p->sysctl; @@ -5399,17 +5669,20 @@ static int __net_init addrconf_init_net(struct net *net) struct ipv6_devconf *all, *dflt; all = kmemdup(&ipv6_devconf, sizeof(ipv6_devconf), GFP_KERNEL); - if (all == NULL) + if (!all) goto err_alloc_all; dflt = kmemdup(&ipv6_devconf_dflt, sizeof(ipv6_devconf_dflt), GFP_KERNEL); - if (dflt == NULL) + if (!dflt) goto err_alloc_dflt; /* these will be inherited by all namespaces */ dflt->autoconf = ipv6_defaults.autoconf; dflt->disable_ipv6 = ipv6_defaults.disable_ipv6; + dflt->stable_secret.initialized = false; + all->stable_secret.initialized = false; + net->ipv6.devconf_all = all; net->ipv6.devconf_dflt = dflt; diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c index 98cc4cd570e2..ca09bf49ac68 100644 --- a/net/ipv6/addrconf_core.c +++ b/net/ipv6/addrconf_core.c @@ -133,6 +133,14 @@ static void snmp6_free_dev(struct inet6_dev *idev) free_percpu(idev->stats.ipv6); } +static void in6_dev_finish_destroy_rcu(struct rcu_head *head) +{ + struct inet6_dev *idev = container_of(head, struct inet6_dev, rcu); + + snmp6_free_dev(idev); + kfree(idev); +} + /* Nobody refers to this device, we may destroy it. */ void in6_dev_finish_destroy(struct inet6_dev *idev) @@ -140,7 +148,7 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) struct net_device *dev = idev->dev; WARN_ON(!list_empty(&idev->addr_list)); - WARN_ON(idev->mc_list != NULL); + WARN_ON(idev->mc_list); WARN_ON(timer_pending(&idev->rs_timer)); #ifdef NET_REFCNT_DEBUG @@ -151,7 +159,6 @@ void in6_dev_finish_destroy(struct inet6_dev *idev) pr_warn("Freeing alive inet6 device %p\n", idev); return; } - snmp6_free_dev(idev); - kfree_rcu(idev, rcu); + call_rcu(&idev->rcu, in6_dev_finish_destroy_rcu); } EXPORT_SYMBOL(in6_dev_finish_destroy); diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index e43e79d0a612..882124ebb438 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -29,9 +29,7 @@ * Policy Table */ struct ip6addrlbl_entry { -#ifdef CONFIG_NET_NS - struct net *lbl_net; -#endif + possible_net_t lbl_net; struct in6_addr prefix; int prefixlen; int ifindex; @@ -129,9 +127,6 @@ static const __net_initconst struct ip6addrlbl_init_table /* Object management */ static inline void ip6addrlbl_free(struct ip6addrlbl_entry *p) { -#ifdef CONFIG_NET_NS - release_net(p->lbl_net); -#endif kfree(p); } @@ -240,9 +235,7 @@ static struct ip6addrlbl_entry *ip6addrlbl_alloc(struct net *net, newp->addrtype = addrtype; newp->label = label; INIT_HLIST_NODE(&newp->list); -#ifdef CONFIG_NET_NS - newp->lbl_net = hold_net(net); -#endif + write_pnet(&newp->lbl_net, net); atomic_set(&newp->refcnt, 1); return newp; } @@ -484,7 +477,7 @@ static int ip6addrlbl_fill(struct sk_buff *skb, ip6addrlbl_putmsg(nlh, p->prefixlen, p->ifindex, lseq); - if (nla_put(skb, IFAL_ADDRESS, 16, &p->prefix) < 0 || + if (nla_put_in6_addr(skb, IFAL_ADDRESS, &p->prefix) < 0 || nla_put_u32(skb, IFAL_LABEL, p->label) < 0) { nlmsg_cancel(skb, nlh); return -EMSGSIZE; diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index e8c4400f23e9..7de52b65173f 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -164,11 +164,11 @@ lookup_protocol: answer_flags = answer->flags; rcu_read_unlock(); - WARN_ON(answer_prot->slab == NULL); + WARN_ON(!answer_prot->slab); err = -ENOBUFS; - sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot); - if (sk == NULL) + sk = sk_alloc(net, PF_INET6, GFP_KERNEL, answer_prot, kern); + if (!sk) goto out; sock_init_data(sock, sk); @@ -362,7 +362,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) np->saddr = addr->sin6_addr; /* Make sure we are allowed to bind here. */ - if (sk->sk_prot->get_port(sk, snum)) { + if ((snum || !inet->bind_address_no_port) && + sk->sk_prot->get_port(sk, snum)) { inet_reset_saddr(sk); err = -EADDRINUSE; goto out; @@ -391,7 +392,7 @@ int inet6_release(struct socket *sock) { struct sock *sk = sock->sk; - if (sk == NULL) + if (!sk) return -EINVAL; /* Free mc lists */ @@ -413,11 +414,11 @@ void inet6_destroy_sock(struct sock *sk) /* Release rx options */ skb = xchg(&np->pktoptions, NULL); - if (skb != NULL) + if (skb) kfree_skb(skb); skb = xchg(&np->rxpmtu, NULL); - if (skb != NULL) + if (skb) kfree_skb(skb); /* Free flowlabels */ @@ -426,7 +427,7 @@ void inet6_destroy_sock(struct sock *sk) /* Free tx options */ opt = xchg(&np->opt, NULL); - if (opt != NULL) + if (opt) sock_kfree_s(sk, opt, opt->tot_len); } EXPORT_SYMBOL_GPL(inet6_destroy_sock); @@ -640,7 +641,7 @@ int inet6_sk_rebuild_header(struct sock *sk) dst = __sk_dst_check(sk, np->dst_cookie); - if (dst == NULL) { + if (!dst) { struct inet_sock *inet = inet_sk(sk); struct in6_addr *final_p, final; struct flowi6 fl6; @@ -766,6 +767,9 @@ static int __net_init inet6_net_init(struct net *net) net->ipv6.sysctl.icmpv6_time = 1*HZ; net->ipv6.sysctl.flowlabel_consistency = 1; net->ipv6.sysctl.auto_flowlabels = 0; + net->ipv6.sysctl.idgen_retries = 3; + net->ipv6.sysctl.idgen_delay = 1 * HZ; + net->ipv6.sysctl.flowlabel_state_ranges = 1; atomic_set(&net->ipv6.fib6_sernum, 1); err = ipv6_init_mibs(net); @@ -824,7 +828,7 @@ static int __init inet6_init(void) struct list_head *r; int err = 0; - BUILD_BUG_ON(sizeof(struct inet6_skb_parm) > FIELD_SIZEOF(struct sk_buff, cb)); + sock_skb_cb_check_size(sizeof(struct inet6_skb_parm)); /* Register the socket-side information for inet6_create. */ for (r = &inetsw6[0]; r < &inetsw6[SOCK_MAX]; ++r) diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c index a6727add2624..ed7d4e3f9c10 100644 --- a/net/ipv6/ah6.c +++ b/net/ipv6/ah6.c @@ -681,7 +681,7 @@ static int ah6_init_state(struct xfrm_state *x) goto error; ahp = kzalloc(sizeof(*ahp), GFP_KERNEL); - if (ahp == NULL) + if (!ahp) return -ENOMEM; ahash = crypto_alloc_ahash(x->aalg->alg_name, 0, 0); diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c index baf2742d1ec4..514ac259f543 100644 --- a/net/ipv6/anycast.c +++ b/net/ipv6/anycast.c @@ -60,6 +60,8 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) int ishost = !net->ipv6.devconf_all->forwarding; int err = 0; + ASSERT_RTNL(); + if (!ns_capable(net->user_ns, CAP_NET_ADMIN)) return -EPERM; if (ipv6_addr_is_multicast(addr)) @@ -68,12 +70,11 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) return -EINVAL; pac = sock_kmalloc(sk, sizeof(struct ipv6_ac_socklist), GFP_KERNEL); - if (pac == NULL) + if (!pac) return -ENOMEM; pac->acl_next = NULL; pac->acl_addr = *addr; - rtnl_lock(); if (ifindex == 0) { struct rt6_info *rt; @@ -92,7 +93,7 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } else dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) { + if (!dev) { err = -ENODEV; goto error; } @@ -130,7 +131,6 @@ int ipv6_sock_ac_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } error: - rtnl_unlock(); if (pac) sock_kfree_s(sk, pac, sizeof(*pac)); return err; @@ -146,7 +146,8 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_ac_socklist *pac, *prev_pac; struct net *net = sock_net(sk); - rtnl_lock(); + ASSERT_RTNL(); + prev_pac = NULL; for (pac = np->ipv6_ac_list; pac; pac = pac->acl_next) { if ((ifindex == 0 || pac->acl_ifindex == ifindex) && @@ -154,10 +155,8 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) break; prev_pac = pac; } - if (!pac) { - rtnl_unlock(); + if (!pac) return -ENOENT; - } if (prev_pac) prev_pac->acl_next = pac->acl_next; else @@ -166,7 +165,6 @@ int ipv6_sock_ac_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) dev = __dev_get_by_index(net, pac->acl_ifindex); if (dev) ipv6_dev_ac_dec(dev, &pac->acl_addr); - rtnl_unlock(); sock_kfree_s(sk, pac, sizeof(*pac)); return 0; @@ -224,7 +222,7 @@ static struct ifacaddr6 *aca_alloc(struct rt6_info *rt, struct ifacaddr6 *aca; aca = kzalloc(sizeof(*aca), GFP_ATOMIC); - if (aca == NULL) + if (!aca) return NULL; aca->aca_addr = *addr; @@ -270,7 +268,7 @@ int __ipv6_dev_ac_inc(struct inet6_dev *idev, const struct in6_addr *addr) goto out; } aca = aca_alloc(rt, addr); - if (aca == NULL) { + if (!aca) { ip6_rt_put(rt); err = -ENOMEM; goto out; @@ -339,7 +337,7 @@ static int ipv6_dev_ac_dec(struct net_device *dev, const struct in6_addr *addr) { struct inet6_dev *idev = __in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -ENODEV; return __ipv6_dev_ac_dec(idev, addr); } diff --git a/net/ipv6/datagram.c b/net/ipv6/datagram.c index ace8daca5c83..b10a88986a98 100644 --- a/net/ipv6/datagram.c +++ b/net/ipv6/datagram.c @@ -40,7 +40,7 @@ static bool ipv6_mapped_addr_any(const struct in6_addr *a) return ipv6_addr_v4mapped(a) && (a->s6_addr32[3] == 0); } -int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +static int __ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { struct sockaddr_in6 *usin = (struct sockaddr_in6 *) uaddr; struct inet_sock *inet = inet_sk(sk); @@ -56,7 +56,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) if (usin->sin6_family == AF_INET) { if (__ipv6_only_sock(sk)) return -EAFNOSUPPORT; - err = ip4_datagram_connect(sk, uaddr, addr_len); + err = __ip4_datagram_connect(sk, uaddr, addr_len); goto ipv4_connected; } @@ -71,7 +71,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) fl6.flowlabel = usin->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -98,9 +98,9 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) sin.sin_addr.s_addr = daddr->s6_addr32[3]; sin.sin_port = usin->sin6_port; - err = ip4_datagram_connect(sk, - (struct sockaddr *) &sin, - sizeof(sin)); + err = __ip4_datagram_connect(sk, + (struct sockaddr *) &sin, + sizeof(sin)); ipv4_connected: if (err) @@ -204,6 +204,16 @@ out: fl6_sock_release(flowlabel); return err; } + +int ip6_datagram_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + int res; + + lock_sock(sk); + res = __ip6_datagram_connect(sk, uaddr, addr_len); + release_sock(sk); + return res; +} EXPORT_SYMBOL_GPL(ip6_datagram_connect); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *uaddr, @@ -325,6 +335,16 @@ void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu) kfree_skb(skb); } +/* For some errors we have valid addr_offset even with zero payload and + * zero port. Also, addr_offset should be supported if port is set. + */ +static inline bool ipv6_datagram_support_addr(struct sock_exterr_skb *serr) +{ + return serr->ee.ee_origin == SO_EE_ORIGIN_ICMP6 || + serr->ee.ee_origin == SO_EE_ORIGIN_ICMP || + serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL || serr->port; +} + /* IPv6 supports cmsg on all origins aside from SO_EE_ORIGIN_LOCAL. * * At one point, excluding local errors was a quick test to identify icmp/icmp6 @@ -373,7 +393,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) err = -EAGAIN; skb = sock_dequeue_err_skb(sk); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; @@ -389,7 +409,7 @@ int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len) serr = SKB_EXT_ERR(skb); - if (sin && serr->port) { + if (sin && ipv6_datagram_support_addr(serr)) { const unsigned char *nh = skb_network_header(skb); sin->sin6_family = AF_INET6; sin->sin6_flowinfo = 0; @@ -463,7 +483,7 @@ int ipv6_recv_rxpmtu(struct sock *sk, struct msghdr *msg, int len, err = -EAGAIN; skb = xchg(&np->rxpmtu, NULL); - if (skb == NULL) + if (!skb) goto out; copied = skb->len; diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index e48f2c7c5c59..060a60b2f8a6 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -76,7 +76,7 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen) len = ALIGN(len, crypto_tfm_ctx_alignment()); } - len += sizeof(struct aead_givcrypt_request) + crypto_aead_reqsize(aead); + len += sizeof(struct aead_request) + crypto_aead_reqsize(aead); len = ALIGN(len, __alignof__(struct scatterlist)); len += sizeof(struct scatterlist) * nfrags; @@ -96,17 +96,6 @@ static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen) crypto_aead_alignmask(aead) + 1) : tmp + seqhilen; } -static inline struct aead_givcrypt_request *esp_tmp_givreq( - struct crypto_aead *aead, u8 *iv) -{ - struct aead_givcrypt_request *req; - - req = (void *)PTR_ALIGN(iv + crypto_aead_ivsize(aead), - crypto_tfm_ctx_alignment()); - aead_givcrypt_set_tfm(req, aead); - return req; -} - static inline struct aead_request *esp_tmp_req(struct crypto_aead *aead, u8 *iv) { struct aead_request *req; @@ -125,14 +114,6 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead, __alignof__(struct scatterlist)); } -static inline struct scatterlist *esp_givreq_sg( - struct crypto_aead *aead, struct aead_givcrypt_request *req) -{ - return (void *)ALIGN((unsigned long)(req + 1) + - crypto_aead_reqsize(aead), - __alignof__(struct scatterlist)); -} - static void esp_output_done(struct crypto_async_request *base, int err) { struct sk_buff *skb = base->data; @@ -141,32 +122,57 @@ static void esp_output_done(struct crypto_async_request *base, int err) xfrm_output_resume(skb, err); } +/* Move ESP header back into place. */ +static void esp_restore_header(struct sk_buff *skb, unsigned int offset) +{ + struct ip_esp_hdr *esph = (void *)(skb->data + offset); + void *tmp = ESP_SKB_CB(skb)->tmp; + __be32 *seqhi = esp_tmp_seqhi(tmp); + + esph->seq_no = esph->spi; + esph->spi = *seqhi; +} + +static void esp_output_restore_header(struct sk_buff *skb) +{ + esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32)); +} + +static void esp_output_done_esn(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + esp_output_restore_header(skb); + esp_output_done(base, err); +} + static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) { int err; struct ip_esp_hdr *esph; struct crypto_aead *aead; - struct aead_givcrypt_request *req; + struct aead_request *req; struct scatterlist *sg; - struct scatterlist *asg; struct sk_buff *trailer; void *tmp; int blksize; int clen; int alen; int plen; + int ivlen; int tfclen; int nfrags; int assoclen; - int sglists; int seqhilen; u8 *iv; u8 *tail; __be32 *seqhi; + __be64 seqno; /* skb is pure payload to encrypt */ aead = x->data; alen = crypto_aead_authsize(aead); + ivlen = crypto_aead_ivsize(aead); tfclen = 0; if (x->tfcpad) { @@ -187,16 +193,14 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) nfrags = err; assoclen = sizeof(*esph); - sglists = 1; seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { - sglists += 2; seqhilen += sizeof(__be32); assoclen += seqhilen; } - tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) { err = -ENOMEM; goto error; @@ -204,9 +208,8 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); - req = esp_tmp_givreq(aead, iv); - asg = esp_givreq_sg(aead, req); - sg = asg + sglists; + req = esp_tmp_req(aead, iv); + sg = esp_req_sg(aead, req); /* Fill padding... */ tail = skb_tail_pointer(trailer); @@ -227,36 +230,53 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) esph = ip_esp_hdr(skb); *skb_mac_header(skb) = IPPROTO_ESP; - esph->spi = x->id.spi; esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low); + aead_request_set_callback(req, 0, esp_output_done, skb); + + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * encryption. + */ + if ((x->props.flags & XFRM_STATE_ESN)) { + esph = (void *)(skb_transport_header(skb) - sizeof(__be32)); + *seqhi = esph->spi; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi); + aead_request_set_callback(req, 0, esp_output_done_esn, skb); + } + + esph->spi = x->id.spi; + sg_init_table(sg, nfrags); skb_to_sgvec(skb, sg, - esph->enc_data + crypto_aead_ivsize(aead) - skb->data, - clen + alen); + (unsigned char *)esph - skb->data, + assoclen + ivlen + clen + alen); - if ((x->props.flags & XFRM_STATE_ESN)) { - sg_init_table(asg, 3); - sg_set_buf(asg, &esph->spi, sizeof(__be32)); - *seqhi = htonl(XFRM_SKB_CB(skb)->seq.output.hi); - sg_set_buf(asg + 1, seqhi, seqhilen); - sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); - } else - sg_init_one(asg, esph, sizeof(*esph)); - - aead_givcrypt_set_callback(req, 0, esp_output_done, skb); - aead_givcrypt_set_crypt(req, sg, sg, clen, iv); - aead_givcrypt_set_assoc(req, asg, assoclen); - aead_givcrypt_set_giv(req, esph->enc_data, - XFRM_SKB_CB(skb)->seq.output.low); + aead_request_set_crypt(req, sg, sg, ivlen + clen, iv); + aead_request_set_ad(req, assoclen); + + seqno = cpu_to_be64(XFRM_SKB_CB(skb)->seq.output.low + + ((u64)XFRM_SKB_CB(skb)->seq.output.hi << 32)); + + memset(iv, 0, ivlen); + memcpy(iv + ivlen - min(ivlen, 8), (u8 *)&seqno + 8 - min(ivlen, 8), + min(ivlen, 8)); ESP_SKB_CB(skb)->tmp = tmp; - err = crypto_aead_givencrypt(req); - if (err == -EINPROGRESS) + err = crypto_aead_encrypt(req); + + switch (err) { + case -EINPROGRESS: goto error; - if (err == -EBUSY) + case -EBUSY: err = NET_XMIT_DROP; + break; + + case 0: + if ((x->props.flags & XFRM_STATE_ESN)) + esp_output_restore_header(skb); + } kfree(tmp); @@ -317,25 +337,38 @@ static void esp_input_done(struct crypto_async_request *base, int err) xfrm_input_resume(skb, esp_input_done2(skb, err)); } +static void esp_input_restore_header(struct sk_buff *skb) +{ + esp_restore_header(skb, 0); + __skb_pull(skb, 4); +} + +static void esp_input_done_esn(struct crypto_async_request *base, int err) +{ + struct sk_buff *skb = base->data; + + esp_input_restore_header(skb); + esp_input_done(base, err); +} + static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) { struct ip_esp_hdr *esph; struct crypto_aead *aead = x->data; struct aead_request *req; struct sk_buff *trailer; - int elen = skb->len - sizeof(*esph) - crypto_aead_ivsize(aead); + int ivlen = crypto_aead_ivsize(aead); + int elen = skb->len - sizeof(*esph) - ivlen; int nfrags; int assoclen; - int sglists; int seqhilen; int ret = 0; void *tmp; __be32 *seqhi; u8 *iv; struct scatterlist *sg; - struct scatterlist *asg; - if (!pskb_may_pull(skb, sizeof(*esph) + crypto_aead_ivsize(aead))) { + if (!pskb_may_pull(skb, sizeof(*esph) + ivlen)) { ret = -EINVAL; goto out; } @@ -354,16 +387,14 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) ret = -ENOMEM; assoclen = sizeof(*esph); - sglists = 1; seqhilen = 0; if (x->props.flags & XFRM_STATE_ESN) { - sglists += 2; seqhilen += sizeof(__be32); assoclen += seqhilen; } - tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); + tmp = esp_alloc_tmp(aead, nfrags, seqhilen); if (!tmp) goto out; @@ -371,36 +402,39 @@ static int esp6_input(struct xfrm_state *x, struct sk_buff *skb) seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); req = esp_tmp_req(aead, iv); - asg = esp_req_sg(aead, req); - sg = asg + sglists; + sg = esp_req_sg(aead, req); skb->ip_summed = CHECKSUM_NONE; esph = (struct ip_esp_hdr *)skb->data; - /* Get ivec. This can be wrong, check against another impls. */ - iv = esph->enc_data; - - sg_init_table(sg, nfrags); - skb_to_sgvec(skb, sg, sizeof(*esph) + crypto_aead_ivsize(aead), elen); + aead_request_set_callback(req, 0, esp_input_done, skb); + /* For ESN we move the header forward by 4 bytes to + * accomodate the high bits. We will move it back after + * decryption. + */ if ((x->props.flags & XFRM_STATE_ESN)) { - sg_init_table(asg, 3); - sg_set_buf(asg, &esph->spi, sizeof(__be32)); - *seqhi = XFRM_SKB_CB(skb)->seq.input.hi; - sg_set_buf(asg + 1, seqhi, seqhilen); - sg_set_buf(asg + 2, &esph->seq_no, sizeof(__be32)); - } else - sg_init_one(asg, esph, sizeof(*esph)); + esph = (void *)skb_push(skb, 4); + *seqhi = esph->spi; + esph->spi = esph->seq_no; + esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.input.hi); + aead_request_set_callback(req, 0, esp_input_done_esn, skb); + } - aead_request_set_callback(req, 0, esp_input_done, skb); - aead_request_set_crypt(req, sg, sg, elen, iv); - aead_request_set_assoc(req, asg, assoclen); + sg_init_table(sg, nfrags); + skb_to_sgvec(skb, sg, 0, skb->len); + + aead_request_set_crypt(req, sg, sg, elen + ivlen, iv); + aead_request_set_ad(req, assoclen); ret = crypto_aead_decrypt(req); if (ret == -EINPROGRESS) goto out; + if ((x->props.flags & XFRM_STATE_ESN)) + esp_input_restore_header(skb); + ret = esp_input_done2(skb, ret); out: @@ -460,10 +494,16 @@ static void esp6_destroy(struct xfrm_state *x) static int esp_init_aead(struct xfrm_state *x) { + char aead_name[CRYPTO_MAX_ALG_NAME]; struct crypto_aead *aead; int err; - aead = crypto_alloc_aead(x->aead->alg_name, 0, 0); + err = -ENAMETOOLONG; + if (snprintf(aead_name, CRYPTO_MAX_ALG_NAME, "%s(%s)", + x->geniv, x->aead->alg_name) >= CRYPTO_MAX_ALG_NAME) + goto error; + + aead = crypto_alloc_aead(aead_name, 0, 0); err = PTR_ERR(aead); if (IS_ERR(aead)) goto error; @@ -495,22 +535,26 @@ static int esp_init_authenc(struct xfrm_state *x) int err; err = -EINVAL; - if (x->ealg == NULL) + if (!x->ealg) goto error; err = -ENAMETOOLONG; if ((x->props.flags & XFRM_STATE_ESN)) { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, - "authencesn(%s,%s)", + "%s%sauthencesn(%s,%s)%s", + x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + x->ealg->alg_name, + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) goto error; } else { if (snprintf(authenc_name, CRYPTO_MAX_ALG_NAME, - "authenc(%s,%s)", + "%s%sauthenc(%s,%s)%s", + x->geniv ?: "", x->geniv ? "(" : "", x->aalg ? x->aalg->alg_name : "digest_null", - x->ealg->alg_name) >= CRYPTO_MAX_ALG_NAME) + x->ealg->alg_name, + x->geniv ? ")" : "") >= CRYPTO_MAX_ALG_NAME) goto error; } diff --git a/net/ipv6/exthdrs_core.c b/net/ipv6/exthdrs_core.c index 8af3eb57f438..5c5d23e59da5 100644 --- a/net/ipv6/exthdrs_core.c +++ b/net/ipv6/exthdrs_core.c @@ -82,7 +82,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, if (nexthdr == NEXTHDR_NONE) return -1; hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) + if (!hp) return -1; if (nexthdr == NEXTHDR_FRAGMENT) { __be16 _frag_off, *fp; @@ -91,7 +91,7 @@ int ipv6_skip_exthdr(const struct sk_buff *skb, int start, u8 *nexthdrp, frag_off), sizeof(_frag_off), &_frag_off); - if (fp == NULL) + if (!fp) return -1; *frag_offp = *fp; @@ -218,7 +218,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, } hp = skb_header_pointer(skb, start, sizeof(_hdr), &_hdr); - if (hp == NULL) + if (!hp) return -EBADMSG; if (nexthdr == NEXTHDR_ROUTING) { @@ -226,7 +226,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, rh = skb_header_pointer(skb, start, sizeof(_rh), &_rh); - if (rh == NULL) + if (!rh) return -EBADMSG; if (flags && (*flags & IP6_FH_F_SKIP_RH) && @@ -245,7 +245,7 @@ int ipv6_find_hdr(const struct sk_buff *skb, unsigned int *offset, frag_off), sizeof(_frag_off), &_frag_off); - if (fp == NULL) + if (!fp) return -EBADMSG; _frag_off = ntohs(*fp) & ~0x7; diff --git a/net/ipv6/fib6_rules.c b/net/ipv6/fib6_rules.c index 70bc6abc0639..2367a16eae58 100644 --- a/net/ipv6/fib6_rules.c +++ b/net/ipv6/fib6_rules.c @@ -199,12 +199,10 @@ static int fib6_rule_configure(struct fib_rule *rule, struct sk_buff *skb, } if (frh->src_len) - nla_memcpy(&rule6->src.addr, tb[FRA_SRC], - sizeof(struct in6_addr)); + rule6->src.addr = nla_get_in6_addr(tb[FRA_SRC]); if (frh->dst_len) - nla_memcpy(&rule6->dst.addr, tb[FRA_DST], - sizeof(struct in6_addr)); + rule6->dst.addr = nla_get_in6_addr(tb[FRA_DST]); rule6->src.plen = frh->src_len; rule6->dst.plen = frh->dst_len; @@ -250,11 +248,9 @@ static int fib6_rule_fill(struct fib_rule *rule, struct sk_buff *skb, frh->tos = rule6->tclass; if ((rule6->dst.plen && - nla_put(skb, FRA_DST, sizeof(struct in6_addr), - &rule6->dst.addr)) || + nla_put_in6_addr(skb, FRA_DST, &rule6->dst.addr)) || (rule6->src.plen && - nla_put(skb, FRA_SRC, sizeof(struct in6_addr), - &rule6->src.addr))) + nla_put_in6_addr(skb, FRA_SRC, &rule6->src.addr))) goto nla_put_failure; return 0; @@ -299,19 +295,16 @@ static int __net_init fib6_rules_net_init(struct net *net) ops = fib_rules_register(&fib6_rules_ops_template, net); if (IS_ERR(ops)) return PTR_ERR(ops); - net->ipv6.fib6_rules_ops = ops; - - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, 0, - RT6_TABLE_LOCAL, 0); + err = fib_default_rule_add(ops, 0, RT6_TABLE_LOCAL, 0); if (err) goto out_fib6_rules_ops; - err = fib_default_rule_add(net->ipv6.fib6_rules_ops, - 0x7FFE, RT6_TABLE_MAIN, 0); + err = fib_default_rule_add(ops, 0x7FFE, RT6_TABLE_MAIN, 0); if (err) goto out_fib6_rules_ops; + net->ipv6.fib6_rules_ops = ops; out: return err; diff --git a/net/ipv6/icmp.c b/net/ipv6/icmp.c index a5e95199585e..713d7434c911 100644 --- a/net/ipv6/icmp.c +++ b/net/ipv6/icmp.c @@ -160,8 +160,7 @@ static bool is_ineligible(const struct sk_buff *skb) tp = skb_header_pointer(skb, ptr+offsetof(struct icmp6hdr, icmp6_type), sizeof(_type), &_type); - if (tp == NULL || - !(*tp & ICMPV6_INFOMSG_MASK)) + if (!tp || !(*tp & ICMPV6_INFOMSG_MASK)) return true; } return false; @@ -208,7 +207,7 @@ static bool icmpv6_xrlim_allow(struct sock *sk, u8 type, struct inet_peer *peer; peer = inet_getpeer_v6(net->ipv6.peers, - &rt->rt6i_dst.addr, 1); + &fl6->daddr, 1); res = inet_peer_xrlim_allow(peer, tmo); if (peer) inet_putpeer(peer); @@ -231,7 +230,7 @@ static bool opt_unrec(struct sk_buff *skb, __u32 offset) offset += skb_network_offset(skb); op = skb_header_pointer(skb, offset, sizeof(_optval), &_optval); - if (op == NULL) + if (!op) return true; return (*op & 0xC0) == 0x80; } @@ -244,7 +243,7 @@ int icmpv6_push_pending_frames(struct sock *sk, struct flowi6 *fl6, int err = 0; skb = skb_peek(&sk->sk_write_queue); - if (skb == NULL) + if (!skb) goto out; icmp6h = icmp6_hdr(skb); @@ -338,7 +337,7 @@ static struct dst_entry *icmpv6_route_lookup(struct net *net, * We won't send icmp if the destination is known * anycast. */ - if (((struct rt6_info *)dst)->rt6i_flags & RTF_ANYCAST) { + if (ipv6_anycast_destination(dst, &fl6->daddr)) { net_dbg_ratelimited("icmp6_send: acast source\n"); dst_release(dst); return ERR_PTR(-EINVAL); @@ -479,7 +478,7 @@ static void icmp6_send(struct sk_buff *skb, u8 type, u8 code, __u32 info) security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); - if (sk == NULL) + if (!sk) return; sk->sk_mark = mark; np = inet6_sk(sk); @@ -565,7 +564,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) if (!ipv6_unicast_destination(skb) && !(net->ipv6.sysctl.anycast_src_echo_reply && - ipv6_anycast_destination(skb))) + ipv6_anycast_destination(skb_dst(skb), saddr))) saddr = NULL; memcpy(&tmp_hdr, icmph, sizeof(tmp_hdr)); @@ -582,7 +581,7 @@ static void icmpv6_echo_reply(struct sk_buff *skb) security_skb_classify_flow(skb, flowi6_to_flowi(&fl6)); sk = icmpv6_xmit_lock(net); - if (sk == NULL) + if (!sk) return; sk->sk_mark = mark; np = inet6_sk(sk); @@ -839,7 +838,7 @@ static int __net_init icmpv6_sk_init(struct net *net) net->ipv6.icmp_sk = kzalloc(nr_cpu_ids * sizeof(struct sock *), GFP_KERNEL); - if (net->ipv6.icmp_sk == NULL) + if (!net->ipv6.icmp_sk) return -ENOMEM; for_each_possible_cpu(i) { diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 29b32206e494..6927f3fb5597 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -112,22 +112,20 @@ static u32 inet6_synq_hash(const struct in6_addr *raddr, const __be16 rport, return c & (synq_hsize - 1); } -struct request_sock *inet6_csk_search_req(const struct sock *sk, - struct request_sock ***prevp, +struct request_sock *inet6_csk_search_req(struct sock *sk, const __be16 rport, const struct in6_addr *raddr, const struct in6_addr *laddr, const int iif) { - const struct inet_connection_sock *icsk = inet_csk(sk); + struct inet_connection_sock *icsk = inet_csk(sk); struct listen_sock *lopt = icsk->icsk_accept_queue.listen_opt; - struct request_sock *req, **prev; + struct request_sock *req; + u32 hash = inet6_synq_hash(raddr, rport, lopt->hash_rnd, + lopt->nr_table_entries); - for (prev = &lopt->syn_table[inet6_synq_hash(raddr, rport, - lopt->hash_rnd, - lopt->nr_table_entries)]; - (req = *prev) != NULL; - prev = &req->dl_next) { + spin_lock(&icsk->icsk_accept_queue.syn_wait_lock); + for (req = lopt->syn_table[hash]; req != NULL; req = req->dl_next) { const struct inet_request_sock *ireq = inet_rsk(req); if (ireq->ir_rmt_port == rport && @@ -135,13 +133,14 @@ struct request_sock *inet6_csk_search_req(const struct sock *sk, ipv6_addr_equal(&ireq->ir_v6_rmt_addr, raddr) && ipv6_addr_equal(&ireq->ir_v6_loc_addr, laddr) && (!ireq->ir_iif || ireq->ir_iif == iif)) { + atomic_inc(&req->rsk_refcnt); WARN_ON(req->sk != NULL); - *prevp = prev; - return req; + break; } } + spin_unlock(&icsk->icsk_accept_queue.syn_wait_lock); - return NULL; + return req; } EXPORT_SYMBOL_GPL(inet6_csk_search_req); diff --git a/net/ipv6/inet6_hashtables.c b/net/ipv6/inet6_hashtables.c index 051dffb49c90..b4fd96de97e6 100644 --- a/net/ipv6/inet6_hashtables.c +++ b/net/ipv6/inet6_hashtables.c @@ -23,11 +23,9 @@ #include <net/secure_seq.h> #include <net/ip.h> -static unsigned int inet6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +u32 inet6_ehashfn(const struct net *net, + const struct in6_addr *laddr, const u16 lport, + const struct in6_addr *faddr, const __be16 fport) { static u32 inet6_ehash_secret __read_mostly; static u32 ipv6_hash_secret __read_mostly; @@ -44,54 +42,6 @@ static unsigned int inet6_ehashfn(struct net *net, inet6_ehash_secret + net_hash_mix(net)); } -static int inet6_sk_ehashfn(const struct sock *sk) -{ - const struct inet_sock *inet = inet_sk(sk); - const struct in6_addr *laddr = &sk->sk_v6_rcv_saddr; - const struct in6_addr *faddr = &sk->sk_v6_daddr; - const __u16 lport = inet->inet_num; - const __be16 fport = inet->inet_dport; - struct net *net = sock_net(sk); - - return inet6_ehashfn(net, laddr, lport, faddr, fport); -} - -int __inet6_hash(struct sock *sk, struct inet_timewait_sock *tw) -{ - struct inet_hashinfo *hashinfo = sk->sk_prot->h.hashinfo; - int twrefcnt = 0; - - WARN_ON(!sk_unhashed(sk)); - - if (sk->sk_state == TCP_LISTEN) { - struct inet_listen_hashbucket *ilb; - - ilb = &hashinfo->listening_hash[inet_sk_listen_hashfn(sk)]; - spin_lock(&ilb->lock); - __sk_nulls_add_node_rcu(sk, &ilb->head); - spin_unlock(&ilb->lock); - } else { - unsigned int hash; - struct hlist_nulls_head *list; - spinlock_t *lock; - - sk->sk_hash = hash = inet6_sk_ehashfn(sk); - list = &inet_ehash_bucket(hashinfo, hash)->chain; - lock = inet_ehash_lockp(hashinfo, hash); - spin_lock(lock); - __sk_nulls_add_node_rcu(sk, list); - if (tw) { - WARN_ON(sk->sk_hash != tw->tw_hash); - twrefcnt = inet_twsk_unhash(tw); - } - spin_unlock(lock); - } - - sock_prot_inuse_add(sock_net(sk), sk->sk_prot, 1); - return twrefcnt; -} -EXPORT_SYMBOL(__inet6_hash); - /* * Sockets in TCP_CLOSE state are _always_ taken out of the hash, so * we need not check it for TCP lookups anymore, thanks Alexey. -DaveM @@ -296,7 +246,7 @@ static int __inet6_check_established(struct inet_timewait_death_row *death_row, *twp = tw; } else if (tw) { /* Silly. Should hash-dance instead... */ - inet_twsk_deschedule(tw, death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); } @@ -307,7 +257,7 @@ not_unique: return -EADDRNOTAVAIL; } -static inline u32 inet6_sk_port_offset(const struct sock *sk) +static u32 inet6_sk_port_offset(const struct sock *sk) { const struct inet_sock *inet = inet_sk(sk); @@ -319,7 +269,11 @@ static inline u32 inet6_sk_port_offset(const struct sock *sk) int inet6_hash_connect(struct inet_timewait_death_row *death_row, struct sock *sk) { - return __inet_hash_connect(death_row, sk, inet6_sk_port_offset(sk), - __inet6_check_established, __inet6_hash); + u32 port_offset = 0; + + if (!inet_sk(sk)->inet_num) + port_offset = inet6_sk_port_offset(sk); + return __inet_hash_connect(death_row, sk, port_offset, + __inet6_check_established); } EXPORT_SYMBOL_GPL(inet6_hash_connect); diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 263ef4143bff..55d19861ab20 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -154,10 +154,32 @@ static void node_free(struct fib6_node *fn) kmem_cache_free(fib6_node_kmem, fn); } +static void rt6_free_pcpu(struct rt6_info *non_pcpu_rt) +{ + int cpu; + + if (!non_pcpu_rt->rt6i_pcpu) + return; + + for_each_possible_cpu(cpu) { + struct rt6_info **ppcpu_rt; + struct rt6_info *pcpu_rt; + + ppcpu_rt = per_cpu_ptr(non_pcpu_rt->rt6i_pcpu, cpu); + pcpu_rt = *ppcpu_rt; + if (pcpu_rt) { + dst_free(&pcpu_rt->dst); + *ppcpu_rt = NULL; + } + } +} + static void rt6_release(struct rt6_info *rt) { - if (atomic_dec_and_test(&rt->rt6i_ref)) + if (atomic_dec_and_test(&rt->rt6i_ref)) { + rt6_free_pcpu(rt); dst_free(&rt->dst); + } } static void fib6_link_table(struct net *net, struct fib6_table *tb) @@ -693,6 +715,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, { struct rt6_info *iter = NULL; struct rt6_info **ins; + struct rt6_info **fallback_ins = NULL; int replace = (info->nlh && (info->nlh->nlmsg_flags & NLM_F_REPLACE)); int add = (!info->nlh || @@ -716,8 +739,13 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, (info->nlh->nlmsg_flags & NLM_F_EXCL)) return -EEXIST; if (replace) { - found++; - break; + if (rt_can_ecmp == rt6_qualify_for_ecmp(iter)) { + found++; + break; + } + if (rt_can_ecmp) + fallback_ins = fallback_ins ?: ins; + goto next_iter; } if (iter->dst.dev == rt->dst.dev && @@ -732,6 +760,7 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, rt6_clean_expires(iter); else rt6_set_expires(iter, rt->dst.expires); + iter->rt6i_pmtu = rt->rt6i_pmtu; return -EEXIST; } /* If we have the same destination and the same metric, @@ -753,9 +782,17 @@ static int fib6_add_rt2node(struct fib6_node *fn, struct rt6_info *rt, if (iter->rt6i_metric > rt->rt6i_metric) break; +next_iter: ins = &iter->dst.rt6_next; } + if (fallback_ins && !found) { + /* No ECMP-able route found, replace first non-ECMP one */ + ins = fallback_ins; + iter = *ins; + found++; + } + /* Reset round-robin state, if necessary */ if (ins == &fn->leaf) fn->rr_ptr = NULL; @@ -815,6 +852,8 @@ add: } } else { + int nsiblings; + if (!found) { if (add) goto add; @@ -835,8 +874,27 @@ add: info->nl_net->ipv6.rt6_stats->fib_route_nodes++; fn->fn_flags |= RTN_RTINFO; } + nsiblings = iter->rt6i_nsiblings; fib6_purge_rt(iter, fn, info->nl_net); rt6_release(iter); + + if (nsiblings) { + /* Replacing an ECMP route, remove all siblings */ + ins = &rt->dst.rt6_next; + iter = *ins; + while (iter) { + if (rt6_qualify_for_ecmp(iter)) { + *ins = iter->dst.rt6_next; + fib6_purge_rt(iter, fn, info->nl_net); + rt6_release(iter); + nsiblings--; + } else { + ins = &iter->dst.rt6_next; + } + iter = *ins; + } + WARN_ON(nsiblings != 0); + } } return 0; @@ -1206,7 +1264,7 @@ static struct fib6_node *fib6_repair_tree(struct net *net, WARN_ON(fn->fn_flags & RTN_RTINFO); WARN_ON(fn->fn_flags & RTN_TL_ROOT); - WARN_ON(fn->leaf != NULL); + WARN_ON(fn->leaf); children = 0; child = NULL; @@ -1361,7 +1419,7 @@ int fib6_del(struct rt6_info *rt, struct nl_info *info) #if RT6_DEBUG >= 2 if (rt->dst.obsolete > 0) { - WARN_ON(fn != NULL); + WARN_ON(fn); return -ENOENT; } #endif diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index f45d6db50a45..1f9ebe3cbb4a 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -100,7 +100,6 @@ static void fl_free(struct ip6_flowlabel *fl) if (fl) { if (fl->share == IPV6_FL_S_PROCESS) put_pid(fl->owner.pid); - release_net(fl->fl_net); kfree(fl->opt); kfree_rcu(fl, rcu); } @@ -206,7 +205,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, fl->label = htonl(prandom_u32())&IPV6_FLOWLABEL_MASK; if (fl->label) { lfl = __fl_lookup(net, fl->label); - if (lfl == NULL) + if (!lfl) break; } } @@ -220,7 +219,7 @@ static struct ip6_flowlabel *fl_intern(struct net *net, * with the same label can only appear on another sock */ lfl = __fl_lookup(net, fl->label); - if (lfl != NULL) { + if (lfl) { atomic_inc(&lfl->users); spin_unlock_bh(&ip6_fl_lock); return lfl; @@ -298,10 +297,10 @@ struct ipv6_txoptions *fl6_merge_options(struct ipv6_txoptions *opt_space, { struct ipv6_txoptions *fl_opt = fl->opt; - if (fopt == NULL || fopt->opt_flen == 0) + if (!fopt || fopt->opt_flen == 0) return fl_opt; - if (fl_opt != NULL) { + if (fl_opt) { opt_space->hopopt = fl_opt->hopopt; opt_space->dst0opt = fl_opt->dst0opt; opt_space->srcrt = fl_opt->srcrt; @@ -367,7 +366,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -ENOMEM; fl = kzalloc(sizeof(*fl), GFP_KERNEL); - if (fl == NULL) + if (!fl) goto done; if (olen > 0) { @@ -377,7 +376,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, err = -ENOMEM; fl->opt = kmalloc(sizeof(*fl->opt) + olen, GFP_KERNEL); - if (fl->opt == NULL) + if (!fl->opt) goto done; memset(fl->opt, 0, sizeof(*fl->opt)); @@ -403,7 +402,7 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, } } - fl->fl_net = hold_net(net); + fl->fl_net = net; fl->expires = jiffies; err = fl6_renew(fl, freq->flr_linger, freq->flr_expires); if (err) @@ -596,8 +595,12 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) if (freq.flr_label & ~IPV6_FLOWLABEL_MASK) return -EINVAL; + if (net->ipv6.sysctl.flowlabel_state_ranges && + (freq.flr_label & IPV6_FLOWLABEL_STATELESS_FLAG)) + return -ERANGE; + fl = fl_create(net, sk, &freq, optval, optlen, &err); - if (fl == NULL) + if (!fl) return err; sfl1 = kmalloc(sizeof(*sfl1), GFP_KERNEL); @@ -617,7 +620,7 @@ int ipv6_flowlabel_opt(struct sock *sk, char __user *optval, int optlen) } rcu_read_unlock_bh(); - if (fl1 == NULL) + if (!fl1) fl1 = fl_lookup(net, freq.flr_label); if (fl1) { recheck: @@ -634,7 +637,7 @@ recheck: goto release; err = -ENOMEM; - if (sfl1 == NULL) + if (!sfl1) goto release; if (fl->linger > fl1->linger) fl1->linger = fl->linger; @@ -654,7 +657,7 @@ release: goto done; err = -ENOMEM; - if (sfl1 == NULL) + if (!sfl1) goto done; err = mem_check(sk); @@ -662,7 +665,7 @@ release: goto done; fl1 = fl_intern(net, fl, freq.flr_label); - if (fl1 != NULL) + if (fl1) goto recheck; if (!freq.flr_label) { diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index bc28b7d42a6d..a38d3ac0f18f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -223,7 +223,7 @@ static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, } } - if (cand != NULL) + if (cand) return cand; dev = ign->fb_tunnel_dev; @@ -395,7 +395,7 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); - if (t == NULL) + if (!t) return; switch (type) { @@ -760,7 +760,7 @@ static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, skb_set_inner_protocol(skb, protocol); - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(tunnel, ndst); return 0; @@ -980,7 +980,7 @@ static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { @@ -1073,7 +1073,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, } ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); - if (t == NULL) + if (!t) t = netdev_priv(dev); } memset(&p, 0, sizeof(p)); @@ -1105,7 +1105,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -1144,7 +1144,7 @@ static int ip6gre_tunnel_ioctl(struct net_device *dev, err = -ENOENT; ip6gre_tnl_parm_from_user(&p1, &p); t = ip6gre_tunnel_locate(net, &p1, 0); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(ign->fb_tunnel_dev)) @@ -1216,6 +1216,7 @@ static const struct net_device_ops ip6gre_netdev_ops = { .ndo_do_ioctl = ip6gre_tunnel_ioctl, .ndo_change_mtu = ip6gre_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, }; static void ip6gre_dev_free(struct net_device *dev) @@ -1238,7 +1239,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) dev->mtu -= 8; dev->flags |= IFF_NOARP; - dev->iflink = 0; dev->addr_len = sizeof(struct in6_addr); netif_keep_dst(dev); } @@ -1246,7 +1246,6 @@ static void ip6gre_tunnel_setup(struct net_device *dev) static int ip6gre_tunnel_init(struct net_device *dev) { struct ip6_tnl *tunnel; - int i; tunnel = netdev_priv(dev); @@ -1260,18 +1259,10 @@ static int ip6gre_tunnel_init(struct net_device *dev) if (ipv6_addr_any(&tunnel->parms.raddr)) dev->header_ops = &ip6gre_header_ops; - dev->tstats = alloc_percpu(struct pcpu_sw_netstats); + dev->tstats = netdev_alloc_pcpu_stats(struct pcpu_sw_netstats); if (!dev->tstats) return -ENOMEM; - for_each_possible_cpu(i) { - struct pcpu_sw_netstats *ip6gre_tunnel_stats; - ip6gre_tunnel_stats = per_cpu_ptr(dev->tstats, i); - u64_stats_init(&ip6gre_tunnel_stats->syncp); - } - - dev->iflink = tunnel->parms.link; - return 0; } @@ -1313,7 +1304,7 @@ static void ip6gre_destroy_tunnels(struct net *net, struct list_head *head) t = rtnl_dereference(ign->tunnels[prio][h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ @@ -1412,7 +1403,7 @@ static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) goto out; if (data[IFLA_GRE_REMOTE]) { - nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + daddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (ipv6_addr_any(&daddr)) return -EINVAL; } @@ -1446,10 +1437,10 @@ static void ip6gre_netlink_parms(struct nlattr *data[], parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); if (data[IFLA_GRE_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_GRE_LOCAL]); if (data[IFLA_GRE_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_GRE_REMOTE]); if (data[IFLA_GRE_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); @@ -1480,8 +1471,6 @@ static int ip6gre_tap_init(struct net_device *dev) if (!dev->tstats) return -ENOMEM; - dev->iflink = tunnel->parms.link; - return 0; } @@ -1493,6 +1482,7 @@ static const struct net_device_ops ip6gre_tap_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = ip6gre_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, }; static void ip6gre_tap_setup(struct net_device *dev) @@ -1503,7 +1493,6 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->netdev_ops = &ip6gre_tap_netdev_ops; dev->destructor = ip6gre_dev_free; - dev->iflink = 0; dev->features |= NETIF_F_NETNS_LOCAL; } @@ -1622,8 +1611,8 @@ static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || - nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->laddr) || - nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->raddr) || + nla_put_in6_addr(skb, IFLA_GRE_LOCAL, &p->laddr) || + nla_put_in6_addr(skb, IFLA_GRE_REMOTE, &p->raddr) || nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index aacdcb4dc762..57990c929cd8 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -46,8 +46,7 @@ #include <net/xfrm.h> #include <net/inet_ecn.h> - -int ip6_rcv_finish(struct sk_buff *skb) +int ip6_rcv_finish(struct sock *sk, struct sk_buff *skb) { if (sysctl_ip_early_demux && !skb_dst(skb) && skb->sk == NULL) { const struct inet6_protocol *ipprot; @@ -183,7 +182,8 @@ int ipv6_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *pt /* Must drop socket now because of tproxy. */ skb_orphan(skb); - return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + dev, NULL, ip6_rcv_finish); err: IP6_INC_STATS_BH(net, idev, IPSTATS_MIB_INHDRERRORS); @@ -198,7 +198,7 @@ drop: */ -static int ip6_input_finish(struct sk_buff *skb) +static int ip6_input_finish(struct sock *sk, struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct inet6_protocol *ipprot; @@ -221,7 +221,7 @@ resubmit: raw = raw6_local_deliver(skb, nexthdr); ipprot = rcu_dereference(inet6_protos[nexthdr]); - if (ipprot != NULL) { + if (ipprot) { int ret; if (ipprot->flags & INET6_PROTO_FINAL) { @@ -277,7 +277,8 @@ discard: int ip6_input(struct sk_buff *skb) { - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, skb, skb->dev, NULL, + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_IN, NULL, skb, + skb->dev, NULL, ip6_input_finish); } @@ -330,10 +331,10 @@ int ip6_mc_input(struct sk_buff *skb) if (offset < 0) goto out; - if (!ipv6_is_mld(skb, nexthdr, offset)) - goto out; + if (ipv6_is_mld(skb, nexthdr, offset)) + deliver = true; - deliver = true; + goto out; } /* unknown RA - process it normally */ } diff --git a/net/ipv6/ip6_offload.c b/net/ipv6/ip6_offload.c index 46d452a56d3e..08b62047c67f 100644 --- a/net/ipv6/ip6_offload.c +++ b/net/ipv6/ip6_offload.c @@ -124,7 +124,7 @@ static struct sk_buff *ipv6_gso_segment(struct sk_buff *skb, unfrag_ip6hlen = ip6_find_1stfragopt(skb, &prevhdr); fptr = (struct frag_hdr *)((u8 *)ipv6h + unfrag_ip6hlen); fptr->frag_off = htons(offset); - if (skb->next != NULL) + if (skb->next) fptr->frag_off |= htons(IP6_MF); offset += (ntohs(ipv6h->payload_len) - sizeof(struct frag_hdr)); @@ -292,8 +292,6 @@ static struct packet_offload ipv6_packet_offload __read_mostly = { static const struct net_offload sit_offload = { .callbacks = { .gso_segment = ipv6_gso_segment, - .gro_receive = ipv6_gro_receive, - .gro_complete = ipv6_gro_complete, }, }; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 36cf0ab685a0..d5f7716662db 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -56,7 +56,7 @@ #include <net/checksum.h> #include <linux/mroute6.h> -static int ip6_finish_output2(struct sk_buff *skb) +static int ip6_finish_output2(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct net_device *dev = dst->dev; @@ -70,7 +70,7 @@ static int ip6_finish_output2(struct sk_buff *skb) if (ipv6_addr_is_multicast(&ipv6_hdr(skb)->daddr)) { struct inet6_dev *idev = ip6_dst_idev(skb_dst(skb)); - if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(skb->sk) && + if (!(dev->flags & IFF_LOOPBACK) && sk_mc_loop(sk) && ((mroute6_socket(dev_net(dev), skb) && !(IP6CB(skb)->flags & IP6SKB_FORWARDED)) || ipv6_chk_mcast_addr(dev, &ipv6_hdr(skb)->daddr, @@ -82,7 +82,7 @@ static int ip6_finish_output2(struct sk_buff *skb) */ if (newskb) NF_HOOK(NFPROTO_IPV6, NF_INET_POST_ROUTING, - newskb, NULL, newskb->dev, + sk, newskb, NULL, newskb->dev, dev_loopback_xmit); if (ipv6_hdr(skb)->hop_limit == 0) { @@ -105,7 +105,7 @@ static int ip6_finish_output2(struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop((struct rt6_info *)dst); + nexthop = rt6_nexthop((struct rt6_info *)dst, &ipv6_hdr(skb)->daddr); neigh = __ipv6_neigh_lookup_noref(dst->dev, nexthop); if (unlikely(!neigh)) neigh = __neigh_create(&nd_tbl, nexthop, dst->dev, false); @@ -122,14 +122,14 @@ static int ip6_finish_output2(struct sk_buff *skb) return -EINVAL; } -static int ip6_finish_output(struct sk_buff *skb) +static int ip6_finish_output(struct sock *sk, struct sk_buff *skb) { if ((skb->len > ip6_skb_dst_mtu(skb) && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)) || (IP6CB(skb)->frag_max_size && skb->len > IP6CB(skb)->frag_max_size)) - return ip6_fragment(skb, ip6_finish_output2); + return ip6_fragment(sk, skb, ip6_finish_output2); else - return ip6_finish_output2(skb); + return ip6_finish_output2(sk, skb); } int ip6_output(struct sock *sk, struct sk_buff *skb) @@ -143,7 +143,8 @@ int ip6_output(struct sock *sk, struct sk_buff *skb) return 0; } - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, NULL, dev, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, + NULL, dev, ip6_finish_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } @@ -177,7 +178,7 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if (skb_headroom(skb) < head_room) { struct sk_buff *skb2 = skb_realloc_headroom(skb, head_room); - if (skb2 == NULL) { + if (!skb2) { IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTDISCARDS); kfree_skb(skb); @@ -223,8 +224,8 @@ int ip6_xmit(struct sock *sk, struct sk_buff *skb, struct flowi6 *fl6, if ((skb->len <= mtu) || skb->ignore_df || skb_is_gso(skb)) { IP6_UPD_PO_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUT, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - dst->dev, dst_output); + return NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, dst_output_sk); } skb->dev = dst->dev; @@ -316,10 +317,10 @@ static int ip6_forward_proxy_check(struct sk_buff *skb) return 0; } -static inline int ip6_forward_finish(struct sk_buff *skb) +static inline int ip6_forward_finish(struct sock *sk, struct sk_buff *skb) { skb_sender_cpu_clear(skb); - return dst_output(skb); + return dst_output_sk(sk, skb); } static unsigned int ip6_dst_mtu_forward(const struct dst_entry *dst) @@ -458,7 +459,7 @@ int ip6_forward(struct sk_buff *skb) else target = &hdr->daddr; - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &hdr->daddr, 1); /* Limit redirects both by destination (here) and by source (inside ndisc_send_redirect) @@ -511,7 +512,8 @@ int ip6_forward(struct sk_buff *skb) IP6_INC_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(net, ip6_dst_idev(dst), IPSTATS_MIB_OUTOCTETS, skb->len); - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dst->dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dst->dev, ip6_forward_finish); error: @@ -538,7 +540,8 @@ static void ip6_copy_metadata(struct sk_buff *to, struct sk_buff *from) skb_copy_secmark(to, from); } -int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) +int ip6_fragment(struct sock *sk, struct sk_buff *skb, + int (*output)(struct sock *, struct sk_buff *)) { struct sk_buff *frag; struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); @@ -548,7 +551,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) struct frag_hdr *fh; unsigned int mtu, hlen, left, len; int hroom, troom; - __be32 frag_id = 0; + __be32 frag_id; int ptr, offset = 0, err = 0; u8 *prevhdr, nexthdr = 0; struct net *net = dev_net(skb_dst(skb)->dev); @@ -561,18 +564,17 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->ignore_df && skb->len > mtu) || - (IP6CB(skb)->frag_max_size && - IP6CB(skb)->frag_max_size > mtu)) { - if (skb->sk && dst_allfrag(skb_dst(skb))) - sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + if (unlikely(!skb->ignore_df && skb->len > mtu)) + goto fail_toobig; - skb->dev = skb_dst(skb)->dev; - icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); - IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), - IPSTATS_MIB_FRAGFAILS); - kfree_skb(skb); - return -EMSGSIZE; + if (IP6CB(skb)->frag_max_size) { + if (IP6CB(skb)->frag_max_size > mtu) + goto fail_toobig; + + /* don't send fragments larger than what we received */ + mtu = IP6CB(skb)->frag_max_size; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; } if (np && np->frag_size < mtu) { @@ -581,6 +583,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) } mtu -= hlen + sizeof(struct frag_hdr); + frag_id = ipv6_select_ident(net, &ipv6_hdr(skb)->daddr, + &ipv6_hdr(skb)->saddr); + if (skb_has_frag_list(skb)) { int first_len = skb_pagelen(skb); struct sk_buff *frag2; @@ -629,11 +634,10 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) skb_reset_network_header(skb); memcpy(skb_network_header(skb), tmp_hdr, hlen); - ipv6_select_ident(fh, rt); fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(IP6_MF); - frag_id = fh->identification; + fh->identification = frag_id; first_len = skb_pagelen(skb); skb->data_len = first_len - skb_headlen(skb); @@ -658,7 +662,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) fh->nexthdr = nexthdr; fh->reserved = 0; fh->frag_off = htons(offset); - if (frag->next != NULL) + if (frag->next) fh->frag_off |= htons(IP6_MF); fh->identification = frag_id; ipv6_hdr(frag)->payload_len = @@ -667,7 +671,7 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) ip6_copy_metadata(frag, skb); } - err = output(skb); + err = output(sk, skb); if (!err) IP6_INC_STATS(net, ip6_dst_idev(&rt->dst), IPSTATS_MIB_FRAGCREATES); @@ -775,11 +779,7 @@ slow_path: */ fh->nexthdr = nexthdr; fh->reserved = 0; - if (!frag_id) { - ipv6_select_ident(fh, rt); - frag_id = fh->identification; - } else - fh->identification = frag_id; + fh->identification = frag_id; /* * Copy a block of the IP datagram. @@ -800,7 +800,7 @@ slow_path: /* * Put this fragment into the sending queue. */ - err = output(frag); + err = output(sk, frag); if (err) goto fail; @@ -812,6 +812,14 @@ slow_path: consume_skb(skb); return err; +fail_toobig: + if (skb->sk && dst_allfrag(skb_dst(skb))) + sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); + + skb->dev = skb_dst(skb)->dev; + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + err = -EMSGSIZE; + fail: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_FRAGFAILS); @@ -824,7 +832,7 @@ static inline int ip6_rt_check(const struct rt6key *rt_key, const struct in6_addr *addr_cache) { return (rt_key->plen != 128 || !ipv6_addr_equal(fl_addr, &rt_key->addr)) && - (addr_cache == NULL || !ipv6_addr_equal(fl_addr, addr_cache)); + (!addr_cache || !ipv6_addr_equal(fl_addr, addr_cache)); } static struct dst_entry *ip6_sk_dst_check(struct sock *sk, @@ -883,22 +891,45 @@ static int ip6_dst_lookup_tail(struct sock *sk, #endif int err; - if (*dst == NULL) - *dst = ip6_route_output(net, sk, fl6); - - err = (*dst)->error; - if (err) - goto out_err_release; + /* The correct way to handle this would be to do + * ip6_route_get_saddr, and then ip6_route_output; however, + * the route-specific preferred source forces the + * ip6_route_output call _before_ ip6_route_get_saddr. + * + * In source specific routing (no src=any default route), + * ip6_route_output will fail given src=any saddr, though, so + * that's why we try it again later. + */ + if (ipv6_addr_any(&fl6->saddr) && (!*dst || !(*dst)->error)) { + struct rt6_info *rt; + bool had_dst = *dst != NULL; - if (ipv6_addr_any(&fl6->saddr)) { - struct rt6_info *rt = (struct rt6_info *) *dst; + if (!had_dst) + *dst = ip6_route_output(net, sk, fl6); + rt = (*dst)->error ? NULL : (struct rt6_info *)*dst; err = ip6_route_get_saddr(net, rt, &fl6->daddr, sk ? inet6_sk(sk)->srcprefs : 0, &fl6->saddr); if (err) goto out_err_release; + + /* If we had an erroneous initial result, pretend it + * never existed and let the SA-enabled version take + * over. + */ + if (!had_dst && (*dst)->error) { + dst_release(*dst); + *dst = NULL; + } } + if (!*dst) + *dst = ip6_route_output(net, sk, fl6); + + err = (*dst)->error; + if (err) + goto out_err_release; + #ifdef CONFIG_IPV6_OPTIMISTIC_DAD /* * Here if the dst entry we've looked up @@ -910,7 +941,8 @@ static int ip6_dst_lookup_tail(struct sock *sk, */ rt = (struct rt6_info *) *dst; rcu_read_lock_bh(); - n = __ipv6_neigh_lookup_noref(rt->dst.dev, rt6_nexthop(rt)); + n = __ipv6_neigh_lookup_noref(rt->dst.dev, + rt6_nexthop(rt, &fl6->daddr)); err = n && !(n->nud_state & NUD_VALID) ? -EINVAL : 0; rcu_read_unlock_bh(); @@ -1034,11 +1066,10 @@ static inline int ip6_ufo_append_data(struct sock *sk, int odd, struct sk_buff *skb), void *from, int length, int hh_len, int fragheaderlen, int transhdrlen, int mtu, unsigned int flags, - struct rt6_info *rt) + const struct flowi6 *fl6) { struct sk_buff *skb; - struct frag_hdr fhdr; int err; /* There is support for UDP large send offload by network @@ -1046,11 +1077,11 @@ static inline int ip6_ufo_append_data(struct sock *sk, * udp datagram */ skb = skb_peek_tail(queue); - if (skb == NULL) { + if (!skb) { skb = sock_alloc_send_skb(sk, hh_len + fragheaderlen + transhdrlen + 20, (flags & MSG_DONTWAIT), &err); - if (skb == NULL) + if (!skb) return err; /* reserve space for Hardware header */ @@ -1080,8 +1111,9 @@ static inline int ip6_ufo_append_data(struct sock *sk, skb_shinfo(skb)->gso_size = (mtu - fragheaderlen - sizeof(struct frag_hdr)) & ~7; skb_shinfo(skb)->gso_type = SKB_GSO_UDP; - ipv6_select_ident(&fhdr, rt); - skb_shinfo(skb)->ip6_frag_id = fhdr.identification; + skb_shinfo(skb)->ip6_frag_id = ipv6_select_ident(sock_net(sk), + &fl6->daddr, + &fl6->saddr); append: return skb_append_datato_frags(sk, skb, getfrag, from, @@ -1108,7 +1140,7 @@ static void ip6_append_data_mtu(unsigned int *mtu, unsigned int orig_mtu) { if (!(rt->dst.flags & DST_XFRM_TUNNEL)) { - if (skb == NULL) { + if (!skb) { /* first fragment, reserve header_len */ *mtu = orig_mtu - rt->dst.header_len; @@ -1140,7 +1172,7 @@ static int ip6_setup_cork(struct sock *sk, struct inet_cork_full *cork, return -EINVAL; v6_cork->opt = kzalloc(opt->tot_len, sk->sk_allocation); - if (unlikely(v6_cork->opt == NULL)) + if (unlikely(!v6_cork->opt)) return -ENOBUFS; v6_cork->opt->tot_len = opt->tot_len; @@ -1274,8 +1306,10 @@ emsgsize: /* If this is the first and only packet and device * supports checksum offloading, let's use it. + * Use transhdrlen, same as IPv4, because partial + * sums only work when transhdrlen is set. */ - if (!skb && sk->sk_protocol == IPPROTO_UDP && + if (transhdrlen && sk->sk_protocol == IPPROTO_UDP && length + fragheaderlen < mtu && rt->dst.dev->features & NETIF_F_V6_CSUM && !exthdrlen) @@ -1304,7 +1338,7 @@ emsgsize: (sk->sk_type == SOCK_DGRAM)) { err = ip6_ufo_append_data(sk, queue, getfrag, from, length, hh_len, fragheaderlen, - transhdrlen, mtu, flags, rt); + transhdrlen, mtu, flags, fl6); if (err) goto error; return 0; @@ -1332,7 +1366,7 @@ alloc_new_skb: else fraggap = 0; /* update mtu and maxfraglen if necessary */ - if (skb == NULL || skb_prev == NULL) + if (!skb || !skb_prev) ip6_append_data_mtu(&mtu, &maxfraglen, fragheaderlen, skb, rt, orig_mtu); @@ -1384,10 +1418,10 @@ alloc_new_skb: skb = sock_wmalloc(sk, alloclen + hh_len, 1, sk->sk_allocation); - if (unlikely(skb == NULL)) + if (unlikely(!skb)) err = -ENOBUFS; } - if (skb == NULL) + if (!skb) goto error; /* * Fill in the control structures @@ -1579,7 +1613,7 @@ struct sk_buff *__ip6_make_skb(struct sock *sk, unsigned char proto = fl6->flowi6_proto; skb = __skb_dequeue(queue); - if (skb == NULL) + if (!skb) goto out; tail_skb = &(skb_shinfo(skb)->frag_list); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index ddd94eca19b3..2e67b660118b 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -64,12 +64,6 @@ MODULE_LICENSE("GPL"); MODULE_ALIAS_RTNL_LINK("ip6tnl"); MODULE_ALIAS_NETDEV("ip6tnl0"); -#ifdef IP6_TNL_DEBUG -#define IP6_TNL_TRACE(x...) pr_debug("%s:" x "\n", __func__) -#else -#define IP6_TNL_TRACE(x...) do {;} while(0) -#endif - #define HASH_SIZE_SHIFT 5 #define HASH_SIZE (1 << HASH_SIZE_SHIFT) @@ -137,7 +131,7 @@ struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) struct dst_entry *dst = t->dst_cache; if (dst && dst->obsolete && - dst->ops->check(dst, t->dst_cookie) == NULL) { + !dst->ops->check(dst, t->dst_cookie)) { t->dst_cache = NULL; dst_release(dst); return NULL; @@ -157,7 +151,7 @@ EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; - t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; + t->dst_cookie = rt6_get_cookie(rt); dst_release(t->dst_cache); t->dst_cache = dst; } @@ -331,7 +325,7 @@ static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ip6_tnl_dev_setup); - if (dev == NULL) + if (!dev) goto failed; dev_net_set(dev, net); @@ -502,7 +496,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->daddr, &ipv6h->saddr); - if (t == NULL) + if (!t) goto out; tproto = ACCESS_ONCE(t->parms.proto); @@ -813,7 +807,7 @@ static int ip6_tnl_rcv(struct sk_buff *skb, __u16 protocol, rcu_read_lock(); t = ip6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t != NULL) { + if (t) { struct pcpu_sw_netstats *tstats; tproto = ACCESS_ONCE(t->parms.proto); @@ -1106,7 +1100,7 @@ static int ip6_tnl_xmit2(struct sk_buff *skb, ipv6h->nexthdr = proto; ipv6h->saddr = fl6->saddr; ipv6h->daddr = fl6->daddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(NULL, skb, dev); if (ndst) ip6_tnl_dst_store(t, ndst); return 0; @@ -1270,8 +1264,6 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) else dev->flags &= ~IFF_POINTOPOINT; - dev->iflink = p->link; - if (p->flags & IP6_TNL_F_CAP_XMIT) { int strict = (ipv6_addr_type(&p->raddr) & (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); @@ -1280,7 +1272,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) &p->raddr, &p->laddr, p->link, strict); - if (rt == NULL) + if (!rt) return; if (rt->dst.dev) { @@ -1523,6 +1515,13 @@ ip6_tnl_change_mtu(struct net_device *dev, int new_mtu) return 0; } +int ip6_tnl_get_iflink(const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + + return t->parms.link; +} +EXPORT_SYMBOL(ip6_tnl_get_iflink); static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_init = ip6_tnl_dev_init, @@ -1531,6 +1530,7 @@ static const struct net_device_ops ip6_tnl_netdev_ops = { .ndo_do_ioctl = ip6_tnl_ioctl, .ndo_change_mtu = ip6_tnl_change_mtu, .ndo_get_stats = ip6_get_stats, + .ndo_get_iflink = ip6_tnl_get_iflink, }; @@ -1646,12 +1646,10 @@ static void ip6_tnl_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_IPTUN_LINK]); if (data[IFLA_IPTUN_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_IPTUN_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_IPTUN_LOCAL]); if (data[IFLA_IPTUN_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_IPTUN_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_IPTUN_REMOTE]); if (data[IFLA_IPTUN_TTL]) parms->hop_limit = nla_get_u8(data[IFLA_IPTUN_TTL]); @@ -1745,10 +1743,8 @@ static int ip6_tnl_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put(skb, IFLA_IPTUN_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_IPTUN_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_IPTUN_REMOTE, &parm->raddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->hop_limit) || nla_put_u8(skb, IFLA_IPTUN_ENCAP_LIMIT, parm->encap_limit) || nla_put_be32(skb, IFLA_IPTUN_FLOWINFO, parm->flowinfo) || @@ -1821,7 +1817,7 @@ static void __net_exit ip6_tnl_destroy_tunnels(struct net *net) for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c index 32d9b268e7d8..e1a1136bda7c 100644 --- a/net/ipv6/ip6_udp_tunnel.c +++ b/net/ipv6/ip6_udp_tunnel.c @@ -19,12 +19,10 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, int err; struct socket *sock = NULL; - err = sock_create_kern(AF_INET6, SOCK_DGRAM, 0, &sock); + err = sock_create_kern(net, AF_INET6, SOCK_DGRAM, 0, &sock); if (err < 0) goto error; - sk_change_net(sock->sk, net); - udp6_addr.sin6_family = AF_INET6; memcpy(&udp6_addr.sin6_addr, &cfg->local_ip6, sizeof(udp6_addr.sin6_addr)); @@ -55,14 +53,15 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg, error: if (sock) { kernel_sock_shutdown(sock, SHUT_RDWR); - sk_release_kernel(sock->sk); + sock_release(sock); } *sockp = NULL; return err; } EXPORT_SYMBOL_GPL(udp_sock_create6); -int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, +int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, struct net_device *dev, struct in6_addr *saddr, struct in6_addr *daddr, __u8 prio, __u8 ttl, __be16 src_port, @@ -97,7 +96,7 @@ int udp_tunnel6_xmit_skb(struct dst_entry *dst, struct sk_buff *skb, ip6h->daddr = *daddr; ip6h->saddr = *saddr; - ip6tunnel_xmit(skb, dev); + ip6tunnel_xmit(sk, skb, dev); return 0; } EXPORT_SYMBOL_GPL(udp_tunnel6_xmit_skb); diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c index 5fb9e212eca8..0224c032dca5 100644 --- a/net/ipv6/ip6_vti.c +++ b/net/ipv6/ip6_vti.c @@ -218,7 +218,7 @@ static struct ip6_tnl *vti6_tnl_create(struct net *net, struct __ip6_tnl_parm *p sprintf(name, "ip6_vti%%d"); dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, vti6_dev_setup); - if (dev == NULL) + if (!dev) goto failed; dev_net_set(dev, net); @@ -288,8 +288,7 @@ static struct ip6_tnl *vti6_locate(struct net *net, struct __ip6_tnl_parm *p, static void vti6_dev_uninit(struct net_device *dev) { struct ip6_tnl *t = netdev_priv(dev); - struct net *net = dev_net(dev); - struct vti6_net *ip6n = net_generic(net, vti6_net_id); + struct vti6_net *ip6n = net_generic(t->net, vti6_net_id); if (dev == ip6n->fb_tnl_dev) RCU_INIT_POINTER(ip6n->tnls_wc[0], NULL); @@ -305,7 +304,7 @@ static int vti6_rcv(struct sk_buff *skb) rcu_read_lock(); t = vti6_tnl_lookup(dev_net(skb->dev), &ipv6h->saddr, &ipv6h->daddr); - if (t != NULL) { + if (t) { if (t->parms.proto != IPPROTO_IPV6 && t->parms.proto != 0) { rcu_read_unlock(); goto discard; @@ -323,7 +322,6 @@ static int vti6_rcv(struct sk_buff *skb) } XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t; - skb->mark = be32_to_cpu(t->parms.i_key); rcu_read_unlock(); @@ -343,6 +341,8 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) struct pcpu_sw_netstats *tstats; struct xfrm_state *x; struct ip6_tnl *t = XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6; + u32 orig_mark = skb->mark; + int ret; if (!t) return 1; @@ -359,7 +359,11 @@ static int vti6_rcv_cb(struct sk_buff *skb, int err) x = xfrm_input_state(skb); family = x->inner_mode->afinfo->family; - if (!xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family)) + skb->mark = be32_to_cpu(t->parms.i_key); + ret = xfrm_policy_check(NULL, XFRM_POLICY_IN, skb, family); + skb->mark = orig_mark; + + if (!ret) return -EPERM; skb_scrub_packet(skb, !net_eq(t->net, dev_net(skb->dev))); @@ -431,6 +435,7 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) struct net_device *tdev; struct xfrm_state *x; int err = -1; + int mtu; if (!dst) goto tx_err_link_failure; @@ -464,6 +469,19 @@ vti6_xmit(struct sk_buff *skb, struct net_device *dev, struct flowi *fl) skb_dst_set(skb, dst); skb->dev = skb_dst(skb)->dev; + mtu = dst_mtu(dst); + if (!skb->ignore_df && skb->len > mtu) { + skb_dst(skb)->ops->update_pmtu(dst, NULL, skb, mtu); + + if (skb->protocol == htons(ETH_P_IPV6)) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + else + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + + return -EMSGSIZE; + } + err = dst_output(skb); if (net_xmit_eval(err) == 0) { struct pcpu_sw_netstats *tstats = this_cpu_ptr(dev->tstats); @@ -496,7 +514,6 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) int ret; memset(&fl, 0, sizeof(fl)); - skb->mark = be32_to_cpu(t->parms.o_key); switch (skb->protocol) { case htons(ETH_P_IPV6): @@ -517,6 +534,9 @@ vti6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) goto tx_err; } + /* override mark with tunnel output key */ + fl.flowi_mark = be32_to_cpu(t->parms.o_key); + ret = vti6_xmit(skb, dev, &fl); if (ret < 0) goto tx_err; @@ -601,8 +621,6 @@ static void vti6_link_config(struct ip6_tnl *t) dev->flags |= IFF_POINTOPOINT; else dev->flags &= ~IFF_POINTOPOINT; - - dev->iflink = p->link; } /** @@ -716,7 +734,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } else { memset(&p, 0, sizeof(p)); } - if (t == NULL) + if (!t) t = netdev_priv(dev); vti6_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) @@ -736,7 +754,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -767,7 +785,7 @@ vti6_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -ENOENT; vti6_parm_from_user(&p1, &p); t = vti6_locate(net, &p1, 0); - if (t == NULL) + if (!t) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) @@ -808,6 +826,7 @@ static const struct net_device_ops vti6_netdev_ops = { .ndo_do_ioctl = vti6_ioctl, .ndo_change_mtu = vti6_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip6_tnl_get_iflink, }; /** @@ -897,12 +916,10 @@ static void vti6_netlink_parms(struct nlattr *data[], parms->link = nla_get_u32(data[IFLA_VTI_LINK]); if (data[IFLA_VTI_LOCAL]) - nla_memcpy(&parms->laddr, data[IFLA_VTI_LOCAL], - sizeof(struct in6_addr)); + parms->laddr = nla_get_in6_addr(data[IFLA_VTI_LOCAL]); if (data[IFLA_VTI_REMOTE]) - nla_memcpy(&parms->raddr, data[IFLA_VTI_REMOTE], - sizeof(struct in6_addr)); + parms->raddr = nla_get_in6_addr(data[IFLA_VTI_REMOTE]); if (data[IFLA_VTI_IKEY]) parms->i_key = nla_get_be32(data[IFLA_VTI_IKEY]); @@ -983,10 +1000,8 @@ static int vti6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct __ip6_tnl_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_VTI_LINK, parm->link) || - nla_put(skb, IFLA_VTI_LOCAL, sizeof(struct in6_addr), - &parm->laddr) || - nla_put(skb, IFLA_VTI_REMOTE, sizeof(struct in6_addr), - &parm->raddr) || + nla_put_in6_addr(skb, IFLA_VTI_LOCAL, &parm->laddr) || + nla_put_in6_addr(skb, IFLA_VTI_REMOTE, &parm->raddr) || nla_put_be32(skb, IFLA_VTI_IKEY, parm->i_key) || nla_put_be32(skb, IFLA_VTI_OKEY, parm->o_key)) goto nla_put_failure; @@ -1027,7 +1042,7 @@ static void __net_exit vti6_destroy_tunnels(struct vti6_net *ip6n) for (h = 0; h < HASH_SIZE; h++) { t = rtnl_dereference(ip6n->tnls_r_l[h]); - while (t != NULL) { + while (t) { unregister_netdevice_queue(t->dev, &list); t = rtnl_dereference(t->next); } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 312e0ff47339..74ceb73c1c9a 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -56,9 +56,7 @@ struct mr6_table { struct list_head list; -#ifdef CONFIG_NET_NS - struct net *net; -#endif + possible_net_t net; u32 id; struct sock *mroute6_sk; struct timer_list ipmr_expire_timer; @@ -175,7 +173,7 @@ static int ip6mr_rule_action(struct fib_rule *rule, struct flowi *flp, } mrt = ip6mr_get_table(rule->fr_net, rule->table); - if (mrt == NULL) + if (!mrt) return -EAGAIN; res->mrt = mrt; return 0; @@ -239,7 +237,7 @@ static int __net_init ip6mr_rules_init(struct net *net) INIT_LIST_HEAD(&net->ipv6.mr6_tables); mrt = ip6mr_new_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) { + if (!mrt) { err = -ENOMEM; goto err1; } @@ -307,11 +305,11 @@ static struct mr6_table *ip6mr_new_table(struct net *net, u32 id) unsigned int i; mrt = ip6mr_get_table(net, id); - if (mrt != NULL) + if (mrt) return mrt; mrt = kzalloc(sizeof(*mrt), GFP_KERNEL); - if (mrt == NULL) + if (!mrt) return NULL; mrt->id = id; write_pnet(&mrt->net, net); @@ -410,7 +408,7 @@ static void *ip6mr_vif_seq_start(struct seq_file *seq, loff_t *pos) struct mr6_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); iter->mrt = mrt; @@ -494,7 +492,7 @@ static void *ipmr_mfc_seq_start(struct seq_file *seq, loff_t *pos) struct mr6_table *mrt; mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return ERR_PTR(-ENOENT); it->mrt = mrt; @@ -667,7 +665,7 @@ static int pim6_rcv(struct sk_buff *skb) dev_hold(reg_dev); read_unlock(&mrt_lock); - if (reg_dev == NULL) + if (!reg_dev) goto drop; skb->mac_header = skb->network_header; @@ -720,8 +718,14 @@ static netdev_tx_t reg_vif_xmit(struct sk_buff *skb, return NETDEV_TX_OK; } +static int reg_vif_get_iflink(const struct net_device *dev) +{ + return 0; +} + static const struct net_device_ops reg_vif_netdev_ops = { .ndo_start_xmit = reg_vif_xmit, + .ndo_get_iflink = reg_vif_get_iflink, }; static void reg_vif_setup(struct net_device *dev) @@ -745,7 +749,7 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) sprintf(name, "pim6reg%u", mrt->id); dev = alloc_netdev(0, name, NET_NAME_UNKNOWN, reg_vif_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -754,7 +758,6 @@ static struct net_device *ip6mr_reg_vif(struct net *net, struct mr6_table *mrt) free_netdev(dev); return NULL; } - dev->iflink = 0; if (dev_open(dev)) goto failure; @@ -994,7 +997,7 @@ static int mif6_add(struct net *net, struct mr6_table *mrt, v->pkt_out = 0; v->link = dev->ifindex; if (v->flags & MIFF_REGISTER) - v->link = dev->iflink; + v->link = dev_get_iflink(dev); /* And finish update writing critical data */ write_lock_bh(&mrt_lock); @@ -1074,7 +1077,7 @@ skip: static struct mfc6_cache *ip6mr_cache_alloc(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_KERNEL); - if (c == NULL) + if (!c) return NULL; c->mfc_un.res.minvif = MAXMIFS; return c; @@ -1083,7 +1086,7 @@ static struct mfc6_cache *ip6mr_cache_alloc(void) static struct mfc6_cache *ip6mr_cache_alloc_unres(void) { struct mfc6_cache *c = kmem_cache_zalloc(mrt_cachep, GFP_ATOMIC); - if (c == NULL) + if (!c) return NULL; skb_queue_head_init(&c->mfc_un.unres.unresolved); c->mfc_un.unres.expires = jiffies + 10 * HZ; @@ -1200,7 +1203,7 @@ static int ip6mr_cache_report(struct mr6_table *mrt, struct sk_buff *pkt, skb->ip_summed = CHECKSUM_UNNECESSARY; } - if (mrt->mroute6_sk == NULL) { + if (!mrt->mroute6_sk) { kfree_skb(skb); return -EINVAL; } @@ -1495,7 +1498,7 @@ static int ip6mr_mfc_add(struct net *net, struct mr6_table *mrt, return -EINVAL; c = ip6mr_cache_alloc(); - if (c == NULL) + if (!c) return -ENOMEM; c->mf6c_origin = mfc->mf6cc_origin.sin6_addr; @@ -1665,7 +1668,7 @@ int ip6_mroute_setsockopt(struct sock *sk, int optname, char __user *optval, uns struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; if (optname != MRT6_INIT) { @@ -1814,7 +1817,7 @@ int ip6_mroute_getsockopt(struct sock *sk, int optname, char __user *optval, struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (optname) { @@ -1861,7 +1864,7 @@ int ip6mr_ioctl(struct sock *sk, int cmd, void __user *arg) struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1935,7 +1938,7 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) struct mr6_table *mrt; mrt = ip6mr_get_table(net, raw6_sk(sk)->ip6mr_table ? : RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; switch (cmd) { @@ -1983,13 +1986,13 @@ int ip6mr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg) } #endif -static inline int ip6mr_forward2_finish(struct sk_buff *skb) +static inline int ip6mr_forward2_finish(struct sock *sk, struct sk_buff *skb) { IP6_INC_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTFORWDATAGRAMS); IP6_ADD_STATS_BH(dev_net(skb_dst(skb)->dev), ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_OUTOCTETS, skb->len); - return dst_output(skb); + return dst_output_sk(sk, skb); } /* @@ -2005,7 +2008,7 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, struct dst_entry *dst; struct flowi6 fl6; - if (vif->dev == NULL) + if (!vif->dev) goto out_free; #ifdef CONFIG_IPV6_PIMSM_V2 @@ -2061,7 +2064,8 @@ static int ip6mr_forward2(struct net *net, struct mr6_table *mrt, IP6CB(skb)->flags |= IP6SKB_FORWARDED; - return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, skb, skb->dev, dev, + return NF_HOOK(NFPROTO_IPV6, NF_INET_FORWARD, NULL, skb, + skb->dev, dev, ip6mr_forward2_finish); out_free: @@ -2194,7 +2198,7 @@ int ip6_mr_input(struct sk_buff *skb) read_lock(&mrt_lock); cache = ip6mr_cache_find(mrt, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr); - if (cache == NULL) { + if (!cache) { int vif = ip6mr_find_vif(mrt, skb->dev); if (vif >= 0) @@ -2206,7 +2210,7 @@ int ip6_mr_input(struct sk_buff *skb) /* * No usable cache entry */ - if (cache == NULL) { + if (!cache) { int vif; vif = ip6mr_find_vif(mrt, skb->dev); @@ -2245,13 +2249,13 @@ static int __ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, nla_put_u32(skb, RTA_IIF, mrt->vif6_table[c->mf6c_parent].dev->ifindex) < 0) return -EMSGSIZE; mp_attr = nla_nest_start(skb, RTA_MULTIPATH); - if (mp_attr == NULL) + if (!mp_attr) return -EMSGSIZE; for (ct = c->mfc_un.res.minvif; ct < c->mfc_un.res.maxvif; ct++) { if (MIF_EXISTS(mrt, ct) && c->mfc_un.res.ttls[ct] < 255) { nhp = nla_reserve_nohdr(skb, sizeof(*nhp)); - if (nhp == NULL) { + if (!nhp) { nla_nest_cancel(skb, mp_attr); return -EMSGSIZE; } @@ -2284,7 +2288,7 @@ int ip6mr_get_route(struct net *net, struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); mrt = ip6mr_get_table(net, RT6_TABLE_DFLT); - if (mrt == NULL) + if (!mrt) return -ENOENT; read_lock(&mrt_lock); @@ -2309,7 +2313,7 @@ int ip6mr_get_route(struct net *net, } dev = skb->dev; - if (dev == NULL || (vif = ip6mr_find_vif(mrt, dev)) < 0) { + if (!dev || (vif = ip6mr_find_vif(mrt, dev)) < 0) { read_unlock(&mrt_lock); return -ENODEV; } @@ -2361,7 +2365,7 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, int err; nlh = nlmsg_put(skb, portid, seq, cmd, sizeof(*rtm), flags); - if (nlh == NULL) + if (!nlh) return -EMSGSIZE; rtm = nlmsg_data(nlh); @@ -2380,8 +2384,8 @@ static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, rtm->rtm_protocol = RTPROT_MROUTED; rtm->rtm_flags = 0; - if (nla_put(skb, RTA_SRC, 16, &c->mf6c_origin) || - nla_put(skb, RTA_DST, 16, &c->mf6c_mcastgrp)) + if (nla_put_in6_addr(skb, RTA_SRC, &c->mf6c_origin) || + nla_put_in6_addr(skb, RTA_DST, &c->mf6c_mcastgrp)) goto nla_put_failure; err = __ip6mr_fill_mroute(mrt, skb, c, rtm); /* do not break the dump if cache is unresolved */ @@ -2426,7 +2430,7 @@ static void mr6_netlink_event(struct mr6_table *mrt, struct mfc6_cache *mfc, skb = nlmsg_new(mr6_msgsize(mfc->mf6c_parent >= MAXMIFS, mrt->maxvif), GFP_ATOMIC); - if (skb == NULL) + if (!skb) goto errout; err = ip6mr_fill_mroute(mrt, skb, 0, 0, mfc, cmd, 0); diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 8d766d9100cb..63e6956917c9 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -85,7 +85,7 @@ int ip6_ra_control(struct sock *sk, int sel) return 0; } } - if (new_ra == NULL) { + if (!new_ra) { write_unlock_bh(&ip6_ra_lock); return -ENOBUFS; } @@ -117,6 +117,25 @@ struct ipv6_txoptions *ipv6_update_options(struct sock *sk, return opt; } +static bool setsockopt_needs_rtnl(int optname) +{ + switch (optname) { + case IPV6_ADD_MEMBERSHIP: + case IPV6_DROP_MEMBERSHIP: + case IPV6_JOIN_ANYCAST: + case IPV6_LEAVE_ANYCAST: + case MCAST_JOIN_GROUP: + case MCAST_LEAVE_GROUP: + case MCAST_JOIN_SOURCE_GROUP: + case MCAST_LEAVE_SOURCE_GROUP: + case MCAST_BLOCK_SOURCE: + case MCAST_UNBLOCK_SOURCE: + case MCAST_MSFILTER: + return true; + } + return false; +} + static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, char __user *optval, unsigned int optlen) { @@ -124,8 +143,9 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, struct net *net = sock_net(sk); int val, valbool; int retv = -ENOPROTOOPT; + bool needs_rtnl = setsockopt_needs_rtnl(optname); - if (optval == NULL) + if (!optval) val = 0; else { if (optlen >= sizeof(int)) { @@ -140,6 +160,8 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, if (ip6_mroute_opt(optname)) return ip6_mroute_setsockopt(sk, optname, optval, optlen); + if (needs_rtnl) + rtnl_lock(); lock_sock(sk); switch (optname) { @@ -370,7 +392,7 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname, */ if (optlen == 0) optval = NULL; - else if (optval == NULL) + else if (!optval) goto e_inval; else if (optlen < sizeof(struct ipv6_opt_hdr) || optlen & 0x7 || optlen > 8 * 255) @@ -421,7 +443,7 @@ sticky_done: if (optlen == 0) goto e_inval; - else if (optlen < sizeof(struct in6_pktinfo) || optval == NULL) + else if (optlen < sizeof(struct in6_pktinfo) || !optval) goto e_inval; if (copy_from_user(&pkt, optval, sizeof(struct in6_pktinfo))) { @@ -460,7 +482,7 @@ sticky_done: opt = sock_kmalloc(sk, sizeof(*opt) + optlen, GFP_KERNEL); retv = -ENOBUFS; - if (opt == NULL) + if (!opt) break; memset(opt, 0, sizeof(*opt)); @@ -624,10 +646,10 @@ done: psin6 = (struct sockaddr_in6 *)&greq.gr_group; if (optname == MCAST_JOIN_GROUP) retv = ipv6_sock_mc_join(sk, greq.gr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); else retv = ipv6_sock_mc_drop(sk, greq.gr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); break; } case MCAST_JOIN_SOURCE_GROUP: @@ -660,7 +682,7 @@ done: psin6 = (struct sockaddr_in6 *)&greqs.gsr_group; retv = ipv6_sock_mc_join(sk, greqs.gsr_interface, - &psin6->sin6_addr); + &psin6->sin6_addr); /* prior join w/ different source is ok */ if (retv && retv != -EADDRINUSE) break; @@ -837,11 +859,15 @@ pref_skip_coa: } release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return retv; e_inval: release_sock(sk); + if (needs_rtnl) + rtnl_unlock(); return -EINVAL; } diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c index 5ce107c8aab3..083b2927fc67 100644 --- a/net/ipv6/mcast.c +++ b/net/ipv6/mcast.c @@ -140,6 +140,8 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) struct net *net = sock_net(sk); int err; + ASSERT_RTNL(); + if (!ipv6_addr_is_multicast(addr)) return -EINVAL; @@ -155,13 +157,12 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst = sock_kmalloc(sk, sizeof(struct ipv6_mc_socklist), GFP_KERNEL); - if (mc_lst == NULL) + if (!mc_lst) return -ENOMEM; mc_lst->next = NULL; mc_lst->addr = *addr; - rtnl_lock(); if (ifindex == 0) { struct rt6_info *rt; rt = rt6_lookup(net, addr, NULL, 0, 0); @@ -172,8 +173,7 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) } else dev = __dev_get_by_index(net, ifindex); - if (dev == NULL) { - rtnl_unlock(); + if (!dev) { sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return -ENODEV; } @@ -190,7 +190,6 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) err = ipv6_dev_mc_inc(dev, addr); if (err) { - rtnl_unlock(); sock_kfree_s(sk, mc_lst, sizeof(*mc_lst)); return err; } @@ -198,10 +197,9 @@ int ipv6_sock_mc_join(struct sock *sk, int ifindex, const struct in6_addr *addr) mc_lst->next = np->ipv6_mc_list; rcu_assign_pointer(np->ipv6_mc_list, mc_lst); - rtnl_unlock(); - return 0; } +EXPORT_SYMBOL(ipv6_sock_mc_join); /* * socket leave on multicast group @@ -213,10 +211,11 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) struct ipv6_mc_socklist __rcu **lnk; struct net *net = sock_net(sk); + ASSERT_RTNL(); + if (!ipv6_addr_is_multicast(addr)) return -EINVAL; - rtnl_lock(); for (lnk = &np->ipv6_mc_list; (mc_lst = rtnl_dereference(*lnk)) != NULL; lnk = &mc_lst->next) { @@ -227,7 +226,7 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) *lnk = mc_lst->next; dev = __dev_get_by_index(net, mc_lst->ifindex); - if (dev != NULL) { + if (dev) { struct inet6_dev *idev = __in6_dev_get(dev); (void) ip6_mc_leave_src(sk, mc_lst, idev); @@ -235,17 +234,16 @@ int ipv6_sock_mc_drop(struct sock *sk, int ifindex, const struct in6_addr *addr) __ipv6_dev_mc_dec(idev, &mc_lst->addr); } else (void) ip6_mc_leave_src(sk, mc_lst, NULL); - rtnl_unlock(); atomic_sub(sizeof(*mc_lst), &sk->sk_omem_alloc); kfree_rcu(mc_lst, rcu); return 0; } } - rtnl_unlock(); return -EADDRNOTAVAIL; } +EXPORT_SYMBOL(ipv6_sock_mc_drop); /* called with rcu_read_lock() */ static struct inet6_dev *ip6_mc_find_dev_rcu(struct net *net, @@ -438,7 +436,7 @@ done: read_unlock_bh(&idev->lock); rcu_read_unlock(); if (leavegroup) - return ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); + err = ipv6_sock_mc_drop(sk, pgsr->gsr_interface, group); return err; } @@ -825,7 +823,7 @@ static struct ifmcaddr6 *mca_alloc(struct inet6_dev *idev, struct ifmcaddr6 *mc; mc = kzalloc(sizeof(*mc), GFP_ATOMIC); - if (mc == NULL) + if (!mc) return NULL; setup_timer(&mc->mca_timer, igmp6_timer_handler, (unsigned long)mc); @@ -862,7 +860,7 @@ int ipv6_dev_mc_inc(struct net_device *dev, const struct in6_addr *addr) /* we need to take a reference on idev */ idev = in6_dev_get(dev); - if (idev == NULL) + if (!idev) return -EINVAL; write_lock_bh(&idev->lock); @@ -1330,7 +1328,7 @@ int igmp6_event_query(struct sk_buff *skb) return -EINVAL; idev = __in6_dev_get(skb->dev); - if (idev == NULL) + if (!idev) return 0; mld = (struct mld_msg *)icmp6_hdr(skb); @@ -1445,7 +1443,7 @@ int igmp6_event_report(struct sk_buff *skb) return -EINVAL; idev = __in6_dev_get(skb->dev); - if (idev == NULL) + if (!idev) return -ENODEV; /* @@ -1646,8 +1644,9 @@ static void mld_sendpack(struct sk_buff *skb) payload_len = skb->len; - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, + net->ipv6.igmp_sk, skb, NULL, skb->dev, + dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, ICMPV6_MLD2_REPORT); @@ -1964,7 +1963,7 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) skb = sock_alloc_send_skb(sk, hlen + tlen + full_len, 1, &err); - if (skb == NULL) { + if (!skb) { rcu_read_lock(); IP6_INC_STATS(net, __in6_dev_get(dev), IPSTATS_MIB_OUTDISCARDS); @@ -2009,8 +2008,8 @@ static void igmp6_send(struct in6_addr *addr, struct net_device *dev, int type) } skb_dst_set(skb, dst); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, skb->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb->dev, dst_output_sk); out: if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); @@ -2613,7 +2612,7 @@ static struct ifmcaddr6 *igmp6_mc_get_next(struct seq_file *seq, struct ifmcaddr im = im->next; while (!im) { - if (likely(state->idev != NULL)) + if (likely(state->idev)) read_unlock_bh(&state->idev->lock); state->dev = next_net_device_rcu(state->dev); @@ -2659,7 +2658,7 @@ static void igmp6_mc_seq_stop(struct seq_file *seq, void *v) { struct igmp6_mc_iter_state *state = igmp6_mc_seq_private(seq); - if (likely(state->idev != NULL)) { + if (likely(state->idev)) { read_unlock_bh(&state->idev->lock); state->idev = NULL; } @@ -2728,10 +2727,10 @@ static inline struct ip6_sf_list *igmp6_mcf_get_first(struct seq_file *seq) continue; read_lock_bh(&idev->lock); im = idev->mc_list; - if (likely(im != NULL)) { + if (likely(im)) { spin_lock_bh(&im->mca_lock); psf = im->mca_sources; - if (likely(psf != NULL)) { + if (likely(psf)) { state->im = im; state->idev = idev; break; @@ -2752,7 +2751,7 @@ static struct ip6_sf_list *igmp6_mcf_get_next(struct seq_file *seq, struct ip6_s spin_unlock_bh(&state->im->mca_lock); state->im = state->im->next; while (!state->im) { - if (likely(state->idev != NULL)) + if (likely(state->idev)) read_unlock_bh(&state->idev->lock); state->dev = next_net_device_rcu(state->dev); @@ -2806,11 +2805,11 @@ static void igmp6_mcf_seq_stop(struct seq_file *seq, void *v) __releases(RCU) { struct igmp6_mcf_iter_state *state = igmp6_mcf_seq_private(seq); - if (likely(state->im != NULL)) { + if (likely(state->im)) { spin_unlock_bh(&state->im->mca_lock); state->im = NULL; } - if (likely(state->idev != NULL)) { + if (likely(state->idev)) { read_unlock_bh(&state->idev->lock); state->idev = NULL; } @@ -2907,20 +2906,32 @@ static int __net_init igmp6_net_init(struct net *net) inet6_sk(net->ipv6.igmp_sk)->hop_limit = 1; + err = inet_ctl_sock_create(&net->ipv6.mc_autojoin_sk, PF_INET6, + SOCK_RAW, IPPROTO_ICMPV6, net); + if (err < 0) { + pr_err("Failed to initialize the IGMP6 autojoin socket (err %d)\n", + err); + goto out_sock_create; + } + err = igmp6_proc_init(net); if (err) - goto out_sock_create; -out: - return err; + goto out_sock_create_autojoin; + + return 0; +out_sock_create_autojoin: + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); out_sock_create: inet_ctl_sock_destroy(net->ipv6.igmp_sk); - goto out; +out: + return err; } static void __net_exit igmp6_net_exit(struct net *net) { inet_ctl_sock_destroy(net->ipv6.igmp_sk); + inet_ctl_sock_destroy(net->ipv6.mc_autojoin_sk); igmp6_proc_exit(net); } diff --git a/net/ipv6/mcast_snoop.c b/net/ipv6/mcast_snoop.c new file mode 100644 index 000000000000..df8afe5ab31e --- /dev/null +++ b/net/ipv6/mcast_snoop.c @@ -0,0 +1,213 @@ +/* Copyright (C) 2010: YOSHIFUJI Hideaki <yoshfuji@linux-ipv6.org> + * Copyright (C) 2015: Linus Lüssing <linus.luessing@c0d3.blue> + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of version 2 of the GNU General Public + * License as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see <http://www.gnu.org/licenses/>. + * + * + * Based on the MLD support added to br_multicast.c by YOSHIFUJI Hideaki. + */ + +#include <linux/skbuff.h> +#include <net/ipv6.h> +#include <net/mld.h> +#include <net/addrconf.h> +#include <net/ip6_checksum.h> + +static int ipv6_mc_check_ip6hdr(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + unsigned int len; + unsigned int offset = skb_network_offset(skb) + sizeof(*ip6h); + + if (!pskb_may_pull(skb, offset)) + return -EINVAL; + + ip6h = ipv6_hdr(skb); + + if (ip6h->version != 6) + return -EINVAL; + + len = offset + ntohs(ip6h->payload_len); + if (skb->len < len || len <= offset) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_exthdrs(struct sk_buff *skb) +{ + const struct ipv6hdr *ip6h; + int offset; + u8 nexthdr; + __be16 frag_off; + + ip6h = ipv6_hdr(skb); + + if (ip6h->nexthdr != IPPROTO_HOPOPTS) + return -ENOMSG; + + nexthdr = ip6h->nexthdr; + offset = skb_network_offset(skb) + sizeof(*ip6h); + offset = ipv6_skip_exthdr(skb, offset, &nexthdr, &frag_off); + + if (offset < 0) + return -EINVAL; + + if (nexthdr != IPPROTO_ICMPV6) + return -ENOMSG; + + skb_set_transport_header(skb, offset); + + return 0; +} + +static int ipv6_mc_check_mld_reportv2(struct sk_buff *skb) +{ + unsigned int len = skb_transport_offset(skb); + + len += sizeof(struct mld2_report); + + return pskb_may_pull(skb, len) ? 0 : -EINVAL; +} + +static int ipv6_mc_check_mld_query(struct sk_buff *skb) +{ + struct mld_msg *mld; + unsigned int len = skb_transport_offset(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require link-local source addresses */ + if (!(ipv6_addr_type(&ipv6_hdr(skb)->saddr) & IPV6_ADDR_LINKLOCAL)) + return -EINVAL; + + len += sizeof(struct mld_msg); + if (skb->len < len) + return -EINVAL; + + /* MLDv1? */ + if (skb->len != len) { + /* or MLDv2? */ + len += sizeof(struct mld2_query) - sizeof(struct mld_msg); + if (skb->len < len || !pskb_may_pull(skb, len)) + return -EINVAL; + } + + mld = (struct mld_msg *)skb_transport_header(skb); + + /* RFC2710+RFC3810 (MLDv1+MLDv2) require the multicast link layer + * all-nodes destination address (ff02::1) for general queries + */ + if (ipv6_addr_any(&mld->mld_mca) && + !ipv6_addr_is_ll_all_nodes(&ipv6_hdr(skb)->daddr)) + return -EINVAL; + + return 0; +} + +static int ipv6_mc_check_mld_msg(struct sk_buff *skb) +{ + struct mld_msg *mld = (struct mld_msg *)skb_transport_header(skb); + + switch (mld->mld_type) { + case ICMPV6_MGM_REDUCTION: + case ICMPV6_MGM_REPORT: + /* fall through */ + return 0; + case ICMPV6_MLD2_REPORT: + return ipv6_mc_check_mld_reportv2(skb); + case ICMPV6_MGM_QUERY: + return ipv6_mc_check_mld_query(skb); + default: + return -ENOMSG; + } +} + +static inline __sum16 ipv6_mc_validate_checksum(struct sk_buff *skb) +{ + return skb_checksum_validate(skb, IPPROTO_ICMPV6, ip6_compute_pseudo); +} + +static int __ipv6_mc_check_mld(struct sk_buff *skb, + struct sk_buff **skb_trimmed) + +{ + struct sk_buff *skb_chk = NULL; + unsigned int transport_len; + unsigned int len = skb_transport_offset(skb) + sizeof(struct mld_msg); + int ret; + + transport_len = ntohs(ipv6_hdr(skb)->payload_len); + transport_len -= skb_transport_offset(skb) - sizeof(struct ipv6hdr); + + skb_get(skb); + skb_chk = skb_checksum_trimmed(skb, transport_len, + ipv6_mc_validate_checksum); + if (!skb_chk) + return -EINVAL; + + if (!pskb_may_pull(skb_chk, len)) { + kfree_skb(skb_chk); + return -EINVAL; + } + + ret = ipv6_mc_check_mld_msg(skb_chk); + if (ret) { + kfree_skb(skb_chk); + return ret; + } + + if (skb_trimmed) + *skb_trimmed = skb_chk; + else + kfree_skb(skb_chk); + + return 0; +} + +/** + * ipv6_mc_check_mld - checks whether this is a sane MLD packet + * @skb: the skb to validate + * @skb_trimmed: to store an skb pointer trimmed to IPv6 packet tail (optional) + * + * Checks whether an IPv6 packet is a valid MLD packet. If so sets + * skb network and transport headers accordingly and returns zero. + * + * -EINVAL: A broken packet was detected, i.e. it violates some internet + * standard + * -ENOMSG: IP header validation succeeded but it is not an MLD packet. + * -ENOMEM: A memory allocation failure happened. + * + * Optionally, an skb pointer might be provided via skb_trimmed (or set it + * to NULL): After parsing an MLD packet successfully it will point to + * an skb which has its tail aligned to the IP packet end. This might + * either be the originally provided skb or a trimmed, cloned version if + * the skb frame had data beyond the IP packet. A cloned skb allows us + * to leave the original skb and its full frame unchanged (which might be + * desirable for layer 2 frame jugglers). + * + * The caller needs to release a reference count from any returned skb_trimmed. + */ +int ipv6_mc_check_mld(struct sk_buff *skb, struct sk_buff **skb_trimmed) +{ + int ret; + + ret = ipv6_mc_check_ip6hdr(skb); + if (ret < 0) + return ret; + + ret = ipv6_mc_check_exthdrs(skb); + if (ret < 0) + return ret; + + return __ipv6_mc_check_mld(skb, skb_trimmed); +} +EXPORT_SYMBOL(ipv6_mc_check_mld); diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 14ecdaf06bf7..c53331cfed95 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -84,6 +84,7 @@ do { \ static u32 ndisc_hash(const void *pkey, const struct net_device *dev, __u32 *hash_rnd); +static bool ndisc_key_eq(const struct neighbour *neigh, const void *pkey); static int ndisc_constructor(struct neighbour *neigh); static void ndisc_solicit(struct neighbour *neigh, struct sk_buff *skb); static void ndisc_error_report(struct neighbour *neigh, struct sk_buff *skb); @@ -117,7 +118,9 @@ static const struct neigh_ops ndisc_direct_ops = { struct neigh_table nd_tbl = { .family = AF_INET6, .key_len = sizeof(struct in6_addr), + .protocol = cpu_to_be16(ETH_P_IPV6), .hash = ndisc_hash, + .key_eq = ndisc_key_eq, .constructor = ndisc_constructor, .pconstructor = pndisc_constructor, .pdestructor = pndisc_destructor, @@ -294,6 +297,11 @@ static u32 ndisc_hash(const void *pkey, return ndisc_hashfn(pkey, dev, hash_rnd); } +static bool ndisc_key_eq(const struct neighbour *n, const void *pkey) +{ + return neigh_key_eq128(n, pkey); +} + static int ndisc_constructor(struct neighbour *neigh) { struct in6_addr *addr = (struct in6_addr *)&neigh->primary_key; @@ -303,7 +311,7 @@ static int ndisc_constructor(struct neighbour *neigh) bool is_multicast = ipv6_addr_is_multicast(addr); in6_dev = in6_dev_get(dev); - if (in6_dev == NULL) { + if (!in6_dev) { return -EINVAL; } @@ -348,7 +356,7 @@ static int pndisc_constructor(struct pneigh_entry *n) struct in6_addr maddr; struct net_device *dev = n->dev; - if (dev == NULL || __in6_dev_get(dev) == NULL) + if (!dev || !__in6_dev_get(dev)) return -EINVAL; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_inc(dev, &maddr); @@ -361,7 +369,7 @@ static void pndisc_destructor(struct pneigh_entry *n) struct in6_addr maddr; struct net_device *dev = n->dev; - if (dev == NULL || __in6_dev_get(dev) == NULL) + if (!dev || !__in6_dev_get(dev)) return; addrconf_addr_solict_mult(addr, &maddr); ipv6_dev_mc_dec(dev, &maddr); @@ -455,8 +463,9 @@ static void ndisc_send_skb(struct sk_buff *skb, idev = __in6_dev_get(dst->dev); IP6_UPD_PO_STATS(net, idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, dst->dev, - dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, dst->dev, + dst_output_sk); if (!err) { ICMP6MSGOUT_INC_STATS(net, idev, type); ICMP6_INC_STATS(net, idev, ICMP6_MIB_OUTMSGS); @@ -552,7 +561,7 @@ void ndisc_send_ns(struct net_device *dev, struct neighbour *neigh, int optlen = 0; struct nd_msg *msg; - if (saddr == NULL) { + if (!saddr) { if (ipv6_get_lladdr(dev, &addr_buf, (IFA_F_TENTATIVE|IFA_F_OPTIMISTIC))) return; @@ -1022,13 +1031,13 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) size_t msg_size = base_size + nla_total_size(sizeof(struct in6_addr)); skb = nlmsg_new(msg_size, GFP_ATOMIC); - if (skb == NULL) { + if (!skb) { err = -ENOBUFS; goto errout; } nlh = nlmsg_put(skb, 0, 0, RTM_NEWNDUSEROPT, base_size, 0); - if (nlh == NULL) { + if (!nlh) { goto nla_put_failure; } @@ -1041,8 +1050,7 @@ static void ndisc_ra_useropt(struct sk_buff *ra, struct nd_opt_hdr *opt) memcpy(ndmsg + 1, opt, opt->nd_opt_len << 3); - if (nla_put(skb, NDUSEROPT_SRCADDR, sizeof(struct in6_addr), - &ipv6_hdr(ra)->saddr)) + if (nla_put_in6_addr(skb, NDUSEROPT_SRCADDR, &ipv6_hdr(ra)->saddr)) goto nla_put_failure; nlmsg_end(skb, nlh); @@ -1096,7 +1104,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) */ in6_dev = __in6_dev_get(skb->dev); - if (in6_dev == NULL) { + if (!in6_dev) { ND_PRINTK(0, err, "RA: can't find inet6 device for %s\n", skb->dev->name); return; @@ -1191,11 +1199,11 @@ static void ndisc_router_discovery(struct sk_buff *skb) ND_PRINTK(3, info, "RA: rt: %p lifetime: %d, for dev: %s\n", rt, lifetime, skb->dev->name); - if (rt == NULL && lifetime) { + if (!rt && lifetime) { ND_PRINTK(3, info, "RA: adding default router\n"); rt = rt6_add_dflt_router(&ipv6_hdr(skb)->saddr, skb->dev, pref); - if (rt == NULL) { + if (!rt) { ND_PRINTK(0, err, "RA: %s failed to add default route\n", __func__); @@ -1203,7 +1211,7 @@ static void ndisc_router_discovery(struct sk_buff *skb) } neigh = dst_neigh_lookup(&rt->dst, &ipv6_hdr(skb)->saddr); - if (neigh == NULL) { + if (!neigh) { ND_PRINTK(0, err, "RA: %s got default router without neighbour\n", __func__); @@ -1498,7 +1506,7 @@ void ndisc_send_redirect(struct sk_buff *skb, const struct in6_addr *target) "Redirect: destination is not a neighbour\n"); goto release; } - peer = inet_getpeer_v6(net->ipv6.peers, &rt->rt6i_dst.addr, 1); + peer = inet_getpeer_v6(net->ipv6.peers, &ipv6_hdr(skb)->saddr, 1); ret = inet_peer_xrlim_allow(peer, 1*HZ); if (peer) inet_putpeer(peer); @@ -1642,6 +1650,7 @@ int ndisc_rcv(struct sk_buff *skb) static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = netdev_notifier_info_to_dev(ptr); + struct netdev_notifier_change_info *change_info; struct net *net = dev_net(dev); struct inet6_dev *idev; @@ -1656,6 +1665,11 @@ static int ndisc_netdev_event(struct notifier_block *this, unsigned long event, ndisc_send_unsol_na(dev); in6_dev_put(idev); break; + case NETDEV_CHANGE: + change_info = ptr; + if (change_info->flags_changed & IFF_NOARP) + neigh_changeaddr(&nd_tbl, dev); + break; case NETDEV_DOWN: neigh_ifdown(&nd_tbl, dev); fib6_run_gc(0, net, false); diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index 398377a9d018..b4de08a83e0b 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -84,7 +84,7 @@ static void nf_ip6_saveroute(const struct sk_buff *skb, { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (entry->hook == NF_INET_LOCAL_OUT) { + if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); rt_info->daddr = iph->daddr; @@ -98,7 +98,7 @@ static int nf_ip6_reroute(struct sk_buff *skb, { struct ip6_rt_info *rt_info = nf_queue_entry_reroute(entry); - if (entry->hook == NF_INET_LOCAL_OUT) { + if (entry->state.hook == NF_INET_LOCAL_OUT) { const struct ipv6hdr *iph = ipv6_hdr(skb); if (!ipv6_addr_equal(&iph->daddr, &rt_info->daddr) || !ipv6_addr_equal(&iph->saddr, &rt_info->saddr) || @@ -191,6 +191,8 @@ static __sum16 nf_ip6_checksum_partial(struct sk_buff *skb, unsigned int hook, static const struct nf_ipv6_ops ipv6ops = { .chk_addr = ipv6_chk_addr, + .route_input = ip6_route_input, + .fragment = ip6_fragment }; static const struct nf_afinfo nf_ip6_afinfo = { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index a069822936e6..b552cf0d6198 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,14 +25,16 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +if NF_TABLES + config NF_TABLES_IPV6 - depends on NF_TABLES tristate "IPv6 nf_tables support" help This option enables the IPv6 support for nf_tables. +if NF_TABLES_IPV6 + config NFT_CHAIN_ROUTE_IPV6 - depends on NF_TABLES_IPV6 tristate "IPv6 nf_tables route chain support" help This option enables the "route" chain for IPv6 in nf_tables. This @@ -40,16 +42,18 @@ config NFT_CHAIN_ROUTE_IPV6 fields such as the source, destination, flowlabel, hop-limit and the packet mark. -config NF_REJECT_IPV6 - tristate "IPv6 packet rejection" - default m if NETFILTER_ADVANCED=n - config NFT_REJECT_IPV6 - depends on NF_TABLES_IPV6 select NF_REJECT_IPV6 default NFT_REJECT tristate +endif # NF_TABLES_IPV6 +endif # NF_TABLES + +config NF_REJECT_IPV6 + tristate "IPv6 packet rejection" + default m if NETFILTER_ADVANCED=n + config NF_LOG_IPV6 tristate "IPv6 packet logging" default m if NETFILTER_ADVANCED=n @@ -182,7 +186,8 @@ config IP6_NF_MATCH_MH config IP6_NF_MATCH_RPFILTER tristate '"rpfilter" reverse path filter match support' - depends on NETFILTER_ADVANCED && (IP6_NF_MANGLE || IP6_NF_RAW) + depends on NETFILTER_ADVANCED + depends on IP6_NF_MANGLE || IP6_NF_RAW ---help--- This option allows you to match packets whose replies would go out via the interface the packet came in. diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index bb00c6f2a885..3c35ced39b42 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -9,7 +9,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> #include <linux/capability.h> #include <linux/in.h> #include <linux/skbuff.h> @@ -234,7 +237,7 @@ static struct nf_loginfo trace_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 4, + .level = LOGLEVEL_WARNING, .logflags = NF_LOG_MASK, }, }, @@ -280,15 +283,13 @@ static void trace_packet(const struct sk_buff *skb, const struct xt_table_info *private, const struct ip6t_entry *e) { - const void *table_base; const struct ip6t_entry *root; const char *hookname, *chainname, *comment; const struct ip6t_entry *iter; unsigned int rulenum = 0; struct net *net = dev_net(in ? in : out); - table_base = private->entries[smp_processor_id()]; - root = get_entry(table_base, private->hook_entry[hook]); + root = get_entry(private->entries, private->hook_entry[hook]); hookname = chainname = hooknames[hook]; comment = comments[NF_IP6_TRACE_COMMENT_RULE]; @@ -314,8 +315,7 @@ ip6t_next_entry(const struct ip6t_entry *entry) unsigned int ip6t_do_table(struct sk_buff *skb, unsigned int hook, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct xt_table *table) { static const char nulldevname[IFNAMSIZ] __attribute__((aligned(sizeof(long)))); @@ -330,8 +330,8 @@ ip6t_do_table(struct sk_buff *skb, unsigned int addend; /* Initialization */ - indev = in ? in->name : nulldevname; - outdev = out ? out->name : nulldevname; + indev = state->in ? state->in->name : nulldevname; + outdev = state->out ? state->out->name : nulldevname; /* We handle fragments by dealing with the first fragment as * if it was a normal packet. All other fragments are treated * normally, except that they will NEVER match rules that ask @@ -339,8 +339,8 @@ ip6t_do_table(struct sk_buff *skb, * rule is also a fragment-specific rule, non-fragments won't * match it. */ acpar.hotdrop = false; - acpar.in = in; - acpar.out = out; + acpar.in = state->in; + acpar.out = state->out; acpar.family = NFPROTO_IPV6; acpar.hooknum = hook; @@ -355,7 +355,7 @@ ip6t_do_table(struct sk_buff *skb, */ smp_read_barrier_depends(); cpu = smp_processor_id(); - table_base = private->entries[cpu]; + table_base = private->entries; jumpstack = (struct ip6t_entry **)private->jumpstack[cpu]; stackptr = per_cpu_ptr(private->stackptr, cpu); origptr = *stackptr; @@ -365,6 +365,7 @@ ip6t_do_table(struct sk_buff *skb, do { const struct xt_entry_target *t; const struct xt_entry_match *ematch; + struct xt_counters *counter; IP_NF_ASSERT(e); acpar.thoff = 0; @@ -382,7 +383,8 @@ ip6t_do_table(struct sk_buff *skb, goto no_match; } - ADD_COUNTER(e->counters, skb->len, 1); + counter = xt_get_this_cpu_counter(&e->counters); + ADD_COUNTER(*counter, skb->len, 1); t = ip6t_get_target_c(e); IP_NF_ASSERT(t->u.kernel.target); @@ -390,7 +392,7 @@ ip6t_do_table(struct sk_buff *skb, #if IS_ENABLED(CONFIG_NETFILTER_XT_TARGET_TRACE) /* The packet is traced: log it */ if (unlikely(skb->nf_trace)) - trace_packet(skb, hook, in, out, + trace_packet(skb, hook, state->in, state->out, table->name, private, e); #endif /* Standard target? */ @@ -677,6 +679,10 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, if (ret) return ret; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; + j = 0; mtpar.net = net; mtpar.table = name; @@ -712,6 +718,9 @@ find_check_entry(struct ip6t_entry *e, struct net *net, const char *name, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -795,6 +804,8 @@ static void cleanup_entry(struct ip6t_entry *e, struct net *net) if (par.target->destroy != NULL) par.target->destroy(&par); module_put(par.target->me); + + xt_percpu_counter_free(e->counters.pcnt); } /* Checks and translates the user-supplied table segment (held in @@ -877,12 +888,6 @@ translate_table(struct net *net, struct xt_table_info *newinfo, void *entry0, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) { - if (newinfo->entries[i] && newinfo->entries[i] != entry0) - memcpy(newinfo->entries[i], entry0, newinfo->size); - } - return ret; } @@ -898,14 +903,16 @@ get_counters(const struct xt_table_info *t, seqcount_t *s = &per_cpu(xt_recseq, cpu); i = 0; - xt_entry_foreach(iter, t->entries[cpu], t->size) { + xt_entry_foreach(iter, t->entries, t->size) { + struct xt_counters *tmp; u64 bcnt, pcnt; unsigned int start; + tmp = xt_get_per_cpu_counter(&iter->counters, cpu); do { start = read_seqcount_begin(s); - bcnt = iter->counters.bcnt; - pcnt = iter->counters.pcnt; + bcnt = tmp->bcnt; + pcnt = tmp->pcnt; } while (read_seqcount_retry(s, start)); ADD_COUNTER(counters[i], bcnt, pcnt); @@ -950,11 +957,7 @@ copy_entries_to_user(unsigned int total_size, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; if (copy_to_user(userptr, loc_cpu_entry, total_size) != 0) { ret = -EFAULT; goto free_counters; @@ -1062,16 +1065,16 @@ static int compat_table_info(const struct xt_table_info *info, struct xt_table_info *newinfo) { struct ip6t_entry *iter; - void *loc_cpu_entry; + const void *loc_cpu_entry; int ret; if (!newinfo || !info) return -EINVAL; - /* we dont care about newinfo->entries[] */ + /* we dont care about newinfo->entries */ memcpy(newinfo, info, offsetof(struct xt_table_info, entries)); newinfo->initial_entries = 0; - loc_cpu_entry = info->entries[raw_smp_processor_id()]; + loc_cpu_entry = info->entries; xt_compat_init_offsets(AF_INET6, info->number); xt_entry_foreach(iter, loc_cpu_entry, info->size) { ret = compat_calc_entry(iter, info, loc_cpu_entry, newinfo); @@ -1192,7 +1195,6 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, struct xt_table *t; struct xt_table_info *oldinfo; struct xt_counters *counters; - const void *loc_cpu_old_entry; struct ip6t_entry *iter; ret = 0; @@ -1235,8 +1237,7 @@ __do_replace(struct net *net, const char *name, unsigned int valid_hooks, get_counters(oldinfo, counters); /* Decrease module usage counts and free resource */ - loc_cpu_old_entry = oldinfo->entries[raw_smp_processor_id()]; - xt_entry_foreach(iter, loc_cpu_old_entry, oldinfo->size) + xt_entry_foreach(iter, oldinfo->entries, oldinfo->size) cleanup_entry(iter, net); xt_free_table_info(oldinfo); @@ -1273,14 +1274,16 @@ do_replace(struct net *net, const void __user *user, unsigned int len) /* overflow check */ if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1311,7 +1314,7 @@ static int do_add_counters(struct net *net, const void __user *user, unsigned int len, int compat) { - unsigned int i, curcpu; + unsigned int i; struct xt_counters_info tmp; struct xt_counters *paddc; unsigned int num_counters; @@ -1321,7 +1324,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, struct xt_table *t; const struct xt_table_info *private; int ret = 0; - const void *loc_cpu_entry; struct ip6t_entry *iter; unsigned int addend; #ifdef CONFIG_COMPAT @@ -1369,7 +1371,6 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, goto free; } - local_bh_disable(); private = t->private; if (private->number != num_counters) { @@ -1378,16 +1379,15 @@ do_add_counters(struct net *net, const void __user *user, unsigned int len, } i = 0; - /* Choose the copy that is on our node */ - curcpu = smp_processor_id(); addend = xt_write_recseq_begin(); - loc_cpu_entry = private->entries[curcpu]; - xt_entry_foreach(iter, loc_cpu_entry, private->size) { - ADD_COUNTER(iter->counters, paddc[i].bcnt, paddc[i].pcnt); + xt_entry_foreach(iter, private->entries, private->size) { + struct xt_counters *tmp; + + tmp = xt_get_this_cpu_counter(&iter->counters); + ADD_COUNTER(*tmp, paddc[i].bcnt, paddc[i].pcnt); ++i; } xt_write_recseq_end(addend); - unlock_up_free: local_bh_enable(); xt_table_unlock(t); @@ -1454,7 +1454,6 @@ static int compat_find_calc_match(struct xt_entry_match *m, const char *name, const struct ip6t_ip6 *ipv6, - unsigned int hookmask, int *size) { struct xt_match *match; @@ -1523,8 +1522,7 @@ check_compat_entry_size_and_hooks(struct compat_ip6t_entry *e, entry_offset = (void *)e - (void *)base; j = 0; xt_ematch_foreach(ematch, e) { - ret = compat_find_calc_match(ematch, name, - &e->ipv6, e->comefrom, &off); + ret = compat_find_calc_match(ematch, name, &e->ipv6, &off); if (ret != 0) goto release_matches; ++j; @@ -1618,6 +1616,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, struct xt_mtchk_param mtpar; struct xt_entry_match *ematch; + e->counters.pcnt = xt_percpu_counter_alloc(); + if (IS_ERR_VALUE(e->counters.pcnt)) + return -ENOMEM; j = 0; mtpar.net = net; mtpar.table = name; @@ -1642,6 +1643,9 @@ static int compat_check_entry(struct ip6t_entry *e, struct net *net, break; cleanup_match(ematch, net); } + + xt_percpu_counter_free(e->counters.pcnt); + return ret; } @@ -1726,7 +1730,7 @@ translate_compat_table(struct net *net, newinfo->hook_entry[i] = info->hook_entry[i]; newinfo->underflow[i] = info->underflow[i]; } - entry1 = newinfo->entries[raw_smp_processor_id()]; + entry1 = newinfo->entries; pos = entry1; size = total_size; xt_entry_foreach(iter0, entry0, total_size) { @@ -1778,11 +1782,6 @@ translate_compat_table(struct net *net, return ret; } - /* And one copy for every other CPU */ - for_each_possible_cpu(i) - if (newinfo->entries[i] && newinfo->entries[i] != entry1) - memcpy(newinfo->entries[i], entry1, newinfo->size); - *pinfo = newinfo; *pentry0 = entry1; xt_free_table_info(info); @@ -1820,14 +1819,16 @@ compat_do_replace(struct net *net, void __user *user, unsigned int len) return -ENOMEM; if (tmp.num_counters >= INT_MAX / sizeof(struct xt_counters)) return -ENOMEM; + if (tmp.num_counters == 0) + return -EINVAL; + tmp.name[sizeof(tmp.name)-1] = 0; newinfo = xt_alloc_table_info(tmp.size); if (!newinfo) return -ENOMEM; - /* choose the copy that is on our node/cpu */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; if (copy_from_user(loc_cpu_entry, user + sizeof(tmp), tmp.size) != 0) { ret = -EFAULT; @@ -1898,7 +1899,6 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, void __user *pos; unsigned int size; int ret = 0; - const void *loc_cpu_entry; unsigned int i = 0; struct ip6t_entry *iter; @@ -1906,14 +1906,9 @@ compat_copy_entries_to_user(unsigned int total_size, struct xt_table *table, if (IS_ERR(counters)) return PTR_ERR(counters); - /* choose the copy that is on our node/cpu, ... - * This choice is lazy (because current thread is - * allowed to migrate to another cpu) - */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; pos = userptr; size = total_size; - xt_entry_foreach(iter, loc_cpu_entry, total_size) { + xt_entry_foreach(iter, private->entries, total_size) { ret = compat_copy_entry_to_user(iter, &pos, &size, counters, i++); if (ret != 0) @@ -2088,8 +2083,7 @@ struct xt_table *ip6t_register_table(struct net *net, goto out; } - /* choose the copy on our node/cpu, but dont care about preemption */ - loc_cpu_entry = newinfo->entries[raw_smp_processor_id()]; + loc_cpu_entry = newinfo->entries; memcpy(loc_cpu_entry, repl->entries, repl->size); ret = translate_table(net, newinfo, loc_cpu_entry, repl); @@ -2119,7 +2113,7 @@ void ip6t_unregister_table(struct net *net, struct xt_table *table) private = xt_unregister_table(table); /* Decrease module usage counts and free resources */ - loc_cpu_entry = private->entries[raw_smp_processor_id()]; + loc_cpu_entry = private->entries; xt_entry_foreach(iter, loc_cpu_entry, private->size) cleanup_entry(iter, net); if (private->number > private->initial_entries) diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index 544b0a9da1b5..12331efd49cf 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -83,7 +83,8 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ - if (e->ipv6.proto != IPPROTO_TCP || + if (!(e->ipv6.flags & IP6T_F_PROTO) || + e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { pr_info("TCP_RESET illegal for non-tcp\n"); return -EINVAL; diff --git a/net/ipv6/netfilter/ip6t_SYNPROXY.c b/net/ipv6/netfilter/ip6t_SYNPROXY.c index a0d17270117c..6edb7b106de7 100644 --- a/net/ipv6/netfilter/ip6t_SYNPROXY.c +++ b/net/ipv6/netfilter/ip6t_SYNPROXY.c @@ -315,11 +315,9 @@ synproxy_tg6(struct sk_buff *skb, const struct xt_action_param *par) static unsigned int ipv6_synproxy_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *nhs) { - struct synproxy_net *snet = synproxy_pernet(dev_net(in ? : out)); + struct synproxy_net *snet = synproxy_pernet(dev_net(nhs->in ? : nhs->out)); enum ip_conntrack_info ctinfo; struct nf_conn *ct; struct nf_conn_synproxy *synproxy; diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index ca7f6c128086..5c33d8abc077 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -33,13 +33,11 @@ static const struct xt_table packet_filter = { /* The work comes in here from netfilter.c. */ static unsigned int ip6table_filter_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, - net->ipv6.ip6table_filter); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_filter); } static struct nf_hook_ops *filter_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 307bbb782d14..b551f5b79fe2 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -32,7 +32,7 @@ static const struct xt_table packet_mangler = { }; static unsigned int -ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) +ip6t_mangle_out(struct sk_buff *skb, const struct nf_hook_state *state) { unsigned int ret; struct in6_addr saddr, daddr; @@ -57,8 +57,8 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* flowlabel and prio (includes version, which shouldn't change either */ flowlabel = *((u_int32_t *)ipv6_hdr(skb)); - ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, NULL, out, - dev_net(out)->ipv6.ip6table_mangle); + ret = ip6t_do_table(skb, NF_INET_LOCAL_OUT, state, + dev_net(state->out)->ipv6.ip6table_mangle); if (ret != NF_DROP && ret != NF_STOLEN && (!ipv6_addr_equal(&ipv6_hdr(skb)->saddr, &saddr) || @@ -77,17 +77,16 @@ ip6t_mangle_out(struct sk_buff *skb, const struct net_device *out) /* The work comes in here from netfilter.c. */ static unsigned int ip6table_mangle_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (ops->hooknum == NF_INET_LOCAL_OUT) - return ip6t_mangle_out(skb, out); + return ip6t_mangle_out(skb, state); if (ops->hooknum == NF_INET_POST_ROUTING) - return ip6t_do_table(skb, ops->hooknum, in, out, - dev_net(out)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, ops->hooknum, state, + dev_net(state->out)->ipv6.ip6table_mangle); /* INPUT/FORWARD */ - return ip6t_do_table(skb, ops->hooknum, in, out, - dev_net(in)->ipv6.ip6table_mangle); + return ip6t_do_table(skb, ops->hooknum, state, + dev_net(state->in)->ipv6.ip6table_mangle); } static struct nf_hook_ops *mangle_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c index b0634ac996b7..c3a7f7af0ed4 100644 --- a/net/ipv6/netfilter/ip6table_nat.c +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -32,49 +32,40 @@ static const struct xt_table nf_nat_ipv6_table = { static unsigned int ip6table_nat_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct) { struct net *net = nf_ct_net(ct); - return ip6t_do_table(skb, ops->hooknum, in, out, net->ipv6.ip6table_nat); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_nat); } static unsigned int ip6table_nat_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_fn(ops, skb, state, ip6table_nat_do_chain); } static unsigned int ip6table_nat_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_in(ops, skb, state, ip6table_nat_do_chain); } static unsigned int ip6table_nat_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_out(ops, skb, state, ip6table_nat_do_chain); } static unsigned int ip6table_nat_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, in, out, ip6table_nat_do_chain); + return nf_nat_ipv6_local_fn(ops, skb, state, ip6table_nat_do_chain); } static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5274740acecc..0b33caad2b69 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -20,13 +20,11 @@ static const struct xt_table packet_raw = { /* The work comes in here from netfilter.c. */ static unsigned int ip6table_raw_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, - net->ipv6.ip6table_raw); + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_raw); } static struct nf_hook_ops *rawtable_ops __read_mostly; diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index ab3b0219ecfa..fcef83c25f7b 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -37,13 +37,11 @@ static const struct xt_table security_table = { static unsigned int ip6table_security_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - const struct net *net = dev_net((in != NULL) ? in : out); + const struct net *net = dev_net(state->in ? state->in : state->out); - return ip6t_do_table(skb, ops->hooknum, in, out, + return ip6t_do_table(skb, ops->hooknum, state, net->ipv6.ip6table_security); } diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index b68d0e59c1f8..4ba0c34c627b 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -97,9 +97,7 @@ static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, static unsigned int ipv6_helper(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; const struct nf_conn_help *help; @@ -135,9 +133,7 @@ static unsigned int ipv6_helper(const struct nf_hook_ops *ops, static unsigned int ipv6_confirm(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nf_conn *ct; enum ip_conntrack_info ctinfo; @@ -171,25 +167,21 @@ out: static unsigned int ipv6_conntrack_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_conntrack_in(dev_net(in), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->in), PF_INET6, ops->hooknum, skb); } static unsigned int ipv6_conntrack_local(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { /* root is playing with raw sockets. */ if (skb->len < sizeof(struct ipv6hdr)) { net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return nf_conntrack_in(dev_net(out), PF_INET6, ops->hooknum, skb); + return nf_conntrack_in(dev_net(state->out), PF_INET6, ops->hooknum, skb); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { @@ -290,10 +282,8 @@ ipv6_getorigdst(struct sock *sk, int optval, void __user *user, int *len) static int ipv6_tuple_to_nlattr(struct sk_buff *skb, const struct nf_conntrack_tuple *tuple) { - if (nla_put(skb, CTA_IP_V6_SRC, sizeof(u_int32_t) * 4, - &tuple->src.u3.ip6) || - nla_put(skb, CTA_IP_V6_DST, sizeof(u_int32_t) * 4, - &tuple->dst.u3.ip6)) + if (nla_put_in6_addr(skb, CTA_IP_V6_SRC, &tuple->src.u3.in6) || + nla_put_in6_addr(skb, CTA_IP_V6_DST, &tuple->dst.u3.in6)) goto nla_put_failure; return 0; @@ -312,10 +302,8 @@ static int ipv6_nlattr_to_tuple(struct nlattr *tb[], if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST]) return -EINVAL; - memcpy(&t->src.u3.ip6, nla_data(tb[CTA_IP_V6_SRC]), - sizeof(u_int32_t) * 4); - memcpy(&t->dst.u3.ip6, nla_data(tb[CTA_IP_V6_DST]), - sizeof(u_int32_t) * 4); + t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]); + t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]); return 0; } diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 6f187c8d8a1b..6d02498172c1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -348,7 +348,7 @@ found: fq->ecn |= ecn; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -454,7 +454,7 @@ nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - sub_frag_mem_limit(&fq->q, head->truesize); + sub_frag_mem_limit(fq->q.net, head->truesize); head->ignore_df = 1; head->next = NULL; diff --git a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c index e70382e4dfb5..a45db0b4785c 100644 --- a/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c +++ b/net/ipv6/netfilter/nf_defrag_ipv6_hooks.c @@ -54,9 +54,7 @@ static enum ip6_defrag_users nf_ct6_defrag_user(unsigned int hooknum, static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct sk_buff *reasm; @@ -77,9 +75,9 @@ static unsigned int ipv6_defrag(const struct nf_hook_ops *ops, nf_ct_frag6_consume_orig(reasm); - NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, reasm, - (struct net_device *) in, (struct net_device *) out, - okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + NF_HOOK_THRESH(NFPROTO_IPV6, ops->hooknum, state->sk, reasm, + state->in, state->out, + state->okfn, NF_IP6_PRI_CONNTRACK_DEFRAG + 1); return NF_STOLEN; } diff --git a/net/ipv6/netfilter/nf_log_ipv6.c b/net/ipv6/netfilter/nf_log_ipv6.c index ddf07e6f59d7..8dd869642f45 100644 --- a/net/ipv6/netfilter/nf_log_ipv6.c +++ b/net/ipv6/netfilter/nf_log_ipv6.c @@ -5,8 +5,10 @@ * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. */ + #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/kernel.h> #include <linux/module.h> #include <linux/spinlock.h> #include <linux/skbuff.h> @@ -27,7 +29,7 @@ static struct nf_loginfo default_loginfo = { .type = NF_LOG_TYPE_LOG, .u = { .log = { - .level = 5, + .level = LOGLEVEL_NOTICE, .logflags = NF_LOG_MASK, }, }, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index c5812e1c1ffb..e76900e0aa92 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -263,11 +263,10 @@ EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); unsigned int nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { struct nf_conn *ct; @@ -318,7 +317,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (!nf_nat_initialized(ct, maniptype)) { unsigned int ret; - ret = do_chain(ops, skb, in, out, ct); + ret = do_chain(ops, skb, state, ct); if (ret != NF_ACCEPT) return ret; @@ -332,7 +331,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, pr_debug("Already setup manip %s for ct %p\n", maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", ct); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) goto oif_changed; } break; @@ -341,7 +340,7 @@ nf_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, /* ESTABLISHED */ NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || ctinfo == IP_CT_ESTABLISHED_REPLY); - if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, out)) + if (nf_nat_oif_changed(ops->hooknum, ctinfo, nat, state->out)) goto oif_changed; } @@ -355,17 +354,16 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_fn); unsigned int nf_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { unsigned int ret; struct in6_addr daddr = ipv6_hdr(skb)->daddr; - ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) skb_dst_drop(skb); @@ -376,11 +374,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_in); unsigned int nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { #ifdef CONFIG_XFRM @@ -394,7 +391,7 @@ nf_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); #ifdef CONFIG_XFRM if (ret != NF_DROP && ret != NF_STOLEN && !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && @@ -418,11 +415,10 @@ EXPORT_SYMBOL_GPL(nf_nat_ipv6_out); unsigned int nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, + const struct nf_hook_state *state, unsigned int (*do_chain)(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct)) { const struct nf_conn *ct; @@ -434,7 +430,7 @@ nf_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, if (skb->len < sizeof(struct ipv6hdr)) return NF_ACCEPT; - ret = nf_nat_ipv6_fn(ops, skb, in, out, do_chain); + ret = nf_nat_ipv6_fn(ops, skb, state, do_chain); if (ret != NF_DROP && ret != NF_STOLEN && (ct = nf_ct_get(skb, &ctinfo)) != NULL) { enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); diff --git a/net/ipv6/netfilter/nf_reject_ipv6.c b/net/ipv6/netfilter/nf_reject_ipv6.c index d05b36440e8b..94b4c6dfb400 100644 --- a/net/ipv6/netfilter/nf_reject_ipv6.c +++ b/net/ipv6/netfilter/nf_reject_ipv6.c @@ -13,6 +13,7 @@ #include <net/ip6_checksum.h> #include <net/netfilter/ipv6/nf_reject.h> #include <linux/netfilter_ipv6.h> +#include <linux/netfilter_bridge.h> #include <net/netfilter/ipv6/nf_reject.h> const struct tcphdr *nf_reject_ip6_tcphdr_get(struct sk_buff *oldskb, @@ -65,7 +66,7 @@ EXPORT_SYMBOL_GPL(nf_reject_ip6_tcphdr_get); struct ipv6hdr *nf_reject_ip6hdr_put(struct sk_buff *nskb, const struct sk_buff *oldskb, - __be16 protocol, int hoplimit) + __u8 protocol, int hoplimit) { struct ipv6hdr *ip6h; const struct ipv6hdr *oip6h = ipv6_hdr(oldskb); @@ -195,7 +196,8 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) */ if (oldskb->nf_bridge) { struct ethhdr *oeth = eth_hdr(oldskb); - nskb->dev = oldskb->nf_bridge->physindev; + + nskb->dev = nf_bridge_get_physindev(oldskb); nskb->protocol = htons(ETH_P_IPV6); ip6h->payload_len = htons(sizeof(struct tcphdr)); if (dev_hard_header(nskb, nskb->dev, ntohs(nskb->protocol), @@ -208,4 +210,39 @@ void nf_send_reset6(struct net *net, struct sk_buff *oldskb, int hook) } EXPORT_SYMBOL_GPL(nf_send_reset6); +static bool reject6_csum_ok(struct sk_buff *skb, int hook) +{ + const struct ipv6hdr *ip6h = ipv6_hdr(skb); + int thoff; + __be16 fo; + u8 proto; + + if (skb->csum_bad) + return false; + + if (skb_csum_unnecessary(skb)) + return true; + + proto = ip6h->nexthdr; + thoff = ipv6_skip_exthdr(skb, ((u8*)(ip6h+1) - skb->data), &proto, &fo); + + if (thoff < 0 || thoff >= skb->len || (fo & htons(~0x7)) != 0) + return false; + + return nf_ip6_checksum(skb, hook, thoff, proto) == 0; +} + +void nf_send_unreach6(struct net *net, struct sk_buff *skb_in, + unsigned char code, unsigned int hooknum) +{ + if (!reject6_csum_ok(skb_in, hooknum)) + return; + + if (hooknum == NF_INET_LOCAL_OUT && skb_in->dev == NULL) + skb_in->dev = net->loopback_dev; + + icmpv6_send(skb_in, ICMPV6_DEST_UNREACH, code, 0); +} +EXPORT_SYMBOL_GPL(nf_send_unreach6); + MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_tables_ipv6.c b/net/ipv6/netfilter/nf_tables_ipv6.c index 0d812b31277d..c8148ba76d1a 100644 --- a/net/ipv6/netfilter/nf_tables_ipv6.c +++ b/net/ipv6/netfilter/nf_tables_ipv6.c @@ -18,14 +18,12 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { struct nft_pktinfo pkt; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) return NF_DROP; return nft_do_chain(&pkt, ops); @@ -33,9 +31,7 @@ static unsigned int nft_do_chain_ipv6(const struct nf_hook_ops *ops, static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { if (unlikely(skb->len < sizeof(struct ipv6hdr))) { if (net_ratelimit()) @@ -44,7 +40,7 @@ static unsigned int nft_ipv6_output(const struct nf_hook_ops *ops, return NF_ACCEPT; } - return nft_do_chain_ipv6(ops, skb, in, out, okfn); + return nft_do_chain_ipv6(ops, skb, state); } struct nft_af_info nft_af_ipv6 __read_mostly = { diff --git a/net/ipv6/netfilter/nft_chain_nat_ipv6.c b/net/ipv6/netfilter/nft_chain_nat_ipv6.c index 1c4b75dd425b..951bb458b7bd 100644 --- a/net/ipv6/netfilter/nft_chain_nat_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_nat_ipv6.c @@ -26,51 +26,42 @@ static unsigned int nft_nat_do_chain(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, + const struct nf_hook_state *state, struct nf_conn *ct) { struct nft_pktinfo pkt; - nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out); + nft_set_pktinfo_ipv6(&pkt, ops, skb, state); return nft_do_chain(&pkt, ops); } static unsigned int nft_nat_ipv6_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_fn(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_in(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_in(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_in(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_out(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_out(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_out(ops, skb, state, nft_nat_do_chain); } static unsigned int nft_nat_ipv6_local_fn(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { - return nf_nat_ipv6_local_fn(ops, skb, in, out, nft_nat_do_chain); + return nf_nat_ipv6_local_fn(ops, skb, state, nft_nat_do_chain); } static const struct nf_chain_type nft_chain_nat_ipv6 = { diff --git a/net/ipv6/netfilter/nft_chain_route_ipv6.c b/net/ipv6/netfilter/nft_chain_route_ipv6.c index 42031299585e..0dafdaac5e17 100644 --- a/net/ipv6/netfilter/nft_chain_route_ipv6.c +++ b/net/ipv6/netfilter/nft_chain_route_ipv6.c @@ -24,9 +24,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, struct sk_buff *skb, - const struct net_device *in, - const struct net_device *out, - int (*okfn)(struct sk_buff *)) + const struct nf_hook_state *state) { unsigned int ret; struct nft_pktinfo pkt; @@ -35,7 +33,7 @@ static unsigned int nf_route_table_hook(const struct nf_hook_ops *ops, u32 mark, flowlabel; /* malformed packet, drop it */ - if (nft_set_pktinfo_ipv6(&pkt, ops, skb, in, out) < 0) + if (nft_set_pktinfo_ipv6(&pkt, ops, skb, state) < 0) return NF_DROP; /* save source/dest address, mark, hoplimit, flowlabel, priority */ diff --git a/net/ipv6/netfilter/nft_masq_ipv6.c b/net/ipv6/netfilter/nft_masq_ipv6.c index 529c119cbb14..cd1ac1637a05 100644 --- a/net/ipv6/netfilter/nft_masq_ipv6.c +++ b/net/ipv6/netfilter/nft_masq_ipv6.c @@ -18,19 +18,16 @@ #include <net/netfilter/ipv6/nf_nat_masquerade.h> static void nft_masq_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_masq *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); range.flags = priv->flags; - verdict = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); - - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_masquerade_ipv6(pkt->skb, &range, pkt->out); } static struct nft_expr_type nft_masq_ipv6_type; diff --git a/net/ipv6/netfilter/nft_redir_ipv6.c b/net/ipv6/netfilter/nft_redir_ipv6.c index 11820b6b3613..effd393bd517 100644 --- a/net/ipv6/netfilter/nft_redir_ipv6.c +++ b/net/ipv6/netfilter/nft_redir_ipv6.c @@ -18,26 +18,25 @@ #include <net/netfilter/nf_nat_redirect.h> static void nft_redir_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_redir *priv = nft_expr_priv(expr); struct nf_nat_range range; - unsigned int verdict; memset(&range, 0, sizeof(range)); if (priv->sreg_proto_min) { range.min_proto.all = - *(__be16 *)&data[priv->sreg_proto_min].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_min], range.max_proto.all = - *(__be16 *)&data[priv->sreg_proto_max].data[0]; + *(__be16 *)®s->data[priv->sreg_proto_max], range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED; } range.flags |= priv->flags; - verdict = nf_nat_redirect_ipv6(pkt->skb, &range, pkt->ops->hooknum); - data[NFT_REG_VERDICT].verdict = verdict; + regs->verdict.code = nf_nat_redirect_ipv6(pkt->skb, &range, + pkt->ops->hooknum); } static struct nft_expr_type nft_redir_ipv6_type; diff --git a/net/ipv6/netfilter/nft_reject_ipv6.c b/net/ipv6/netfilter/nft_reject_ipv6.c index f73285924144..d0d1540ecf87 100644 --- a/net/ipv6/netfilter/nft_reject_ipv6.c +++ b/net/ipv6/netfilter/nft_reject_ipv6.c @@ -20,7 +20,7 @@ #include <net/netfilter/ipv6/nf_reject.h> static void nft_reject_ipv6_eval(const struct nft_expr *expr, - struct nft_data data[NFT_REG_MAX + 1], + struct nft_regs *regs, const struct nft_pktinfo *pkt) { struct nft_reject *priv = nft_expr_priv(expr); @@ -34,9 +34,11 @@ static void nft_reject_ipv6_eval(const struct nft_expr *expr, case NFT_REJECT_TCP_RST: nf_send_reset6(net, pkt->skb, pkt->ops->hooknum); break; + default: + break; } - data[NFT_REG_VERDICT].verdict = NF_DROP; + regs->verdict.code = NF_DROP; } static struct nft_expr_type nft_reject_ipv6_type; diff --git a/net/ipv6/output_core.c b/net/ipv6/output_core.c index 74581f706c4d..928a0fb0b744 100644 --- a/net/ipv6/output_core.c +++ b/net/ipv6/output_core.c @@ -8,14 +8,17 @@ #include <net/ip6_fib.h> #include <net/addrconf.h> #include <net/secure_seq.h> +#include <linux/netfilter.h> -static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, - struct in6_addr *src) +static u32 __ipv6_select_ident(struct net *net, u32 hashrnd, + const struct in6_addr *dst, + const struct in6_addr *src) { u32 hash, id; hash = __ipv6_addr_jhash(dst, hashrnd); hash = __ipv6_addr_jhash(src, hash); + hash ^= net_hash_mix(net); /* Treat id of 0 as unset and if we get 0 back from ip_idents_reserve, * set the hight order instead thus minimizing possible future @@ -36,7 +39,7 @@ static u32 __ipv6_select_ident(u32 hashrnd, struct in6_addr *dst, * * The network header must be set before calling this. */ -void ipv6_proxy_select_ident(struct sk_buff *skb) +void ipv6_proxy_select_ident(struct net *net, struct sk_buff *skb) { static u32 ip6_proxy_idents_hashrnd __read_mostly; struct in6_addr buf[2]; @@ -53,22 +56,23 @@ void ipv6_proxy_select_ident(struct sk_buff *skb) net_get_random_once(&ip6_proxy_idents_hashrnd, sizeof(ip6_proxy_idents_hashrnd)); - id = __ipv6_select_ident(ip6_proxy_idents_hashrnd, + id = __ipv6_select_ident(net, ip6_proxy_idents_hashrnd, &addrs[1], &addrs[0]); skb_shinfo(skb)->ip6_frag_id = htonl(id); } EXPORT_SYMBOL_GPL(ipv6_proxy_select_ident); -void ipv6_select_ident(struct frag_hdr *fhdr, struct rt6_info *rt) +__be32 ipv6_select_ident(struct net *net, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { static u32 ip6_idents_hashrnd __read_mostly; u32 id; net_get_random_once(&ip6_idents_hashrnd, sizeof(ip6_idents_hashrnd)); - id = __ipv6_select_ident(ip6_idents_hashrnd, &rt->rt6i_dst.addr, - &rt->rt6i_src.addr); - fhdr->identification = htonl(id); + id = __ipv6_select_ident(net, ip6_idents_hashrnd, daddr, saddr); + return htonl(id); } EXPORT_SYMBOL(ipv6_select_ident); @@ -134,7 +138,7 @@ int ip6_dst_hoplimit(struct dst_entry *dst) EXPORT_SYMBOL(ip6_dst_hoplimit); #endif -int __ip6_local_out(struct sk_buff *skb) +static int __ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int len; @@ -144,19 +148,30 @@ int __ip6_local_out(struct sk_buff *skb) ipv6_hdr(skb)->payload_len = htons(len); IP6CB(skb)->nhoff = offsetof(struct ipv6hdr, nexthdr); - return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - skb_dst(skb)->dev, dst_output); + return nf_hook(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, skb_dst(skb)->dev, dst_output_sk); +} + +int __ip6_local_out(struct sk_buff *skb) +{ + return __ip6_local_out_sk(skb->sk, skb); } EXPORT_SYMBOL_GPL(__ip6_local_out); -int ip6_local_out(struct sk_buff *skb) +int ip6_local_out_sk(struct sock *sk, struct sk_buff *skb) { int err; - err = __ip6_local_out(skb); + err = __ip6_local_out_sk(sk, skb); if (likely(err == 1)) - err = dst_output(skb); + err = dst_output_sk(sk, skb); return err; } +EXPORT_SYMBOL_GPL(ip6_local_out_sk); + +int ip6_local_out(struct sk_buff *skb) +{ + return ip6_local_out_sk(skb->sk, skb); +} EXPORT_SYMBOL_GPL(ip6_local_out); diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index a2dfff6ff227..263a5164a6f5 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -77,8 +77,7 @@ static int dummy_ipv6_chk_addr(struct net *net, const struct in6_addr *addr, return 0; } -int ping_v6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len) +int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct inet_sock *inet = inet_sk(sk); struct ipv6_pinfo *np = inet6_sk(sk); diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index dae7f1a1e464..ca4700cb26c4 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -32,7 +32,7 @@ #include <linux/netfilter_ipv6.h> #include <linux/skbuff.h> #include <linux/compat.h> -#include <asm/uaccess.h> +#include <linux/uaccess.h> #include <asm/ioctls.h> #include <net/net_namespace.h> @@ -172,7 +172,7 @@ static bool ipv6_raw_deliver(struct sk_buff *skb, int nexthdr) read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk == NULL) + if (!sk) goto out; net = dev_net(skb->dev); @@ -367,7 +367,7 @@ void raw6_icmp_error(struct sk_buff *skb, int nexthdr, read_lock(&raw_v6_hashinfo.lock); sk = sk_head(&raw_v6_hashinfo.ht[hash]); - if (sk != NULL) { + if (sk) { /* Note: ipv6_hdr(skb) != skb->data */ const struct ipv6hdr *ip6h = (const struct ipv6hdr *)skb->data; saddr = &ip6h->saddr; @@ -456,9 +456,8 @@ int rawv6_rcv(struct sock *sk, struct sk_buff *skb) * we return it, otherwise we block. */ -static int rawv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, - int noblock, int flags, int *addr_len) +static int rawv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, + int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -631,7 +630,7 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, skb = sock_alloc_send_skb(sk, length + hlen + tlen + 15, flags & MSG_DONTWAIT, &err); - if (skb == NULL) + if (!skb) goto error; skb_reserve(skb, hlen); @@ -653,8 +652,8 @@ static int rawv6_send_hdrinc(struct sock *sk, struct msghdr *msg, int length, goto error_fault; IP6_UPD_PO_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUT, skb->len); - err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, skb, NULL, - rt->dst.dev, dst_output); + err = NF_HOOK(NFPROTO_IPV6, NF_INET_LOCAL_OUT, sk, skb, + NULL, rt->dst.dev, dst_output_sk); if (err > 0) err = net_xmit_errno(err); if (err) @@ -730,8 +729,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd, return ip_generic_getfrag(rfv->msg, to, offset, len, odd, skb); } -static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +static int rawv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; DECLARE_SOCKADDR(struct sockaddr_in6 *, sin6, msg->msg_name); @@ -791,7 +789,7 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -833,13 +831,13 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; } - if (opt == NULL) + if (!opt) opt = np->opt; if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); @@ -867,6 +865,9 @@ static int rawv6_sendmsg(struct kiocb *iocb, struct sock *sk, fl6.flowi6_oif = np->ucast_oif; security_sk_classify_flow(sk, flowi6_to_flowi(&fl6)); + if (inet->hdrincl) + fl6.flowi6_flags |= FLOWI_FLAG_KNOWN_NH; + dst = ip6_dst_lookup_flow(sk, &fl6, final_p); if (IS_ERR(dst)) { err = PTR_ERR(dst); @@ -1132,7 +1133,7 @@ static int rawv6_ioctl(struct sock *sk, int cmd, unsigned long arg) spin_lock_bh(&sk->sk_receive_queue.lock); skb = skb_peek(&sk->sk_receive_queue); - if (skb != NULL) + if (skb) amount = skb_tail_pointer(skb) - skb_transport_header(skb); spin_unlock_bh(&sk->sk_receive_queue.lock); @@ -1326,13 +1327,7 @@ static struct inet_protosw rawv6_protosw = { int __init rawv6_init(void) { - int ret; - - ret = inet6_register_protosw(&rawv6_protosw); - if (ret) - goto out; -out: - return ret; + return inet6_register_protosw(&rawv6_protosw); } void rawv6_exit(void) diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index d7d70e69973b..f1159bb76e0a 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -144,7 +144,7 @@ void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMFAILS); - if (fq->q.flags & INET_FRAG_EVICTED) + if (inet_frag_evicting(&fq->q)) goto out_rcu_unlock; IP6_INC_STATS_BH(net, __in6_dev_get(dev), IPSTATS_MIB_REASMTIMEOUT); @@ -330,7 +330,7 @@ found: fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; fq->ecn |= ecn; - add_frag_mem_limit(&fq->q, skb->truesize); + add_frag_mem_limit(fq->q.net, skb->truesize); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -430,7 +430,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, int i, plen = 0; clone = alloc_skb(0, GFP_ATOMIC); - if (clone == NULL) + if (!clone) goto out_oom; clone->next = head->next; head->next = clone; @@ -443,7 +443,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, head->len -= clone->len; clone->csum = 0; clone->ip_summed = head->ip_summed; - add_frag_mem_limit(&fq->q, clone->truesize); + add_frag_mem_limit(fq->q.net, clone->truesize); } /* We have to remove fragment header from datagram and to relocate @@ -481,7 +481,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, } fp = next; } - sub_frag_mem_limit(&fq->q, sum_truesize); + sub_frag_mem_limit(fq->q.net, sum_truesize); head->next = NULL; head->dev = dev; @@ -552,7 +552,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr, ip6_frag_ecn(hdr)); - if (fq != NULL) { + if (fq) { int ret; spin_lock(&fq->q.lock); @@ -632,7 +632,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) table = ip6_frags_ns_ctl_table; if (!net_eq(net, &init_net)) { table = kmemdup(table, sizeof(ip6_frags_ns_ctl_table), GFP_KERNEL); - if (table == NULL) + if (!table) goto err_alloc; table[0].data = &net->ipv6.frags.high_thresh; @@ -648,7 +648,7 @@ static int __net_init ip6_frags_ns_sysctl_register(struct net *net) } hdr = register_net_sysctl(net, "net/ipv6", table); - if (hdr == NULL) + if (!hdr) goto err_reg; net->ipv6.sysctl.frags_hdr = hdr; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 4688bd4d7f59..6090969937f8 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -72,8 +72,7 @@ enum rt6_nud_state { RT6_NUD_SUCCEED = 1 }; -static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, - const struct in6_addr *dest); +static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort); static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie); static unsigned int ip6_default_advmss(const struct dst_entry *dst); static unsigned int ip6_mtu(const struct dst_entry *dst); @@ -92,6 +91,7 @@ static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb, u32 mtu); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); +static void rt6_dst_from_metrics_check(struct rt6_info *rt); static int rt6_score_route(struct rt6_info *rt, int oif, int strict); #ifdef CONFIG_IPV6_ROUTE_INFO @@ -104,65 +104,82 @@ static struct rt6_info *rt6_get_route_info(struct net *net, const struct in6_addr *gwaddr, int ifindex); #endif -static void rt6_bind_peer(struct rt6_info *rt, int create) +struct uncached_list { + spinlock_t lock; + struct list_head head; +}; + +static DEFINE_PER_CPU_ALIGNED(struct uncached_list, rt6_uncached_list); + +static void rt6_uncached_list_add(struct rt6_info *rt) { - struct inet_peer_base *base; - struct inet_peer *peer; + struct uncached_list *ul = raw_cpu_ptr(&rt6_uncached_list); - base = inetpeer_base_ptr(rt->_rt6i_peer); - if (!base) - return; + rt->dst.flags |= DST_NOCACHE; + rt->rt6i_uncached_list = ul; - peer = inet_getpeer_v6(base, &rt->rt6i_dst.addr, create); - if (peer) { - if (!rt6_set_peer(rt, peer)) - inet_putpeer(peer); + spin_lock_bh(&ul->lock); + list_add_tail(&rt->rt6i_uncached, &ul->head); + spin_unlock_bh(&ul->lock); +} + +static void rt6_uncached_list_del(struct rt6_info *rt) +{ + if (!list_empty(&rt->rt6i_uncached)) { + struct uncached_list *ul = rt->rt6i_uncached_list; + + spin_lock_bh(&ul->lock); + list_del(&rt->rt6i_uncached); + spin_unlock_bh(&ul->lock); } } -static struct inet_peer *__rt6_get_peer(struct rt6_info *rt, int create) +static void rt6_uncached_list_flush_dev(struct net *net, struct net_device *dev) { - if (rt6_has_peer(rt)) - return rt6_peer_ptr(rt); + struct net_device *loopback_dev = net->loopback_dev; + int cpu; + + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + struct rt6_info *rt; + + spin_lock_bh(&ul->lock); + list_for_each_entry(rt, &ul->head, rt6i_uncached) { + struct inet6_dev *rt_idev = rt->rt6i_idev; + struct net_device *rt_dev = rt->dst.dev; - rt6_bind_peer(rt, create); - return (rt6_has_peer(rt) ? rt6_peer_ptr(rt) : NULL); + if (rt_idev && (rt_idev->dev == dev || !dev) && + rt_idev->dev != loopback_dev) { + rt->rt6i_idev = in6_dev_get(loopback_dev); + in6_dev_put(rt_idev); + } + + if (rt_dev && (rt_dev == dev || !dev) && + rt_dev != loopback_dev) { + rt->dst.dev = loopback_dev; + dev_hold(rt->dst.dev); + dev_put(rt_dev); + } + } + spin_unlock_bh(&ul->lock); + } } -static struct inet_peer *rt6_get_peer_create(struct rt6_info *rt) +static u32 *rt6_pcpu_cow_metrics(struct rt6_info *rt) { - return __rt6_get_peer(rt, 1); + return dst_metrics_write_ptr(rt->dst.from); } static u32 *ipv6_cow_metrics(struct dst_entry *dst, unsigned long old) { - struct rt6_info *rt = (struct rt6_info *) dst; - struct inet_peer *peer; - u32 *p = NULL; + struct rt6_info *rt = (struct rt6_info *)dst; - if (!(rt->dst.flags & DST_HOST)) + if (rt->rt6i_flags & RTF_PCPU) + return rt6_pcpu_cow_metrics(rt); + else if (rt->rt6i_flags & RTF_CACHE) + return NULL; + else return dst_cow_metrics_generic(dst, old); - - peer = rt6_get_peer_create(rt); - if (peer) { - u32 *old_p = __DST_METRICS_PTR(old); - unsigned long prev, new; - - p = peer->metrics; - if (inet_metrics_new(peer) || - (old & DST_METRICS_FORCE_OVERWRITE)) - memcpy(p, old_p, sizeof(u32) * RTAX_MAX); - - new = (unsigned long) p; - prev = cmpxchg(&dst->_metrics, old, new); - - if (prev != old) { - p = __DST_METRICS_PTR(prev); - if (prev & DST_METRICS_READ_ONLY) - p = NULL; - } - } - return p; } static inline const void *choose_neigh_daddr(struct rt6_info *rt, @@ -194,7 +211,6 @@ static struct neighbour *ip6_neigh_lookup(const struct dst_entry *dst, static struct dst_ops ip6_dst_ops_template = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .gc = ip6_dst_gc, .gc_thresh = 1024, .check = ip6_dst_check, @@ -236,7 +252,6 @@ static u32 *ip6_rt_blackhole_cow_metrics(struct dst_entry *dst, static struct dst_ops ip6_dst_blackhole_ops = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .destroy = ip6_dst_destroy, .check = ip6_dst_check, .mtu = ip6_blackhole_mtu, @@ -301,10 +316,10 @@ static const struct rt6_info ip6_blk_hole_entry_template = { #endif /* allocate dst with ip6_dst_ops */ -static inline struct rt6_info *ip6_dst_alloc(struct net *net, - struct net_device *dev, - int flags, - struct fib6_table *table) +static struct rt6_info *__ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags, + struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, 0, DST_OBSOLETE_FORCE_CHK, flags); @@ -313,21 +328,51 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); - rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); INIT_LIST_HEAD(&rt->rt6i_siblings); + INIT_LIST_HEAD(&rt->rt6i_uncached); } return rt; } +static struct rt6_info *ip6_dst_alloc(struct net *net, + struct net_device *dev, + int flags, + struct fib6_table *table) +{ + struct rt6_info *rt = __ip6_dst_alloc(net, dev, flags, table); + + if (rt) { + rt->rt6i_pcpu = alloc_percpu_gfp(struct rt6_info *, GFP_ATOMIC); + if (rt->rt6i_pcpu) { + int cpu; + + for_each_possible_cpu(cpu) { + struct rt6_info **p; + + p = per_cpu_ptr(rt->rt6i_pcpu, cpu); + /* no one shares rt */ + *p = NULL; + } + } else { + dst_destroy((struct dst_entry *)rt); + return NULL; + } + } + + return rt; +} + static void ip6_dst_destroy(struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *)dst; - struct inet6_dev *idev = rt->rt6i_idev; struct dst_entry *from = dst->from; + struct inet6_dev *idev; - if (!(rt->dst.flags & DST_HOST)) - dst_destroy_metrics_generic(dst); + dst_destroy_metrics_generic(dst); + free_percpu(rt->rt6i_pcpu); + rt6_uncached_list_del(rt); + idev = rt->rt6i_idev; if (idev) { rt->rt6i_idev = NULL; in6_dev_put(idev); @@ -335,11 +380,6 @@ static void ip6_dst_destroy(struct dst_entry *dst) dst->from = NULL; dst_release(from); - - if (rt6_has_peer(rt)) { - struct inet_peer *peer = rt6_peer_ptr(rt); - inet_putpeer(peer); - } } static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, @@ -654,15 +694,33 @@ static struct rt6_info *find_rr_leaf(struct fib6_node *fn, u32 metric, int oif, int strict, bool *do_rr) { - struct rt6_info *rt, *match; + struct rt6_info *rt, *match, *cont; int mpri = -1; match = NULL; - for (rt = rr_head; rt && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + cont = NULL; + for (rt = rr_head; rt; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + + match = find_match(rt, oif, strict, &mpri, match, do_rr); + } + + for (rt = fn->leaf; rt && rt != rr_head; rt = rt->dst.rt6_next) { + if (rt->rt6i_metric != metric) { + cont = rt; + break; + } + match = find_match(rt, oif, strict, &mpri, match, do_rr); - for (rt = fn->leaf; rt && rt != rr_head && rt->rt6i_metric == metric; - rt = rt->dst.rt6_next) + } + + if (match || !cont) + return match; + + for (rt = cont; rt; rt = rt->dst.rt6_next) match = find_match(rt, oif, strict, &mpri, match, do_rr); return match; @@ -696,6 +754,11 @@ static struct rt6_info *rt6_select(struct fib6_node *fn, int oif, int strict) return match ? match : net->ipv6.ip6_null_entry; } +static bool rt6_is_gw_or_nonexthop(const struct rt6_info *rt) +{ + return (rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY)); +} + #ifdef CONFIG_IPV6_ROUTE_INFO int rt6_route_rcv(struct net_device *dev, u8 *opt, int len, const struct in6_addr *gwaddr) @@ -874,9 +937,9 @@ int ip6_ins_rt(struct rt6_info *rt) return __ip6_ins_rt(rt, &info, &mxc); } -static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, - const struct in6_addr *daddr, - const struct in6_addr *saddr) +static struct rt6_info *ip6_rt_cache_alloc(struct rt6_info *ort, + const struct in6_addr *daddr, + const struct in6_addr *saddr) { struct rt6_info *rt; @@ -884,15 +947,26 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, * Clone the route. */ - rt = ip6_rt_copy(ort, daddr); + if (ort->rt6i_flags & (RTF_CACHE | RTF_PCPU)) + ort = (struct rt6_info *)ort->dst.from; - if (rt) { + rt = __ip6_dst_alloc(dev_net(ort->dst.dev), ort->dst.dev, + 0, ort->rt6i_table); + + if (!rt) + return NULL; + + ip6_rt_copy_init(rt, ort); + rt->rt6i_flags |= RTF_CACHE; + rt->rt6i_metric = 0; + rt->dst.flags |= DST_HOST; + rt->rt6i_dst.addr = *daddr; + rt->rt6i_dst.plen = 128; + + if (!rt6_is_gw_or_nonexthop(ort)) { if (ort->rt6i_dst.plen != 128 && ipv6_addr_equal(&ort->rt6i_dst.addr, daddr)) rt->rt6i_flags |= RTF_ANYCAST; - - rt->rt6i_flags |= RTF_CACHE; - #ifdef CONFIG_IPV6_SUBTREES if (rt->rt6i_src.plen && saddr) { rt->rt6i_src.addr = *saddr; @@ -904,30 +978,65 @@ static struct rt6_info *rt6_alloc_cow(struct rt6_info *ort, return rt; } -static struct rt6_info *rt6_alloc_clone(struct rt6_info *ort, - const struct in6_addr *daddr) +static struct rt6_info *ip6_rt_pcpu_alloc(struct rt6_info *rt) { - struct rt6_info *rt = ip6_rt_copy(ort, daddr); + struct rt6_info *pcpu_rt; - if (rt) - rt->rt6i_flags |= RTF_CACHE; - return rt; + pcpu_rt = __ip6_dst_alloc(dev_net(rt->dst.dev), + rt->dst.dev, rt->dst.flags, + rt->rt6i_table); + + if (!pcpu_rt) + return NULL; + ip6_rt_copy_init(pcpu_rt, rt); + pcpu_rt->rt6i_protocol = rt->rt6i_protocol; + pcpu_rt->rt6i_flags |= RTF_PCPU; + return pcpu_rt; +} + +/* It should be called with read_lock_bh(&tb6_lock) acquired */ +static struct rt6_info *rt6_get_pcpu_route(struct rt6_info *rt) +{ + struct rt6_info *pcpu_rt, *prev, **p; + + p = this_cpu_ptr(rt->rt6i_pcpu); + pcpu_rt = *p; + + if (pcpu_rt) + goto done; + + pcpu_rt = ip6_rt_pcpu_alloc(rt); + if (!pcpu_rt) { + struct net *net = dev_net(rt->dst.dev); + + pcpu_rt = net->ipv6.ip6_null_entry; + goto done; + } + + prev = cmpxchg(p, NULL, pcpu_rt); + if (prev) { + /* If someone did it before us, return prev instead */ + dst_destroy(&pcpu_rt->dst); + pcpu_rt = prev; + } + +done: + dst_hold(&pcpu_rt->dst); + rt6_dst_from_metrics_check(pcpu_rt); + return pcpu_rt; } static struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int oif, struct flowi6 *fl6, int flags) { struct fib6_node *fn, *saved_fn; - struct rt6_info *rt, *nrt; + struct rt6_info *rt; int strict = 0; - int attempts = 3; - int err; strict |= flags & RT6_LOOKUP_F_IFACE; if (net->ipv6.devconf_all->forwarding == 0) strict |= RT6_LOOKUP_F_REACHABLE; -redo_fib6_lookup_lock: read_lock_bh(&table->tb6_lock); fn = fib6_lookup(&table->tb6_root, &fl6->daddr, &fl6->saddr); @@ -946,51 +1055,52 @@ redo_rt6_select: strict &= ~RT6_LOOKUP_F_REACHABLE; fn = saved_fn; goto redo_rt6_select; - } else { - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - goto out2; } } - dst_hold(&rt->dst); - read_unlock_bh(&table->tb6_lock); - if (rt->rt6i_flags & RTF_CACHE) - goto out2; + if (rt == net->ipv6.ip6_null_entry || (rt->rt6i_flags & RTF_CACHE)) { + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); - if (!(rt->rt6i_flags & (RTF_NONEXTHOP | RTF_GATEWAY))) - nrt = rt6_alloc_cow(rt, &fl6->daddr, &fl6->saddr); - else if (!(rt->dst.flags & DST_HOST)) - nrt = rt6_alloc_clone(rt, &fl6->daddr); - else - goto out2; + rt6_dst_from_metrics_check(rt); + return rt; + } else if (unlikely((fl6->flowi6_flags & FLOWI_FLAG_KNOWN_NH) && + !(rt->rt6i_flags & RTF_GATEWAY))) { + /* Create a RTF_CACHE clone which will not be + * owned by the fib6 tree. It is for the special case where + * the daddr in the skb during the neighbor look-up is different + * from the fl6->daddr used to look-up route here. + */ - ip6_rt_put(rt); - rt = nrt ? : net->ipv6.ip6_null_entry; + struct rt6_info *uncached_rt; - dst_hold(&rt->dst); - if (nrt) { - err = ip6_ins_rt(nrt); - if (!err) - goto out2; - } + dst_use(&rt->dst, jiffies); + read_unlock_bh(&table->tb6_lock); - if (--attempts <= 0) - goto out2; + uncached_rt = ip6_rt_cache_alloc(rt, &fl6->daddr, NULL); + dst_release(&rt->dst); - /* - * Race condition! In the gap, when table->tb6_lock was - * released someone could insert this route. Relookup. - */ - ip6_rt_put(rt); - goto redo_fib6_lookup_lock; + if (uncached_rt) + rt6_uncached_list_add(uncached_rt); + else + uncached_rt = net->ipv6.ip6_null_entry; -out2: - rt->dst.lastuse = jiffies; - rt->dst.__use++; + dst_hold(&uncached_rt->dst); + return uncached_rt; - return rt; + } else { + /* Get a percpu copy */ + + struct rt6_info *pcpu_rt; + + rt->dst.lastuse = jiffies; + rt->dst.__use++; + pcpu_rt = rt6_get_pcpu_route(rt); + read_unlock_bh(&table->tb6_lock); + + return pcpu_rt; + } } static struct rt6_info *ip6_pol_route_input(struct net *net, struct fib6_table *table, @@ -1061,7 +1171,6 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori new = &rt->dst; memset(new + 1, 0, sizeof(*rt) - sizeof(*new)); - rt6_init_peer(rt, net->ipv6.peers); new->__use = 1; new->input = dst_discard; @@ -1095,6 +1204,33 @@ struct dst_entry *ip6_blackhole_route(struct net *net, struct dst_entry *dst_ori * Destination cache support functions */ +static void rt6_dst_from_metrics_check(struct rt6_info *rt) +{ + if (rt->dst.from && + dst_metrics_ptr(&rt->dst) != dst_metrics_ptr(rt->dst.from)) + dst_init_metrics(&rt->dst, dst_metrics_ptr(rt->dst.from), true); +} + +static struct dst_entry *rt6_check(struct rt6_info *rt, u32 cookie) +{ + if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) + return NULL; + + if (rt6_check_expired(rt)) + return NULL; + + return &rt->dst; +} + +static struct dst_entry *rt6_dst_from_check(struct rt6_info *rt, u32 cookie) +{ + if (rt->dst.obsolete == DST_OBSOLETE_FORCE_CHK && + rt6_check((struct rt6_info *)(rt->dst.from), cookie)) + return &rt->dst; + else + return NULL; +} + static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) { struct rt6_info *rt; @@ -1105,13 +1241,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) * DST_OBSOLETE_FORCE_CHK which forces validation calls down * into this function always. */ - if (!rt->rt6i_node || (rt->rt6i_node->fn_sernum != cookie)) - return NULL; - if (rt6_check_expired(rt)) - return NULL; + rt6_dst_from_metrics_check(rt); - return dst; + if ((rt->rt6i_flags & RTF_PCPU) || unlikely(dst->flags & DST_NOCACHE)) + return rt6_dst_from_check(rt, cookie); + else + return rt6_check(rt, cookie); } static struct dst_entry *ip6_negative_advice(struct dst_entry *dst) @@ -1150,24 +1286,63 @@ static void ip6_link_failure(struct sk_buff *skb) } } -static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) +static void rt6_do_update_pmtu(struct rt6_info *rt, u32 mtu) +{ + struct net *net = dev_net(rt->dst.dev); + + rt->rt6i_flags |= RTF_MODIFIED; + rt->rt6i_pmtu = mtu; + rt6_update_expires(rt, net->ipv6.sysctl.ip6_rt_mtu_expires); +} + +static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, + const struct ipv6hdr *iph, u32 mtu) { struct rt6_info *rt6 = (struct rt6_info *)dst; - dst_confirm(dst); - if (mtu < dst_mtu(dst) && rt6->rt6i_dst.plen == 128) { - struct net *net = dev_net(dst->dev); + if (rt6->rt6i_flags & RTF_LOCAL) + return; - rt6->rt6i_flags |= RTF_MODIFIED; - if (mtu < IPV6_MIN_MTU) - mtu = IPV6_MIN_MTU; + dst_confirm(dst); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); + if (mtu >= dst_mtu(dst)) + return; - dst_metric_set(dst, RTAX_MTU, mtu); - rt6_update_expires(rt6, net->ipv6.sysctl.ip6_rt_mtu_expires); + if (rt6->rt6i_flags & RTF_CACHE) { + rt6_do_update_pmtu(rt6, mtu); + } else { + const struct in6_addr *daddr, *saddr; + struct rt6_info *nrt6; + + if (iph) { + daddr = &iph->daddr; + saddr = &iph->saddr; + } else if (sk) { + daddr = &sk->sk_v6_daddr; + saddr = &inet6_sk(sk)->saddr; + } else { + return; + } + nrt6 = ip6_rt_cache_alloc(rt6, daddr, saddr); + if (nrt6) { + rt6_do_update_pmtu(nrt6, mtu); + + /* ip6_ins_rt(nrt6) will bump the + * rt6->rt6i_node->fn_sernum + * which will fail the next rt6_check() and + * invalidate the sk->sk_dst_cache. + */ + ip6_ins_rt(nrt6); + } } } +static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, + struct sk_buff *skb, u32 mtu) +{ + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); +} + void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, int oif, u32 mark) { @@ -1184,7 +1359,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - ip6_rt_update_pmtu(dst, NULL, skb, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); @@ -1343,12 +1518,17 @@ static unsigned int ip6_default_advmss(const struct dst_entry *dst) static unsigned int ip6_mtu(const struct dst_entry *dst) { + const struct rt6_info *rt = (const struct rt6_info *)dst; + unsigned int mtu = rt->rt6i_pmtu; struct inet6_dev *idev; - unsigned int mtu = dst_metric_raw(dst, RTAX_MTU); if (mtu) goto out; + mtu = dst_metric_raw(dst, RTAX_MTU); + if (mtu) + goto out; + mtu = IPV6_MIN_MTU; rcu_read_lock(); @@ -1478,7 +1658,7 @@ static int ip6_convert_metrics(struct mx6_config *mxc, int remaining; u32 *mp; - if (cfg->fc_mx == NULL) + if (!cfg->fc_mx) return 0; mp = kzalloc(sizeof(u32) * RTAX_MAX, GFP_KERNEL); @@ -1592,10 +1772,8 @@ int ip6_route_add(struct fib6_config *cfg) ipv6_addr_prefix(&rt->rt6i_dst.addr, &cfg->fc_dst, cfg->fc_dst_len); rt->rt6i_dst.plen = cfg->fc_dst_len; - if (rt->rt6i_dst.plen == 128) { + if (rt->rt6i_dst.plen == 128) rt->dst.flags |= DST_HOST; - dst_metrics_set_force_overwrite(&rt->dst); - } #ifdef CONFIG_IPV6_SUBTREES ipv6_addr_prefix(&rt->rt6i_src.addr, &cfg->fc_src, cfg->fc_src_len); @@ -1653,6 +1831,16 @@ int ip6_route_add(struct fib6_config *cfg) int gwa_type; gw_addr = &cfg->fc_gateway; + + /* if gw_addr is local we will fail to detect this in case + * address is still TENTATIVE (DAD in progress). rt6_lookup() + * will return already-added prefix route via interface that + * prefix route was assigned to, which might be non-loopback. + */ + err = -EINVAL; + if (ipv6_chk_addr_and_flags(net, gw_addr, NULL, 0, 0)) + goto out; + rt->rt6i_gateway = *gw_addr; gwa_type = ipv6_addr_type(gw_addr); @@ -1666,7 +1854,6 @@ int ip6_route_add(struct fib6_config *cfg) (SIT, PtP, NBMA NOARP links) it is handy to allow some exceptions. --ANK */ - err = -EINVAL; if (!(gwa_type & IPV6_ADDR_UNICAST)) goto out; @@ -1787,6 +1974,9 @@ static int ip6_route_del(struct fib6_config *cfg) if (fn) { for (rt = fn->leaf; rt; rt = rt->dst.rt6_next) { + if ((rt->rt6i_flags & RTF_CACHE) && + !(cfg->fc_flags & RTF_CACHE)) + continue; if (cfg->fc_ifindex && (!rt->dst.dev || rt->dst.dev->ifindex != cfg->fc_ifindex)) @@ -1896,7 +2086,7 @@ static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_bu NEIGH_UPDATE_F_ISROUTER)) ); - nrt = ip6_rt_copy(rt, &msg->dest); + nrt = ip6_rt_cache_alloc(rt, &msg->dest, NULL); if (!nrt) goto out; @@ -1928,42 +2118,35 @@ out: * Misc support functions */ -static struct rt6_info *ip6_rt_copy(struct rt6_info *ort, - const struct in6_addr *dest) +static void rt6_set_from(struct rt6_info *rt, struct rt6_info *from) { - struct net *net = dev_net(ort->dst.dev); - struct rt6_info *rt = ip6_dst_alloc(net, ort->dst.dev, 0, - ort->rt6i_table); - - if (rt) { - rt->dst.input = ort->dst.input; - rt->dst.output = ort->dst.output; - rt->dst.flags |= DST_HOST; + BUG_ON(from->dst.from); - rt->rt6i_dst.addr = *dest; - rt->rt6i_dst.plen = 128; - dst_copy_metrics(&rt->dst, &ort->dst); - rt->dst.error = ort->dst.error; - rt->rt6i_idev = ort->rt6i_idev; - if (rt->rt6i_idev) - in6_dev_hold(rt->rt6i_idev); - rt->dst.lastuse = jiffies; - - if (ort->rt6i_flags & RTF_GATEWAY) - rt->rt6i_gateway = ort->rt6i_gateway; - else - rt->rt6i_gateway = *dest; - rt->rt6i_flags = ort->rt6i_flags; - rt6_set_from(rt, ort); - rt->rt6i_metric = 0; + rt->rt6i_flags &= ~RTF_EXPIRES; + dst_hold(&from->dst); + rt->dst.from = &from->dst; + dst_init_metrics(&rt->dst, dst_metrics_ptr(&from->dst), true); +} +static void ip6_rt_copy_init(struct rt6_info *rt, struct rt6_info *ort) +{ + rt->dst.input = ort->dst.input; + rt->dst.output = ort->dst.output; + rt->rt6i_dst = ort->rt6i_dst; + rt->dst.error = ort->dst.error; + rt->rt6i_idev = ort->rt6i_idev; + if (rt->rt6i_idev) + in6_dev_hold(rt->rt6i_idev); + rt->dst.lastuse = jiffies; + rt->rt6i_gateway = ort->rt6i_gateway; + rt->rt6i_flags = ort->rt6i_flags; + rt6_set_from(rt, ort); + rt->rt6i_metric = ort->rt6i_metric; #ifdef CONFIG_IPV6_SUBTREES - memcpy(&rt->rt6i_src, &ort->rt6i_src, sizeof(struct rt6key)); + rt->rt6i_src = ort->rt6i_src; #endif - memcpy(&rt->rt6i_prefsrc, &ort->rt6i_prefsrc, sizeof(struct rt6key)); - rt->rt6i_table = ort->rt6i_table; - } - return rt; + rt->rt6i_prefsrc = ort->rt6i_prefsrc; + rt->rt6i_table = ort->rt6i_table; } #ifdef CONFIG_IPV6_ROUTE_INFO @@ -2247,9 +2430,10 @@ int ip6_route_get_saddr(struct net *net, unsigned int prefs, struct in6_addr *saddr) { - struct inet6_dev *idev = ip6_dst_idev((struct dst_entry *)rt); + struct inet6_dev *idev = + rt ? ip6_dst_idev((struct dst_entry *)rt) : NULL; int err = 0; - if (rt->rt6i_prefsrc.plen) + if (rt && rt->rt6i_prefsrc.plen) *saddr = rt->rt6i_prefsrc.addr; else err = ipv6_dev_get_saddr(net, idev ? idev->dev : NULL, @@ -2337,6 +2521,7 @@ void rt6_ifdown(struct net *net, struct net_device *dev) fib6_clean_all(net, fib6_ifdown, &adn); icmp6_clean_all(fib6_ifdown, &adn); + rt6_uncached_list_flush_dev(net, dev); } struct rt6_mtu_change_arg { @@ -2374,11 +2559,20 @@ static int rt6_mtu_change_route(struct rt6_info *rt, void *p_arg) PMTU discouvery. */ if (rt->dst.dev == arg->dev && - !dst_metric_locked(&rt->dst, RTAX_MTU) && - (dst_mtu(&rt->dst) >= arg->mtu || - (dst_mtu(&rt->dst) < arg->mtu && - dst_mtu(&rt->dst) == idev->cnf.mtu6))) { - dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + !dst_metric_locked(&rt->dst, RTAX_MTU)) { + if (rt->rt6i_flags & RTF_CACHE) { + /* For RTF_CACHE with rt6i_pmtu == 0 + * (i.e. a redirected route), + * the metrics of its rt->dst.from has already + * been updated. + */ + if (rt->rt6i_pmtu && rt->rt6i_pmtu > arg->mtu) + rt->rt6i_pmtu = arg->mtu; + } else if (dst_mtu(&rt->dst) >= arg->mtu || + (dst_mtu(&rt->dst) < arg->mtu && + dst_mtu(&rt->dst) == idev->cnf.mtu6)) { + dst_metric_set(&rt->dst, RTAX_MTU, arg->mtu); + } } return 0; } @@ -2400,6 +2594,7 @@ static const struct nla_policy rtm_ipv6_policy[RTA_MAX+1] = { [RTA_PRIORITY] = { .type = NLA_U32 }, [RTA_METRICS] = { .type = NLA_NESTED }, [RTA_MULTIPATH] = { .len = sizeof(struct rtnexthop) }, + [RTA_PREF] = { .type = NLA_U8 }, }; static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -2407,6 +2602,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, { struct rtmsg *rtm; struct nlattr *tb[RTA_MAX+1]; + unsigned int pref; int err; err = nlmsg_parse(nlh, sizeof(*rtm), tb, RTA_MAX, rtm_ipv6_policy); @@ -2433,12 +2629,15 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; + if (rtm->rtm_flags & RTM_F_CLONED) + cfg->fc_flags |= RTF_CACHE; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); if (tb[RTA_GATEWAY]) { - nla_memcpy(&cfg->fc_gateway, tb[RTA_GATEWAY], 16); + cfg->fc_gateway = nla_get_in6_addr(tb[RTA_GATEWAY]); cfg->fc_flags |= RTF_GATEWAY; } @@ -2461,7 +2660,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, } if (tb[RTA_PREFSRC]) - nla_memcpy(&cfg->fc_prefsrc, tb[RTA_PREFSRC], 16); + cfg->fc_prefsrc = nla_get_in6_addr(tb[RTA_PREFSRC]); if (tb[RTA_OIF]) cfg->fc_ifindex = nla_get_u32(tb[RTA_OIF]); @@ -2482,6 +2681,14 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_mp_len = nla_len(tb[RTA_MULTIPATH]); } + if (tb[RTA_PREF]) { + pref = nla_get_u8(tb[RTA_PREF]); + if (pref != ICMPV6_ROUTER_PREF_LOW && + pref != ICMPV6_ROUTER_PREF_HIGH) + pref = ICMPV6_ROUTER_PREF_MEDIUM; + cfg->fc_flags |= RTF_PREF(pref); + } + err = 0; errout: return err; @@ -2495,9 +2702,9 @@ static int ip6_route_multipath(struct fib6_config *cfg, int add) int attrlen; int err = 0, last_err = 0; + remaining = cfg->fc_mp_len; beginning: rtnh = (struct rtnexthop *)cfg->fc_mp; - remaining = cfg->fc_mp_len; /* Parse a Multipath Entry */ while (rtnh_ok(rtnh, remaining)) { @@ -2511,7 +2718,7 @@ beginning: nla = nla_find(attrs, attrlen, RTA_GATEWAY); if (nla) { - nla_memcpy(&r_cfg.fc_gateway, nla, 16); + r_cfg.fc_gateway = nla_get_in6_addr(nla); r_cfg.fc_flags |= RTF_GATEWAY; } } @@ -2527,15 +2734,19 @@ beginning: * next hops that have been already added. */ add = 0; + remaining = cfg->fc_mp_len - remaining; goto beginning; } } /* Because each route is added like a single route we remove - * this flag after the first nexthop (if there is a collision, - * we have already fail to add the first nexthop: - * fib6_add_rt2node() has reject it). + * these flags after the first nexthop: if there is a collision, + * we have already failed to add the first nexthop: + * fib6_add_rt2node() has rejected it; when replacing, old + * nexthops have been replaced by first new, the rest should + * be added to it. */ - cfg->fc_nlinfo.nlh->nlmsg_flags &= ~NLM_F_EXCL; + cfg->fc_nlinfo.nlh->nlmsg_flags &= ~(NLM_F_EXCL | + NLM_F_REPLACE); rtnh = rtnh_next(rtnh, &remaining); } @@ -2585,7 +2796,8 @@ static inline size_t rt6_nlmsg_size(void) + nla_total_size(4) /* RTA_PRIORITY */ + RTAX_MAX * nla_total_size(4) /* RTA_METRICS */ + nla_total_size(sizeof(struct rta_cacheinfo)) - + nla_total_size(TCP_CA_NAME_MAX); /* RTAX_CC_ALGO */ + + nla_total_size(TCP_CA_NAME_MAX) /* RTAX_CC_ALGO */ + + nla_total_size(1); /* RTA_PREF */ } static int rt6_fill_node(struct net *net, @@ -2594,6 +2806,7 @@ static int rt6_fill_node(struct net *net, int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { + u32 metrics[RTAX_MAX]; struct rtmsg *rtm; struct nlmsghdr *nlh; long expires; @@ -2660,19 +2873,19 @@ static int rt6_fill_node(struct net *net, rtm->rtm_flags |= RTM_F_CLONED; if (dst) { - if (nla_put(skb, RTA_DST, 16, dst)) + if (nla_put_in6_addr(skb, RTA_DST, dst)) goto nla_put_failure; rtm->rtm_dst_len = 128; } else if (rtm->rtm_dst_len) - if (nla_put(skb, RTA_DST, 16, &rt->rt6i_dst.addr)) + if (nla_put_in6_addr(skb, RTA_DST, &rt->rt6i_dst.addr)) goto nla_put_failure; #ifdef CONFIG_IPV6_SUBTREES if (src) { - if (nla_put(skb, RTA_SRC, 16, src)) + if (nla_put_in6_addr(skb, RTA_SRC, src)) goto nla_put_failure; rtm->rtm_src_len = 128; } else if (rtm->rtm_src_len && - nla_put(skb, RTA_SRC, 16, &rt->rt6i_src.addr)) + nla_put_in6_addr(skb, RTA_SRC, &rt->rt6i_src.addr)) goto nla_put_failure; #endif if (iif) { @@ -2696,22 +2909,25 @@ static int rt6_fill_node(struct net *net, } else if (dst) { struct in6_addr saddr_buf; if (ip6_route_get_saddr(net, rt, dst, 0, &saddr_buf) == 0 && - nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } if (rt->rt6i_prefsrc.plen) { struct in6_addr saddr_buf; saddr_buf = rt->rt6i_prefsrc.addr; - if (nla_put(skb, RTA_PREFSRC, 16, &saddr_buf)) + if (nla_put_in6_addr(skb, RTA_PREFSRC, &saddr_buf)) goto nla_put_failure; } - if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) + memcpy(metrics, dst_metrics_ptr(&rt->dst), sizeof(metrics)); + if (rt->rt6i_pmtu) + metrics[RTAX_MTU - 1] = rt->rt6i_pmtu; + if (rtnetlink_put_metrics(skb, metrics) < 0) goto nla_put_failure; if (rt->rt6i_flags & RTF_GATEWAY) { - if (nla_put(skb, RTA_GATEWAY, 16, &rt->rt6i_gateway) < 0) + if (nla_put_in6_addr(skb, RTA_GATEWAY, &rt->rt6i_gateway) < 0) goto nla_put_failure; } @@ -2726,6 +2942,9 @@ static int rt6_fill_node(struct net *net, if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; + if (nla_put_u8(skb, RTA_PREF, IPV6_EXTRACT_PREF(rt->rt6i_flags))) + goto nla_put_failure; + nlmsg_end(skb, nlh); return 0; @@ -3199,6 +3418,7 @@ static struct notifier_block ip6_route_dev_notifier = { int __init ip6_route_init(void) { int ret; + int cpu; ret = -ENOMEM; ip6_dst_ops_template.kmem_cachep = @@ -3258,6 +3478,13 @@ int __init ip6_route_init(void) if (ret) goto out_register_late_subsys; + for_each_possible_cpu(cpu) { + struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu); + + INIT_LIST_HEAD(&ul->head); + spin_lock_init(&ul->lock); + } + out: return ret; diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index e4cbd5798eba..ac35a28599be 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -118,7 +118,7 @@ static struct ip_tunnel *ipip6_tunnel_lookup(struct net *net, return t; } t = rcu_dereference(sitn->tunnels_wc[0]); - if ((t != NULL) && (t->dev->flags & IFF_UP)) + if (t && (t->dev->flags & IFF_UP)) return t; return NULL; } @@ -251,7 +251,7 @@ static struct ip_tunnel *ipip6_tunnel_locate(struct net *net, dev = alloc_netdev(sizeof(*t), name, NET_NAME_UNKNOWN, ipip6_tunnel_setup); - if (dev == NULL) + if (!dev) return NULL; dev_net_set(dev, net); @@ -555,7 +555,7 @@ static int ipip6_err(struct sk_buff *skb, u32 info) skb->dev, iph->daddr, iph->saddr); - if (t == NULL) + if (!t) goto out; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { @@ -671,7 +671,7 @@ static int ipip6_rcv(struct sk_buff *skb) tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); - if (tunnel != NULL) { + if (tunnel) { struct pcpu_sw_netstats *tstats; if (tunnel->parms.iph.protocol != IPPROTO_IPV6 && @@ -733,7 +733,7 @@ static int ipip_rcv(struct sk_buff *skb) iph = ip_hdr(skb); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); - if (tunnel != NULL) { + if (tunnel) { if (tunnel->parms.iph.protocol != IPPROTO_IPIP && tunnel->parms.iph.protocol != 0) goto drop; @@ -838,7 +838,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - if (neigh == NULL) { + if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -867,7 +867,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, if (skb_dst(skb)) neigh = dst_neigh_lookup(skb_dst(skb), &iph6->daddr); - if (neigh == NULL) { + if (!neigh) { net_dbg_ratelimited("nexthop == NULL\n"); goto tx_error; } @@ -983,7 +983,7 @@ static netdev_tx_t ipip6_tunnel_xmit(struct sk_buff *skb, skb_set_inner_ipproto(skb, IPPROTO_IPV6); - err = iptunnel_xmit(skb->sk, rt, skb, fl4.saddr, fl4.daddr, + err = iptunnel_xmit(NULL, rt, skb, fl4.saddr, fl4.daddr, protocol, tos, ttl, df, !net_eq(tunnel->net, dev_net(dev))); iptunnel_xmit_stats(err, &dev->stats, dev->tstats); @@ -1076,7 +1076,6 @@ static void ipip6_tunnel_bind_dev(struct net_device *dev) if (dev->mtu < IPV6_MIN_MTU) dev->mtu = IPV6_MIN_MTU; } - dev->iflink = tunnel->parms.link; } static void ipip6_tunnel_update(struct ip_tunnel *t, struct ip_tunnel_parm *p) @@ -1158,7 +1157,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) break; } t = ipip6_tunnel_locate(net, &p, 0); - if (t == NULL) + if (!t) t = netdev_priv(dev); } @@ -1206,7 +1205,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL); if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { - if (t != NULL) { + if (t) { if (t->dev != dev) { err = -EEXIST; break; @@ -1242,7 +1241,7 @@ ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) goto done; err = -ENOENT; t = ipip6_tunnel_locate(net, &p, 0); - if (t == NULL) + if (!t) goto done; err = -EPERM; if (t == netdev_priv(sitn->fb_tunnel_dev)) @@ -1336,6 +1335,7 @@ static const struct net_device_ops ipip6_netdev_ops = { .ndo_do_ioctl = ipip6_tunnel_ioctl, .ndo_change_mtu = ipip6_tunnel_change_mtu, .ndo_get_stats64 = ip_tunnel_get_stats64, + .ndo_get_iflink = ip_tunnel_get_iflink, }; static void ipip6_dev_free(struct net_device *dev) @@ -1366,7 +1366,6 @@ static void ipip6_tunnel_setup(struct net_device *dev) dev->mtu = ETH_DATA_LEN - t_hlen; dev->flags = IFF_NOARP; netif_keep_dst(dev); - dev->iflink = 0; dev->addr_len = 4; dev->features |= NETIF_F_LLTX; dev->features |= SIT_FEATURES; @@ -1530,8 +1529,7 @@ static bool ipip6_netlink_6rd_parms(struct nlattr *data[], if (data[IFLA_IPTUN_6RD_PREFIX]) { ret = true; - nla_memcpy(&ip6rd->prefix, data[IFLA_IPTUN_6RD_PREFIX], - sizeof(struct in6_addr)); + ip6rd->prefix = nla_get_in6_addr(data[IFLA_IPTUN_6RD_PREFIX]); } if (data[IFLA_IPTUN_6RD_RELAY_PREFIX]) { @@ -1683,8 +1681,8 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) struct ip_tunnel_parm *parm = &tunnel->parms; if (nla_put_u32(skb, IFLA_IPTUN_LINK, parm->link) || - nla_put_be32(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || - nla_put_be32(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || + nla_put_in_addr(skb, IFLA_IPTUN_LOCAL, parm->iph.saddr) || + nla_put_in_addr(skb, IFLA_IPTUN_REMOTE, parm->iph.daddr) || nla_put_u8(skb, IFLA_IPTUN_TTL, parm->iph.ttl) || nla_put_u8(skb, IFLA_IPTUN_TOS, parm->iph.tos) || nla_put_u8(skb, IFLA_IPTUN_PMTUDISC, @@ -1694,10 +1692,10 @@ static int ipip6_fill_info(struct sk_buff *skb, const struct net_device *dev) goto nla_put_failure; #ifdef CONFIG_IPV6_SIT_6RD - if (nla_put(skb, IFLA_IPTUN_6RD_PREFIX, sizeof(struct in6_addr), - &tunnel->ip6rd.prefix) || - nla_put_be32(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, - tunnel->ip6rd.relay_prefix) || + if (nla_put_in6_addr(skb, IFLA_IPTUN_6RD_PREFIX, + &tunnel->ip6rd.prefix) || + nla_put_in_addr(skb, IFLA_IPTUN_6RD_RELAY_PREFIX, + tunnel->ip6rd.relay_prefix) || nla_put_u16(skb, IFLA_IPTUN_6RD_PREFIXLEN, tunnel->ip6rd.prefixlen) || nla_put_u16(skb, IFLA_IPTUN_6RD_RELAY_PREFIXLEN, @@ -1795,7 +1793,7 @@ static void __net_exit sit_destroy_tunnels(struct net *net, struct ip_tunnel *t; t = rtnl_dereference(sitn->tunnels[prio][h]); - while (t != NULL) { + while (t) { /* If dev is in the same netns, it has already * been added to the list by the previous loop. */ diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index 7337fc7947e2..0909f4e0d53c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -41,22 +41,6 @@ static __u16 const msstab[] = { 9000 - 60, }; -static inline struct sock *get_cookie_sock(struct sock *sk, struct sk_buff *skb, - struct request_sock *req, - struct dst_entry *dst) -{ - struct inet_connection_sock *icsk = inet_csk(sk); - struct sock *child; - - child = icsk->icsk_af_ops->syn_recv_sock(sk, skb, req, dst); - if (child) - inet_csk_reqsk_queue_add(sk, req, child); - else - reqsk_free(req); - - return child; -} - static DEFINE_PER_CPU(__u32 [16 + 5 + SHA_WORKSPACE_WORDS], ipv6_cookie_scratch); @@ -189,13 +173,13 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) goto out; ret = NULL; - req = inet_reqsk_alloc(&tcp6_request_sock_ops); + req = inet_reqsk_alloc(&tcp6_request_sock_ops, sk); if (!req) goto out; ireq = inet_rsk(req); treq = tcp_rsk(req); - treq->listener = NULL; + treq->tfo_listener = false; if (security_inet_conn_request(sk, skb, req)) goto out_free; @@ -220,7 +204,6 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->ir_mark = inet_request_mark(sk, skb); - req->expires = 0UL; req->num_retrans = 0; ireq->snd_wscale = tcp_opt.snd_wscale; ireq->sack_ok = tcp_opt.sack_ok; @@ -264,7 +247,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq->rcv_wscale = rcv_wscale; ireq->ecn_ok = cookie_ecn_ok(&tcp_opt, sock_net(sk), dst); - ret = get_cookie_sock(sk, skb, req, dst); + ret = tcp_get_cookie_sock(sk, skb, req, dst); out: return ret; out_free: diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index c5c10fafcfe2..4e705add4f18 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -54,6 +54,27 @@ static struct ctl_table ipv6_table_template[] = { .mode = 0644, .proc_handler = proc_dointvec }, + { + .procname = "idgen_retries", + .data = &init_net.ipv6.sysctl.idgen_retries, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "idgen_delay", + .data = &init_net.ipv6.sysctl.idgen_delay, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_jiffies, + }, + { + .procname = "flowlabel_state_ranges", + .data = &init_net.ipv6.sysctl.flowlabel_state_ranges, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec + }, { } }; @@ -93,6 +114,9 @@ static int __net_init ipv6_sysctl_net_init(struct net *net) ipv6_table[2].data = &net->ipv6.sysctl.flowlabel_consistency; ipv6_table[3].data = &net->ipv6.sysctl.auto_flowlabels; ipv6_table[4].data = &net->ipv6.sysctl.fwmark_reflect; + ipv6_table[5].data = &net->ipv6.sysctl.idgen_retries; + ipv6_table[6].data = &net->ipv6.sysctl.idgen_delay; + ipv6_table[7].data = &net->ipv6.sysctl.flowlabel_state_ranges; ipv6_route_table = ipv6_route_sysctl_init(net); if (!ipv6_route_table) @@ -163,7 +187,7 @@ int ipv6_sysctl_register(void) int err = -ENOMEM; ip6_header = register_net_sysctl(&init_net, "net/ipv6", ipv6_rotable); - if (ip6_header == NULL) + if (!ip6_header) goto out; err = register_pernet_subsys(&ipv6_sysctl_net_ops); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 1f5e62229aaa..6748c4277aff 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -99,21 +99,7 @@ static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) dst_hold(dst); sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; - } -} - -static void tcp_v6_hash(struct sock *sk) -{ - if (sk->sk_state != TCP_CLOSE) { - if (inet_csk(sk)->icsk_af_ops == &ipv6_mapped) { - tcp_prot.hash(sk); - return; - } - local_bh_disable(); - __inet6_hash(sk, NULL); - local_bh_enable(); + inet6_sk(sk)->rx_dst_cookie = rt6_get_cookie(rt); } } @@ -134,7 +120,6 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, struct ipv6_pinfo *np = inet6_sk(sk); struct tcp_sock *tp = tcp_sk(sk); struct in6_addr *saddr = NULL, *final_p, final; - struct rt6_info *rt; struct flowi6 fl6; struct dst_entry *dst; int addr_type; @@ -154,7 +139,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { struct ip6_flowlabel *flowlabel; flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; fl6_sock_release(flowlabel); } @@ -233,11 +218,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, tp->af_specific = &tcp_sock_ipv6_specific; #endif goto failure; - } else { - ipv6_addr_set_v4mapped(inet->inet_saddr, &np->saddr); - ipv6_addr_set_v4mapped(inet->inet_rcv_saddr, - &sk->sk_v6_rcv_saddr); } + np->saddr = sk->sk_v6_rcv_saddr; return err; } @@ -263,7 +245,7 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, goto failure; } - if (saddr == NULL) { + if (!saddr) { saddr = &fl6.saddr; sk->sk_v6_rcv_saddr = *saddr; } @@ -275,10 +257,9 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(sk, dst, NULL, NULL); - rt = (struct rt6_info *) dst; if (tcp_death_row.sysctl_tw_recycle && !tp->rx_opt.ts_recent_stamp && - ipv6_addr_equal(&rt->rt6i_dst.addr, &sk->sk_v6_daddr)) + ipv6_addr_equal(&fl6.daddr, &sk->sk_v6_daddr)) tcp_fetch_timewait_stamp(sk, dst); icsk->icsk_ext_hdr_len = 0; @@ -340,18 +321,20 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, { const struct ipv6hdr *hdr = (const struct ipv6hdr *)skb->data; const struct tcphdr *th = (struct tcphdr *)(skb->data+offset); + struct net *net = dev_net(skb->dev); + struct request_sock *fastopen; struct ipv6_pinfo *np; - struct sock *sk; - int err; struct tcp_sock *tp; - struct request_sock *fastopen; __u32 seq, snd_una; - struct net *net = dev_net(skb->dev); + struct sock *sk; + int err; - sk = inet6_lookup(net, &tcp_hashinfo, &hdr->daddr, - th->dest, &hdr->saddr, th->source, skb->dev->ifindex); + sk = __inet6_lookup_established(net, &tcp_hashinfo, + &hdr->daddr, th->dest, + &hdr->saddr, ntohs(th->source), + skb->dev->ifindex); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -361,6 +344,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, inet_twsk_put(inet_twsk(sk)); return; } + seq = ntohl(th->seq); + if (sk->sk_state == TCP_NEW_SYN_RECV) + return tcp_req_err(sk, seq); bh_lock_sock(sk); if (sock_owned_by_user(sk) && type != ICMPV6_PKT_TOOBIG) @@ -375,7 +361,6 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } tp = tcp_sk(sk); - seq = ntohl(th->seq); /* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */ fastopen = tp->fastopen_rsk; snd_una = fastopen ? tcp_rsk(fastopen)->snt_isn : tp->snd_una; @@ -419,37 +404,12 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, /* Might be for an request_sock */ switch (sk->sk_state) { - struct request_sock *req, **prev; - case TCP_LISTEN: - if (sock_owned_by_user(sk)) - goto out; - - /* Note : We use inet6_iif() here, not tcp_v6_iif() */ - req = inet6_csk_search_req(sk, &prev, th->dest, &hdr->daddr, - &hdr->saddr, inet6_iif(skb)); - if (!req) - goto out; - - /* ICMPs are not backlogged, hence we cannot get - * an established socket here. - */ - WARN_ON(req->sk != NULL); - - if (seq != tcp_rsk(req)->snt_isn) { - NET_INC_STATS_BH(net, LINUX_MIB_OUTOFWINDOWICMPS); - goto out; - } - - inet_csk_reqsk_queue_drop(sk, req, prev); - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_LISTENDROPS); - goto out; - case TCP_SYN_SENT: case TCP_SYN_RECV: /* Only in fast or simultaneous open. If a fast open socket is * is already accepted it is treated as a connected one below. */ - if (fastopen && fastopen->sk == NULL) + if (fastopen && !fastopen->sk) break; if (!sock_owned_by_user(sk)) { @@ -497,7 +457,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, &ireq->ir_v6_rmt_addr); fl6->daddr = ireq->ir_v6_rmt_addr; - if (np->repflow && (ireq->pktopts != NULL)) + if (np->repflow && ireq->pktopts) fl6->flowlabel = ip6_flowlabel(ipv6_hdr(ireq->pktopts)); skb_set_queue_mapping(skb, queue_mapping); @@ -523,17 +483,11 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } static struct tcp_md5sig_key *tcp_v6_md5_lookup(struct sock *sk, - struct sock *addr_sk) + const struct sock *addr_sk) { return tcp_v6_md5_do_lookup(sk, &addr_sk->sk_v6_daddr); } -static struct tcp_md5sig_key *tcp_v6_reqsk_md5_lookup(struct sock *sk, - struct request_sock *req) -{ - return tcp_v6_md5_do_lookup(sk, &inet_rsk(req)->ir_v6_rmt_addr); -} - static int tcp_v6_parse_md5_keys(struct sock *sk, char __user *optval, int optlen) { @@ -619,9 +573,9 @@ clear_hash_noput: return 1; } -static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, +static int tcp_v6_md5_hash_skb(char *md5_hash, + const struct tcp_md5sig_key *key, const struct sock *sk, - const struct request_sock *req, const struct sk_buff *skb) { const struct in6_addr *saddr, *daddr; @@ -629,12 +583,9 @@ static int tcp_v6_md5_hash_skb(char *md5_hash, struct tcp_md5sig_key *key, struct hash_desc *desc; const struct tcphdr *th = tcp_hdr(skb); - if (sk) { - saddr = &inet6_sk(sk)->saddr; + if (sk) { /* valid for establish/request sockets */ + saddr = &sk->sk_v6_rcv_saddr; daddr = &sk->sk_v6_daddr; - } else if (req) { - saddr = &inet_rsk(req)->ir_v6_loc_addr; - daddr = &inet_rsk(req)->ir_v6_rmt_addr; } else { const struct ipv6hdr *ip6h = ipv6_hdr(skb); saddr = &ip6h->saddr; @@ -670,8 +621,7 @@ clear_hash_noput: return 1; } -static int __tcp_v6_inbound_md5_hash(struct sock *sk, - const struct sk_buff *skb) +static bool tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) { const __u8 *hash_location = NULL; struct tcp_md5sig_key *hash_expected; @@ -685,44 +635,32 @@ static int __tcp_v6_inbound_md5_hash(struct sock *sk, /* We've parsed the options - do we have a hash? */ if (!hash_expected && !hash_location) - return 0; + return false; if (hash_expected && !hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5NOTFOUND); - return 1; + return true; } if (!hash_expected && hash_location) { NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_TCPMD5UNEXPECTED); - return 1; + return true; } /* check the signature */ genhash = tcp_v6_md5_hash_skb(newhash, hash_expected, - NULL, NULL, skb); + NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) { net_info_ratelimited("MD5 Hash %s for [%pI6c]:%u->[%pI6c]:%u\n", genhash ? "failed" : "mismatch", &ip6h->saddr, ntohs(th->source), &ip6h->daddr, ntohs(th->dest)); - return 1; + return true; } - return 0; + return false; } - -static int tcp_v6_inbound_md5_hash(struct sock *sk, const struct sk_buff *skb) -{ - int ret; - - rcu_read_lock(); - ret = __tcp_v6_inbound_md5_hash(sk, skb); - rcu_read_unlock(); - - return ret; -} - #endif static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, @@ -734,8 +672,6 @@ static void tcp_v6_init_req(struct request_sock *req, struct sock *sk, ireq->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr; ireq->ir_v6_loc_addr = ipv6_hdr(skb)->daddr; - ireq->ir_iif = sk->sk_bound_dev_if; - /* So that link locals have meaning */ if (!sk->sk_bound_dev_if && ipv6_addr_type(&ireq->ir_v6_rmt_addr) & IPV6_ADDR_LINKLOCAL) @@ -774,7 +710,7 @@ static const struct tcp_request_sock_ops tcp_request_sock_ipv6_ops = { .mss_clamp = IPV6_MIN_MTU - sizeof(struct tcphdr) - sizeof(struct ipv6hdr), #ifdef CONFIG_TCP_MD5SIG - .md5_lookup = tcp_v6_reqsk_md5_lookup, + .req_md5_lookup = tcp_v6_md5_lookup, .calc_md5_hash = tcp_v6_md5_hash_skb, #endif .init_req = tcp_v6_init_req, @@ -811,7 +747,7 @@ static void tcp_v6_send_response(struct sock *sk, struct sk_buff *skb, u32 seq, buff = alloc_skb(MAX_HEADER + sizeof(struct ipv6hdr) + tot_len, GFP_ATOMIC); - if (buff == NULL) + if (!buff) return; skb_reserve(buff, MAX_HEADER + sizeof(struct ipv6hdr) + tot_len); @@ -931,7 +867,7 @@ static void tcp_v6_send_reset(struct sock *sk, struct sk_buff *skb) if (!key) goto release_sk1; - genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, NULL, skb); + genhash = tcp_v6_md5_hash_skb(newhash, key, NULL, skb); if (genhash || memcmp(hash_location, newhash, 16) != 0) goto release_sk1; } else { @@ -975,7 +911,7 @@ static void tcp_v6_timewait_ack(struct sock *sk, struct sk_buff *skb) tcptw->tw_rcv_wnd >> tw->tw_rcv_wscale, tcp_time_stamp + tcptw->tw_ts_offset, tcptw->tw_ts_recent, tw->tw_bound_dev_if, tcp_twsk_md5_key(tcptw), - tw->tw_tclass, (tw->tw_flowlabel << 12)); + tw->tw_tclass, cpu_to_be32(tw->tw_flowlabel)); inet_twsk_put(tw); } @@ -997,17 +933,20 @@ static void tcp_v6_reqsk_send_ack(struct sock *sk, struct sk_buff *skb, static struct sock *tcp_v6_hnd_req(struct sock *sk, struct sk_buff *skb) { - struct request_sock *req, **prev; const struct tcphdr *th = tcp_hdr(skb); + struct request_sock *req; struct sock *nsk; /* Find possible connection requests. */ - req = inet6_csk_search_req(sk, &prev, th->source, + req = inet6_csk_search_req(sk, th->source, &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, tcp_v6_iif(skb)); - if (req) - return tcp_check_req(sk, skb, req, prev, false); - + if (req) { + nsk = tcp_check_req(sk, skb, req, false); + if (!nsk) + reqsk_put(req); + return nsk; + } nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), @@ -1067,7 +1006,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk = tcp_v4_syn_recv_sock(sk, skb, req, dst); - if (newsk == NULL) + if (!newsk) return NULL; newtcp6sk = (struct tcp6_sock *)newsk; @@ -1079,11 +1018,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, memcpy(newnp, np, sizeof(struct ipv6_pinfo)); - ipv6_addr_set_v4mapped(newinet->inet_daddr, &newsk->sk_v6_daddr); - - ipv6_addr_set_v4mapped(newinet->inet_saddr, &newnp->saddr); - - newsk->sk_v6_rcv_saddr = newnp->saddr; + newnp->saddr = newsk->sk_v6_rcv_saddr; inet_csk(newsk)->icsk_af_ops = &ipv6_mapped; newsk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -1128,7 +1063,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, } newsk = tcp_create_openreq_child(sk, req, skb); - if (newsk == NULL) + if (!newsk) goto out_nonewsk; /* @@ -1170,7 +1105,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; - if (ireq->pktopts != NULL) { + if (ireq->pktopts) { newnp->pktoptions = skb_clone(ireq->pktopts, sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(ireq->pktopts); @@ -1215,7 +1150,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, #ifdef CONFIG_TCP_MD5SIG /* Copy over the MD5 key from the original socket */ key = tcp_v6_md5_do_lookup(sk, &newsk->sk_v6_daddr); - if (key != NULL) { + if (key) { /* We're using one, so create a matching key * on the newsk structure. If we fail to get * memory, then we end up not copying the key @@ -1232,7 +1167,7 @@ static struct sock *tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, tcp_done(newsk); goto out; } - __inet6_hash(newsk, NULL); + __inet_hash(newsk, NULL); return newsk; @@ -1313,7 +1248,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) return 0; } - if (skb->len < tcp_hdrlen(skb) || tcp_checksum_complete(skb)) + if (tcp_checksum_complete(skb)) goto csum_err; if (sk->sk_state == TCP_LISTEN) { @@ -1483,6 +1418,7 @@ process: skb->dev = NULL; bh_lock_sock_nested(sk); + tcp_sk(sk)->segs_in += max_t(u16, 1, skb_shinfo(skb)->gso_segs); ret = 0; if (!sock_owned_by_user(sk)) { if (!tcp_prequeue(sk, skb)) @@ -1504,7 +1440,7 @@ no_tcp_socket: tcp_v6_fill_cb(skb, hdr, th); - if (skb->len < (th->doff<<2) || tcp_checksum_complete(skb)) { + if (tcp_checksum_complete(skb)) { csum_error: TCP_INC_STATS_BH(net, TCP_MIB_CSUMERRORS); bad_packet: @@ -1529,10 +1465,6 @@ do_time_wait: tcp_v6_fill_cb(skb, hdr, th); - if (skb->len < (th->doff<<2)) { - inet_twsk_put(inet_twsk(sk)); - goto bad_packet; - } if (tcp_checksum_complete(skb)) { inet_twsk_put(inet_twsk(sk)); goto csum_error; @@ -1547,9 +1479,9 @@ do_time_wait: &ipv6_hdr(skb)->saddr, th->source, &ipv6_hdr(skb)->daddr, ntohs(th->dest), tcp_v6_iif(skb)); - if (sk2 != NULL) { + if (sk2) { struct inet_timewait_sock *tw = inet_twsk(sk); - inet_twsk_deschedule(tw, &tcp_death_row); + inet_twsk_deschedule(tw); inet_twsk_put(tw); sk = sk2; tcp_v6_restore_cb(skb); @@ -1595,7 +1527,7 @@ static void tcp_v6_early_demux(struct sk_buff *skb) if (sk) { skb->sk = sk; skb->destructor = sock_edemux; - if (sk->sk_state != TCP_TIME_WAIT) { + if (sk_fullsock(sk)) { struct dst_entry *dst = READ_ONCE(sk->sk_rx_dst); if (dst) @@ -1700,9 +1632,9 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, kuid_t uid) + struct request_sock *req, int i, kuid_t uid) { - int ttd = req->expires - jiffies; + long ttd = req->rsk_timer.expires - jiffies; const struct in6_addr *src = &inet_rsk(req)->ir_v6_loc_addr; const struct in6_addr *dest = &inet_rsk(req)->ir_v6_rmt_addr; @@ -1791,9 +1723,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) static void get_timewait6_sock(struct seq_file *seq, struct inet_timewait_sock *tw, int i) { + long delta = tw->tw_timer.expires - jiffies; const struct in6_addr *dest, *src; __u16 destp, srcp; - s32 delta = tw->tw_ttd - inet_tw_time_stamp(); dest = &tw->tw_v6_daddr; src = &tw->tw_v6_rcv_saddr; @@ -1838,7 +1770,7 @@ static int tcp6_seq_show(struct seq_file *seq, void *v) get_tcp6_sock(seq, v, st->num); break; case TCP_SEQ_STATE_OPENREQ: - get_openreq6(seq, st->syn_wait_sk, v, st->num, st->uid); + get_openreq6(seq, v, st->num, st->uid); break; } out: @@ -1902,7 +1834,7 @@ struct proto tcpv6_prot = { .sendpage = tcp_sendpage, .backlog_rcv = tcp_v6_do_rcv, .release_cb = tcp_release_cb, - .hash = tcp_v6_hash, + .hash = inet_hash, .unhash = inet_unhash, .get_port = inet_csk_get_port, .enter_memory_pressure = tcp_enter_memory_pressure, diff --git a/net/ipv6/tcpv6_offload.c b/net/ipv6/tcpv6_offload.c index c1ab77105b4c..d883c9204c01 100644 --- a/net/ipv6/tcpv6_offload.c +++ b/net/ipv6/tcpv6_offload.c @@ -41,8 +41,8 @@ static int tcp6_gro_complete(struct sk_buff *skb, int thoff) return tcp_gro_complete(skb); } -struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, - netdev_features_t features) +static struct sk_buff *tcp6_gso_segment(struct sk_buff *skb, + netdev_features_t features) { struct tcphdr *th; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index d048d46779fc..e51fc3eee6db 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -53,11 +53,11 @@ #include <trace/events/skb.h> #include "udp_impl.h" -static unsigned int udp6_ehashfn(struct net *net, - const struct in6_addr *laddr, - const u16 lport, - const struct in6_addr *faddr, - const __be16 fport) +static u32 udp6_ehashfn(const struct net *net, + const struct in6_addr *laddr, + const u16 lport, + const struct in6_addr *faddr, + const __be16 fport) { static u32 udp6_ehash_secret __read_mostly; static u32 udp_ipv6_hash_secret __read_mostly; @@ -104,9 +104,9 @@ int ipv6_rcv_saddr_equal(const struct sock *sk, const struct sock *sk2) return 0; } -static unsigned int udp6_portaddr_hash(struct net *net, - const struct in6_addr *addr6, - unsigned int port) +static u32 udp6_portaddr_hash(const struct net *net, + const struct in6_addr *addr6, + unsigned int port) { unsigned int hash, mix = net_hash_mix(net); @@ -120,7 +120,6 @@ static unsigned int udp6_portaddr_hash(struct net *net, return hash ^ port; } - int udp_v6_get_port(struct sock *sk, unsigned short snum) { unsigned int hash2_nulladdr = @@ -385,14 +384,12 @@ struct sock *udp6_lib_lookup(struct net *net, const struct in6_addr *saddr, __be } EXPORT_SYMBOL_GPL(udp6_lib_lookup); - /* * This should be easy, if there is something there we * return it, otherwise we block. */ -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len, +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, int flags, int *addr_len) { struct ipv6_pinfo *np = inet6_sk(sk); @@ -528,10 +525,8 @@ csum_copy_err: } unlock_sock_fast(sk, slow); - if (noblock) - return -EAGAIN; - - /* starting over for a new packet */ + /* starting over for a new packet, but check if we need to yield */ + cond_resched(); msg->msg_flags &= ~MSG_TRUNC; goto try_again; } @@ -551,7 +546,7 @@ void __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, sk = __udp6_lib_lookup(net, daddr, uh->dest, saddr, uh->source, inet6_iif(skb), udptable); - if (sk == NULL) { + if (!sk) { ICMP6_INC_STATS_BH(net, __in6_dev_get(skb->dev), ICMP6_MIB_INERRORS); return; @@ -649,7 +644,7 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) /* if we're overly short, let UDP handle it */ encap_rcv = ACCESS_ONCE(up->encap_rcv); - if (skb->len > sizeof(struct udphdr) && encap_rcv != NULL) { + if (skb->len > sizeof(struct udphdr) && encap_rcv) { int ret; /* Verify checksum before giving to encap */ @@ -734,7 +729,9 @@ static bool __udp_v6_is_mcast_sock(struct net *net, struct sock *sk, (inet->inet_dport && inet->inet_dport != rmt_port) || (!ipv6_addr_any(&sk->sk_v6_daddr) && !ipv6_addr_equal(&sk->sk_v6_daddr, rmt_addr)) || - (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif)) + (sk->sk_bound_dev_if && sk->sk_bound_dev_if != dif) || + (!ipv6_addr_any(&sk->sk_v6_rcv_saddr) && + !ipv6_addr_equal(&sk->sk_v6_rcv_saddr, loc_addr))) return false; if (!inet6_mc_check(sk, loc_addr, rmt_addr)) return false; @@ -750,7 +747,7 @@ static void flush_stack(struct sock **stack, unsigned int count, for (i = 0; i < count; i++) { sk = stack[i]; - if (likely(skb1 == NULL)) + if (likely(!skb1)) skb1 = (i == final) ? skb : skb_clone(skb, GFP_ATOMIC); if (!skb1) { atomic_inc(&sk->sk_drops); @@ -900,7 +897,7 @@ int __udp6_lib_rcv(struct sk_buff *skb, struct udp_table *udptable, * for sock caches... i'll skip this for now. */ sk = __udp6_lib_lookup_skb(skb, uh->source, uh->dest, udptable); - if (sk != NULL) { + if (sk) { int ret; if (!uh->check && !udp_sk(sk)->no_check6_rx) { @@ -1101,8 +1098,7 @@ out: return err; } -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, - struct msghdr *msg, size_t len) +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) { struct ipv6_txoptions opt_space; struct udp_sock *up = udp_sk(sk); @@ -1164,12 +1160,12 @@ int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, do_udp_sendmsg: if (__ipv6_only_sock(sk)) return -ENETUNREACH; - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); } } if (up->pending == AF_INET) - return udp_sendmsg(iocb, sk, msg, len); + return udp_sendmsg(sk, msg, len); /* Rough check on arithmetic overflow, better check is made in ip6_append_data(). @@ -1209,7 +1205,7 @@ do_udp_sendmsg: fl6.flowlabel = sin6->sin6_flowinfo&IPV6_FLOWINFO_MASK; if (fl6.flowlabel&IPV6_FLOWLABEL_MASK) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } } @@ -1257,14 +1253,14 @@ do_udp_sendmsg: } if ((fl6.flowlabel&IPV6_FLOWLABEL_MASK) && !flowlabel) { flowlabel = fl6_sock_lookup(sk, fl6.flowlabel); - if (flowlabel == NULL) + if (!flowlabel) return -EINVAL; } if (!(opt->opt_nflen|opt->opt_flen)) opt = NULL; connected = 0; } - if (opt == NULL) + if (!opt) opt = np->opt; if (flowlabel) opt = fl6_merge_options(&opt_space, flowlabel, opt); @@ -1557,7 +1553,6 @@ static struct inet_protosw udpv6_protosw = { .flags = INET_PROTOSW_PERMANENT, }; - int __init udpv6_init(void) { int ret; diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index c779c3c90b9d..0682c031ccdc 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -23,10 +23,9 @@ int compat_udpv6_setsockopt(struct sock *sk, int level, int optname, int compat_udpv6_getsockopt(struct sock *sk, int level, int optname, char __user *optval, int __user *optlen); #endif -int udpv6_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len); -int udpv6_recvmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, - size_t len, int noblock, int flags, int *addr_len); +int udpv6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len); +int udpv6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, int noblock, + int flags, int *addr_len); int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb); void udpv6_destroy_sock(struct sock *sk); diff --git a/net/ipv6/udp_offload.c b/net/ipv6/udp_offload.c index be2c0ba82c85..7441e1e63893 100644 --- a/net/ipv6/udp_offload.c +++ b/net/ipv6/udp_offload.c @@ -54,7 +54,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, /* Set the IPv6 fragment id if not set yet */ if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); segs = NULL; goto out; @@ -113,7 +113,7 @@ static struct sk_buff *udp6_ufo_fragment(struct sk_buff *skb, fptr->nexthdr = nexthdr; fptr->reserved = 0; if (!skb_shinfo(skb)->ip6_frag_id) - ipv6_proxy_select_ident(skb); + ipv6_proxy_select_ident(dev_net(skb->dev), skb); fptr->identification = skb_shinfo(skb)->ip6_frag_id; /* Fragment the skb. ipv6 header and the remaining fields of the diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c index f48fbe4d16f5..74bd17882a2f 100644 --- a/net/ipv6/xfrm6_input.c +++ b/net/ipv6/xfrm6_input.c @@ -42,7 +42,8 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async) ipv6_hdr(skb)->payload_len = htons(skb->len); __skb_push(skb, skb->data - skb_network_header(skb)); - NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, skb, skb->dev, NULL, + NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING, NULL, skb, + skb->dev, NULL, ip6_rcv_finish); return -1; } diff --git a/net/ipv6/xfrm6_mode_beet.c b/net/ipv6/xfrm6_mode_beet.c index 9949a356d62c..1e205c3253ac 100644 --- a/net/ipv6/xfrm6_mode_beet.c +++ b/net/ipv6/xfrm6_mode_beet.c @@ -95,8 +95,8 @@ static int xfrm6_beet_input(struct xfrm_state *x, struct sk_buff *skb) ip6h = ipv6_hdr(skb); ip6h->payload_len = htons(skb->len - size); - ip6h->daddr = *(struct in6_addr *)&x->sel.daddr.a6; - ip6h->saddr = *(struct in6_addr *)&x->sel.saddr.a6; + ip6h->daddr = x->sel.daddr.in6; + ip6h->saddr = x->sel.saddr.in6; err = 0; out: return err; diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c index 010f8bd2d577..09c76a7b474d 100644 --- a/net/ipv6/xfrm6_output.c +++ b/net/ipv6/xfrm6_output.c @@ -120,7 +120,7 @@ int xfrm6_prepare_output(struct xfrm_state *x, struct sk_buff *skb) } EXPORT_SYMBOL(xfrm6_prepare_output); -int xfrm6_output_finish(struct sk_buff *skb) +int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb) { memset(IP6CB(skb), 0, sizeof(*IP6CB(skb))); @@ -128,10 +128,10 @@ int xfrm6_output_finish(struct sk_buff *skb) IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED; #endif - return xfrm_output(skb); + return xfrm_output(sk, skb); } -static int __xfrm6_output(struct sk_buff *skb) +static int __xfrm6_output(struct sock *sk, struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); struct xfrm_state *x = dst->xfrm; @@ -140,7 +140,7 @@ static int __xfrm6_output(struct sk_buff *skb) #ifdef CONFIG_NETFILTER if (!x) { IP6CB(skb)->flags |= IP6SKB_REROUTED; - return dst_output(skb); + return dst_output_sk(sk, skb); } #endif @@ -160,14 +160,15 @@ static int __xfrm6_output(struct sk_buff *skb) if (x->props.mode == XFRM_MODE_TUNNEL && ((skb->len > mtu && !skb_is_gso(skb)) || dst_allfrag(skb_dst(skb)))) { - return ip6_fragment(skb, x->outer_mode->afinfo->output_finish); + return ip6_fragment(sk, skb, + x->outer_mode->afinfo->output_finish); } - return x->outer_mode->afinfo->output_finish(skb); + return x->outer_mode->afinfo->output_finish(sk, skb); } int xfrm6_output(struct sock *sk, struct sk_buff *skb) { - return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, skb, + return NF_HOOK_COND(NFPROTO_IPV6, NF_INET_POST_ROUTING, sk, skb, NULL, skb_dst(skb)->dev, __xfrm6_output, !(IP6CB(skb)->flags & IP6SKB_REROUTED)); } diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 8d2d01b4800a..ed0583c1b9fc 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -61,9 +61,7 @@ static int xfrm6_get_saddr(struct net *net, return -EHOSTUNREACH; dev = ip6_dst_idev(dst)->dev; - ipv6_dev_get_saddr(dev_net(dev), dev, - (struct in6_addr *)&daddr->a6, 0, - (struct in6_addr *)&saddr->a6); + ipv6_dev_get_saddr(dev_net(dev), dev, &daddr->in6, 0, &saddr->in6); dst_release(dst); return 0; } @@ -73,20 +71,12 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } -static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) -{ - struct rt6_info *rt = (struct rt6_info *)xdst; - - rt6_init_peer(rt, net->ipv6.peers); -} - static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { if (dst->ops->family == AF_INET6) { struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_node) - path->path_cookie = rt->rt6i_node->fn_sernum; + path->path_cookie = rt6_get_cookie(rt); } path->u.rt6.rt6i_nfheader_len = nfheader_len; @@ -108,16 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, return -ENODEV; } - rt6_transfer_peer(&xdst->u.rt6, rt); - /* Sheit... I remember I did this right. Apparently, * it was magically lost, so this code needs audit */ xdst->u.rt6.rt6i_flags = rt->rt6i_flags & (RTF_ANYCAST | RTF_LOCAL); xdst->u.rt6.rt6i_metric = rt->rt6i_metric; xdst->u.rt6.rt6i_node = rt->rt6i_node; - if (rt->rt6i_node) - xdst->route_cookie = rt->rt6i_node->fn_sernum; + xdst->route_cookie = rt6_get_cookie(rt); xdst->u.rt6.rt6i_gateway = rt->rt6i_gateway; xdst->u.rt6.rt6i_dst = rt->rt6i_dst; xdst->u.rt6.rt6i_src = rt->rt6i_src; @@ -257,10 +244,6 @@ static void xfrm6_dst_destroy(struct dst_entry *dst) if (likely(xdst->u.rt6.rt6i_idev)) in6_dev_put(xdst->u.rt6.rt6i_idev); dst_destroy_metrics_generic(dst); - if (rt6_has_peer(&xdst->u.rt6)) { - struct inet_peer *peer = rt6_peer_ptr(&xdst->u.rt6); - inet_putpeer(peer); - } xfrm_dst_destroy(xdst); } @@ -293,7 +276,6 @@ static void xfrm6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static struct dst_ops xfrm6_dst_ops = { .family = AF_INET6, - .protocol = cpu_to_be16(ETH_P_IPV6), .gc = xfrm6_garbage_collect, .update_pmtu = xfrm6_update_pmtu, .redirect = xfrm6_redirect, @@ -311,7 +293,6 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, - .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, @@ -371,7 +352,7 @@ static void __net_exit xfrm6_net_exit(struct net *net) { struct ctl_table *table; - if (net->ipv6.sysctl.xfrm6_hdr == NULL) + if (!net->ipv6.sysctl.xfrm6_hdr) return; table = net->ipv6.sysctl.xfrm6_hdr->ctl_table_arg; |