From c7109986db3c945f50ceed884a30e0fd8af3b89b Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 26 Jul 2012 12:18:11 +0000 Subject: ipv6: Early TCP socket demux This is the IPv6 missing bits for infrastructure added in commit 41063e9dd1195 (ipv4: Early TCP socket demux.) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/ip6_input.c | 13 +++++++++++-- net/ipv6/tcp_ipv6.c | 38 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 49 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 5ab923e51af3..47975e363fcd 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -47,9 +47,18 @@ -inline int ip6_rcv_finish( struct sk_buff *skb) +int ip6_rcv_finish(struct sk_buff *skb) { - if (skb_dst(skb) == NULL) + if (sysctl_ip_early_demux && !skb_dst(skb)) { + const struct inet6_protocol *ipprot; + + rcu_read_lock(); + ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); + if (ipprot && ipprot->early_demux) + ipprot->early_demux(skb); + rcu_read_unlock(); + } + if (!skb_dst(skb)) ip6_route_input(skb); return dst_input(skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f49476e2d884..221224e72507 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1674,6 +1674,43 @@ do_time_wait: goto discard_it; } +static void tcp_v6_early_demux(struct sk_buff *skb) +{ + const struct ipv6hdr *hdr; + const struct tcphdr *th; + struct sock *sk; + + if (skb->pkt_type != PACKET_HOST) + return; + + if (!pskb_may_pull(skb, skb_transport_offset(skb) + sizeof(struct tcphdr))) + return; + + hdr = ipv6_hdr(skb); + th = tcp_hdr(skb); + + if (th->doff < sizeof(struct tcphdr) / 4) + return; + + sk = __inet6_lookup_established(dev_net(skb->dev), &tcp_hashinfo, + &hdr->saddr, th->source, + &hdr->daddr, ntohs(th->dest), + inet6_iif(skb)); + if (sk) { + skb->sk = sk; + skb->destructor = sock_edemux; + if (sk->sk_state != TCP_TIME_WAIT) { + struct dst_entry *dst = sk->sk_rx_dst; + struct inet_sock *icsk = inet_sk(sk); + if (dst) + dst = dst_check(dst, 0); + if (dst && + icsk->rx_dst_ifindex == inet6_iif(skb)) + skb_dst_set_noref(skb, dst); + } + } +} + static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_obj_size = sizeof(struct tcp6_timewait_sock), .twsk_unique = tcp_twsk_unique, @@ -1984,6 +2021,7 @@ struct proto tcpv6_prot = { }; static const struct inet6_protocol tcpv6_protocol = { + .early_demux = tcp_v6_early_demux, .handler = tcp_v6_rcv, .err_handler = tcp_v6_err, .gso_send_check = tcp_v6_gso_send_check, -- cgit v1.2.3 From 8253947e2cdfb14717c9212b751b7aec9ea9ef5e Mon Sep 17 00:00:00 2001 From: Li Wei Date: Sun, 29 Jul 2012 16:01:30 +0000 Subject: ipv6: fix incorrect route 'expires' value passed to userspace When userspace use RTM_GETROUTE to dump route table, with an already expired route entry, we always got an 'expires' value(2147157) calculated base on INT_MAX. The reason of this problem is in the following satement: rt->dst.expires - jiffies < INT_MAX gcc promoted the type of both sides of '<' to unsigned long, thus a small negative value would be considered greater than INT_MAX. With the help of Eric Dumazet, do the out of bound checks in rtnl_put_cacheinfo(), _after_ conversion to clock_t. Signed-off-by: Li Wei Signed-off-by: David S. Miller --- net/core/rtnetlink.c | 8 ++++++-- net/ipv6/route.c | 8 ++------ 2 files changed, 8 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bc9e380f0abf..5ff949dc954f 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -625,9 +625,13 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, .rta_id = id, }; - if (expires) - ci.rta_expires = jiffies_to_clock_t(expires); + if (expires) { + unsigned long clock; + clock = jiffies_to_clock_t(abs(expires)); + clock = min_t(unsigned long, clock, INT_MAX); + ci.rta_expires = (expires > 0) ? clock : -clock; + } return nla_put(skb, RTA_CACHEINFO, sizeof(ci), &ci); } EXPORT_SYMBOL_GPL(rtnl_put_cacheinfo); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index cf02cb97bbdd..8e80fd279100 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -2480,12 +2480,8 @@ static int rt6_fill_node(struct net *net, goto nla_put_failure; if (nla_put_u32(skb, RTA_PRIORITY, rt->rt6i_metric)) goto nla_put_failure; - if (!(rt->rt6i_flags & RTF_EXPIRES)) - expires = 0; - else if (rt->dst.expires - jiffies < INT_MAX) - expires = rt->dst.expires - jiffies; - else - expires = INT_MAX; + + expires = (rt->rt6i_flags & RTF_EXPIRES) ? rt->dst.expires - jiffies : 0; if (rtnl_put_cacheinfo(skb, &rt->dst, 0, expires, rt->dst.error) < 0) goto nla_put_failure; -- cgit v1.2.3 From cca32e4bf999a34ac08d959f351f2b30bcd02460 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 29 Jul 2012 21:06:13 +0000 Subject: net: TCP early demux cleanup early_demux() handlers should be called in RCU context, and as we use skb_dst_set_noref(skb, dst), caller must not exit from RCU context before dst use (skb_dst(skb)) or release (skb_drop(dst)) Therefore, rcu_read_lock()/rcu_read_unlock() pairs around ->early_demux() are confusing and not needed : Protocol handlers are already in an RCU read lock section. (__netif_receive_skb() does the rcu_read_lock() ) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/ip_input.c | 2 -- net/ipv6/ip6_input.c | 2 -- 2 files changed, 4 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c index 981ff1eef28c..f1395a6fb35f 100644 --- a/net/ipv4/ip_input.c +++ b/net/ipv4/ip_input.c @@ -325,14 +325,12 @@ static int ip_rcv_finish(struct sk_buff *skb) const struct net_protocol *ipprot; int protocol = iph->protocol; - rcu_read_lock(); ipprot = rcu_dereference(inet_protos[protocol]); if (ipprot && ipprot->early_demux) { ipprot->early_demux(skb); /* must reload iph, skb->head might have changed */ iph = ip_hdr(skb); } - rcu_read_unlock(); } /* diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c index 47975e363fcd..a52d864d562b 100644 --- a/net/ipv6/ip6_input.c +++ b/net/ipv6/ip6_input.c @@ -52,11 +52,9 @@ int ip6_rcv_finish(struct sk_buff *skb) if (sysctl_ip_early_demux && !skb_dst(skb)) { const struct inet6_protocol *ipprot; - rcu_read_lock(); ipprot = rcu_dereference(inet6_protos[ipv6_hdr(skb)->nexthdr]); if (ipprot && ipprot->early_demux) ipprot->early_demux(skb); - rcu_read_unlock(); } if (!skb_dst(skb)) ip6_route_input(skb); -- cgit v1.2.3 From c255a458055e459f65eb7b7f51dc5dbdd0caf1d8 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Tue, 31 Jul 2012 16:43:02 -0700 Subject: memcg: rename config variables Sanity: CONFIG_CGROUP_MEM_RES_CTLR -> CONFIG_MEMCG CONFIG_CGROUP_MEM_RES_CTLR_SWAP -> CONFIG_MEMCG_SWAP CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED -> CONFIG_MEMCG_SWAP_ENABLED CONFIG_CGROUP_MEM_RES_CTLR_KMEM -> CONFIG_MEMCG_KMEM [mhocko@suse.cz: fix missed bits] Cc: Glauber Costa Acked-by: Michal Hocko Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Hugh Dickins Cc: Tejun Heo Cc: Aneesh Kumar K.V Cc: David Rientjes Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/cgroups/memory.txt | 10 +++++----- arch/powerpc/configs/chroma_defconfig | 4 ++-- arch/s390/defconfig | 2 +- arch/sh/configs/apsh4ad0a_defconfig | 2 +- arch/sh/configs/sdk7786_defconfig | 4 ++-- arch/sh/configs/se7206_defconfig | 2 +- arch/sh/configs/shx3_defconfig | 2 +- arch/sh/configs/urquell_defconfig | 4 ++-- arch/tile/configs/tilegx_defconfig | 4 ++-- arch/tile/configs/tilepro_defconfig | 4 ++-- arch/um/defconfig | 8 ++++---- include/linux/cgroup_subsys.h | 2 +- include/linux/memcontrol.h | 14 +++++++------- include/linux/mmzone.h | 8 ++++---- include/linux/page_cgroup.h | 10 +++++----- include/linux/sched.h | 2 +- include/linux/swap.h | 6 +++--- include/net/sock.h | 4 ++-- init/Kconfig | 14 +++++++------- kernel/fork.c | 2 +- mm/Makefile | 2 +- mm/hwpoison-inject.c | 2 +- mm/memcontrol.c | 20 ++++++++++---------- mm/memory-failure.c | 2 +- mm/mmzone.c | 2 +- mm/oom_kill.c | 2 +- mm/page_cgroup.c | 2 +- mm/vmscan.c | 4 ++-- net/core/sock.c | 2 +- net/ipv4/Makefile | 2 +- net/ipv4/sysctl_net_ipv4.c | 4 ++-- net/ipv4/tcp_ipv4.c | 2 +- net/ipv6/tcp_ipv6.c | 2 +- 33 files changed, 78 insertions(+), 78 deletions(-) (limited to 'net/ipv6') diff --git a/Documentation/cgroups/memory.txt b/Documentation/cgroups/memory.txt index dd88540bb995..672676ac9615 100644 --- a/Documentation/cgroups/memory.txt +++ b/Documentation/cgroups/memory.txt @@ -187,12 +187,12 @@ the cgroup that brought it in -- this will happen on memory pressure). But see section 8.2: when moving a task to another cgroup, its pages may be recharged to the new cgroup, if move_charge_at_immigrate has been chosen. -Exception: If CONFIG_CGROUP_CGROUP_MEM_RES_CTLR_SWAP is not used. +Exception: If CONFIG_CGROUP_CGROUP_MEMCG_SWAP is not used. When you do swapoff and make swapped-out pages of shmem(tmpfs) to be backed into memory in force, charges for pages are accounted against the caller of swapoff rather than the users of shmem. -2.4 Swap Extension (CONFIG_CGROUP_MEM_RES_CTLR_SWAP) +2.4 Swap Extension (CONFIG_MEMCG_SWAP) Swap Extension allows you to record charge for swap. A swapped-in page is charged back to original page allocator if possible. @@ -259,7 +259,7 @@ When oom event notifier is registered, event will be delivered. per-zone-per-cgroup LRU (cgroup's private LRU) is just guarded by zone->lru_lock, it has no lock of its own. -2.7 Kernel Memory Extension (CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +2.7 Kernel Memory Extension (CONFIG_MEMCG_KMEM) With the Kernel memory extension, the Memory Controller is able to limit the amount of kernel memory used by the system. Kernel memory is fundamentally @@ -286,8 +286,8 @@ per cgroup, instead of globally. a. Enable CONFIG_CGROUPS b. Enable CONFIG_RESOURCE_COUNTERS -c. Enable CONFIG_CGROUP_MEM_RES_CTLR -d. Enable CONFIG_CGROUP_MEM_RES_CTLR_SWAP (to use swap extension) +c. Enable CONFIG_MEMCG +d. Enable CONFIG_MEMCG_SWAP (to use swap extension) 1. Prepare the cgroups (see cgroups.txt, Why are cgroups needed?) # mount -t tmpfs none /sys/fs/cgroup diff --git a/arch/powerpc/configs/chroma_defconfig b/arch/powerpc/configs/chroma_defconfig index b1f9597fe312..29bb11ec6c64 100644 --- a/arch/powerpc/configs/chroma_defconfig +++ b/arch/powerpc/configs/chroma_defconfig @@ -21,8 +21,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_NAMESPACES=y CONFIG_RELAY=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/s390/defconfig b/arch/s390/defconfig index 37d2bf267964..de57702a3f44 100644 --- a/arch/s390/defconfig +++ b/arch/s390/defconfig @@ -13,7 +13,7 @@ CONFIG_CGROUPS=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y diff --git a/arch/sh/configs/apsh4ad0a_defconfig b/arch/sh/configs/apsh4ad0a_defconfig index e7583484cc07..95ae23fcfdd6 100644 --- a/arch/sh/configs/apsh4ad0a_defconfig +++ b/arch/sh/configs/apsh4ad0a_defconfig @@ -11,7 +11,7 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_BLK_CGROUP=y CONFIG_NAMESPACES=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/sh/configs/sdk7786_defconfig b/arch/sh/configs/sdk7786_defconfig index 8a7dd7b59c5c..76a76a295d74 100644 --- a/arch/sh/configs/sdk7786_defconfig +++ b/arch/sh/configs/sdk7786_defconfig @@ -18,8 +18,8 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/sh/configs/se7206_defconfig b/arch/sh/configs/se7206_defconfig index 72c3fad7383f..6bc30ab9fd18 100644 --- a/arch/sh/configs/se7206_defconfig +++ b/arch/sh/configs/se7206_defconfig @@ -11,7 +11,7 @@ CONFIG_CGROUP_DEBUG=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y diff --git a/arch/sh/configs/shx3_defconfig b/arch/sh/configs/shx3_defconfig index 6bb413036892..cd6c519f8fad 100644 --- a/arch/sh/configs/shx3_defconfig +++ b/arch/sh/configs/shx3_defconfig @@ -13,7 +13,7 @@ CONFIG_CGROUP_FREEZER=y CONFIG_CGROUP_DEVICE=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y +CONFIG_CGROUP_MEMCG=y CONFIG_RELAY=y CONFIG_NAMESPACES=y CONFIG_UTS_NS=y diff --git a/arch/sh/configs/urquell_defconfig b/arch/sh/configs/urquell_defconfig index 8bfa4d056d7a..d7f89be9f474 100644 --- a/arch/sh/configs/urquell_defconfig +++ b/arch/sh/configs/urquell_defconfig @@ -15,8 +15,8 @@ CONFIG_CPUSETS=y # CONFIG_PROC_PID_CPUSET is not set CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_DEV_INITRD=y diff --git a/arch/tile/configs/tilegx_defconfig b/arch/tile/configs/tilegx_defconfig index b8d99aca5431..0270620a1692 100644 --- a/arch/tile/configs/tilegx_defconfig +++ b/arch/tile/configs/tilegx_defconfig @@ -18,8 +18,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/tile/configs/tilepro_defconfig b/arch/tile/configs/tilepro_defconfig index 2b1fd31894f1..c11de27a9bcb 100644 --- a/arch/tile/configs/tilepro_defconfig +++ b/arch/tile/configs/tilepro_defconfig @@ -17,8 +17,8 @@ CONFIG_CGROUP_DEVICE=y CONFIG_CPUSETS=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y CONFIG_CGROUP_SCHED=y CONFIG_RT_GROUP_SCHED=y CONFIG_BLK_CGROUP=y diff --git a/arch/um/defconfig b/arch/um/defconfig index 7823ab12e6a4..fec0d5d27460 100644 --- a/arch/um/defconfig +++ b/arch/um/defconfig @@ -155,10 +155,10 @@ CONFIG_CPUSETS=y CONFIG_PROC_PID_CPUSET=y CONFIG_CGROUP_CPUACCT=y CONFIG_RESOURCE_COUNTERS=y -CONFIG_CGROUP_MEM_RES_CTLR=y -CONFIG_CGROUP_MEM_RES_CTLR_SWAP=y -# CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED is not set -# CONFIG_CGROUP_MEM_RES_CTLR_KMEM is not set +CONFIG_CGROUP_MEMCG=y +CONFIG_CGROUP_MEMCG_SWAP=y +# CONFIG_CGROUP_MEMCG_SWAP_ENABLED is not set +# CONFIG_CGROUP_MEMCG_KMEM is not set CONFIG_CGROUP_SCHED=y CONFIG_FAIR_GROUP_SCHED=y # CONFIG_CFS_BANDWIDTH is not set diff --git a/include/linux/cgroup_subsys.h b/include/linux/cgroup_subsys.h index 5b41ce079024..dfae957398c3 100644 --- a/include/linux/cgroup_subsys.h +++ b/include/linux/cgroup_subsys.h @@ -31,7 +31,7 @@ SUBSYS(cpuacct) /* */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG SUBSYS(mem_cgroup) #endif diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 83e7ba90d6e5..170076222431 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -38,7 +38,7 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG /* * All "charge" functions with gfp_mask should use GFP_KERNEL or * (gfp_mask & GFP_RECLAIM_MASK). In current implementatin, memcg doesn't @@ -124,7 +124,7 @@ extern void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, extern void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern int do_swap_account; #endif @@ -193,7 +193,7 @@ void mem_cgroup_split_huge_fixup(struct page *head); bool mem_cgroup_bad_page_check(struct page *page); void mem_cgroup_print_bad_page(struct page *page); #endif -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */ struct mem_cgroup; static inline int mem_cgroup_newpage_charge(struct page *page, @@ -384,9 +384,9 @@ static inline void mem_cgroup_replace_page_cache(struct page *oldpage, struct page *newpage) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ -#if !defined(CONFIG_CGROUP_MEM_RES_CTLR) || !defined(CONFIG_DEBUG_VM) +#if !defined(CONFIG_MEMCG) || !defined(CONFIG_DEBUG_VM) static inline bool mem_cgroup_bad_page_check(struct page *page) { @@ -406,7 +406,7 @@ enum { }; struct sock; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM void sock_update_memcg(struct sock *sk); void sock_release_memcg(struct sock *sk); #else @@ -416,6 +416,6 @@ static inline void sock_update_memcg(struct sock *sk) static inline void sock_release_memcg(struct sock *sk) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 458988bd55a1..3bdfa15b2012 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -201,7 +201,7 @@ struct zone_reclaim_stat { struct lruvec { struct list_head lists[NR_LRU_LISTS]; struct zone_reclaim_stat reclaim_stat; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG struct zone *zone; #endif }; @@ -671,7 +671,7 @@ typedef struct pglist_data { int nr_zones; #ifdef CONFIG_FLAT_NODE_MEM_MAP /* means !SPARSEMEM */ struct page *node_mem_map; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG struct page_cgroup *node_page_cgroup; #endif #endif @@ -736,7 +736,7 @@ extern void lruvec_init(struct lruvec *lruvec, struct zone *zone); static inline struct zone *lruvec_zone(struct lruvec *lruvec) { -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG return lruvec->zone; #else return container_of(lruvec, struct zone, lruvec); @@ -1052,7 +1052,7 @@ struct mem_section { /* See declaration of similar field in struct zone */ unsigned long *pageblock_flags; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG /* * If !SPARSEMEM, pgdat doesn't have page_cgroup pointer. We use * section. (see memcontrol.h/page_cgroup.h about this.) diff --git a/include/linux/page_cgroup.h b/include/linux/page_cgroup.h index a88cdba27809..777a524716db 100644 --- a/include/linux/page_cgroup.h +++ b/include/linux/page_cgroup.h @@ -12,7 +12,7 @@ enum { #ifndef __GENERATING_BOUNDS_H #include -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG #include /* @@ -82,7 +82,7 @@ static inline void unlock_page_cgroup(struct page_cgroup *pc) bit_spin_unlock(PCG_LOCK, &pc->flags); } -#else /* CONFIG_CGROUP_MEM_RES_CTLR */ +#else /* CONFIG_MEMCG */ struct page_cgroup; static inline void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) @@ -102,11 +102,11 @@ static inline void __init page_cgroup_init_flatmem(void) { } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR */ +#endif /* CONFIG_MEMCG */ #include -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern unsigned short swap_cgroup_cmpxchg(swp_entry_t ent, unsigned short old, unsigned short new); extern unsigned short swap_cgroup_record(swp_entry_t ent, unsigned short id); @@ -138,7 +138,7 @@ static inline void swap_cgroup_swapoff(int type) return; } -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_SWAP */ +#endif /* CONFIG_MEMCG_SWAP */ #endif /* !__GENERATING_BOUNDS_H */ diff --git a/include/linux/sched.h b/include/linux/sched.h index 68dcffaa62a0..865725adb9d3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1584,7 +1584,7 @@ struct task_struct { /* bitmask and counter of trace recursion */ unsigned long trace_recursion; #endif /* CONFIG_TRACING */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR /* memcg uses this to do batch job */ +#ifdef CONFIG_MEMCG /* memcg uses this to do batch job */ struct memcg_batch_info { int do_batch; /* incremented when batch uncharge started */ struct mem_cgroup *memcg; /* target memcg of uncharge */ diff --git a/include/linux/swap.h b/include/linux/swap.h index c84ec68eaec9..9a16bb1cefd1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -301,7 +301,7 @@ static inline void scan_unevictable_unregister_node(struct node *node) extern int kswapd_run(int nid); extern void kswapd_stop(int nid); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG extern int mem_cgroup_swappiness(struct mem_cgroup *mem); #else static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) @@ -309,7 +309,7 @@ static inline int mem_cgroup_swappiness(struct mem_cgroup *mem) return vm_swappiness; } #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP extern void mem_cgroup_uncharge_swap(swp_entry_t ent); #else static inline void mem_cgroup_uncharge_swap(swp_entry_t ent) @@ -360,7 +360,7 @@ extern int reuse_swap_page(struct page *); extern int try_to_free_swap(struct page *); struct backing_dev_info; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG extern void mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout); #else diff --git a/include/net/sock.h b/include/net/sock.h index e067f8c18f88..cee528c119ca 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -913,7 +913,7 @@ struct proto { #ifdef SOCK_REFCNT_DEBUG atomic_t socks; #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM /* * cgroup specific init/deinit functions. Called once for all * protocols that implement it, from cgroups populate function. @@ -994,7 +994,7 @@ inline void sk_refcnt_debug_release(const struct sock *sk) #define sk_refcnt_debug_release(sk) do { } while (0) #endif /* SOCK_REFCNT_DEBUG */ -#if defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) && defined(CONFIG_NET) +#if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_NET) extern struct static_key memcg_socket_limit_enabled; static inline struct cg_proto *parent_cg_proto(struct proto *proto, struct cg_proto *cg_proto) diff --git a/init/Kconfig b/init/Kconfig index 72437760e90e..af6c7f8ba019 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -686,7 +686,7 @@ config RESOURCE_COUNTERS This option enables controller independent resource accounting infrastructure that works with cgroups. -config CGROUP_MEM_RES_CTLR +config MEMCG bool "Memory Resource Controller for Control Groups" depends on RESOURCE_COUNTERS select MM_OWNER @@ -709,9 +709,9 @@ config CGROUP_MEM_RES_CTLR This config option also selects MM_OWNER config option, which could in turn add some fork/exit overhead. -config CGROUP_MEM_RES_CTLR_SWAP +config MEMCG_SWAP bool "Memory Resource Controller Swap Extension" - depends on CGROUP_MEM_RES_CTLR && SWAP + depends on MEMCG && SWAP help Add swap management feature to memory resource controller. When you enable this, you can limit mem+swap usage per cgroup. In other words, @@ -726,9 +726,9 @@ config CGROUP_MEM_RES_CTLR_SWAP if boot option "swapaccount=0" is set, swap will not be accounted. Now, memory usage of swap_cgroup is 2 bytes per entry. If swap page size is 4096bytes, 512k per 1Gbytes of swap. -config CGROUP_MEM_RES_CTLR_SWAP_ENABLED +config MEMCG_SWAP_ENABLED bool "Memory Resource Controller Swap Extension enabled by default" - depends on CGROUP_MEM_RES_CTLR_SWAP + depends on MEMCG_SWAP default y help Memory Resource Controller Swap Extension comes with its price in @@ -739,9 +739,9 @@ config CGROUP_MEM_RES_CTLR_SWAP_ENABLED For those who want to have the feature enabled by default should select this option (if, for some reason, they need to disable it then swapaccount=0 does the trick). -config CGROUP_MEM_RES_CTLR_KMEM +config MEMCG_KMEM bool "Memory Resource Controller Kernel Memory accounting (EXPERIMENTAL)" - depends on CGROUP_MEM_RES_CTLR && EXPERIMENTAL + depends on MEMCG && EXPERIMENTAL default n help The Kernel Memory extension for Memory Resource Controller can limit diff --git a/kernel/fork.c b/kernel/fork.c index aaa8813c45d1..3bd2280d79f6 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -1306,7 +1306,7 @@ static struct task_struct *copy_process(unsigned long clone_flags, #ifdef CONFIG_DEBUG_MUTEXES p->blocked_on = NULL; /* not blocked yet */ #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG p->memcg_batch.do_batch = 0; p->memcg_batch.memcg = NULL; #endif diff --git a/mm/Makefile b/mm/Makefile index fd6fc1c1966c..290bbfe33698 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_FS_XIP) += filemap_xip.o obj-$(CONFIG_MIGRATION) += migrate.o obj-$(CONFIG_QUICKLIST) += quicklist.o obj-$(CONFIG_TRANSPARENT_HUGEPAGE) += huge_memory.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR) += memcontrol.o page_cgroup.o +obj-$(CONFIG_MEMCG) += memcontrol.o page_cgroup.o obj-$(CONFIG_CGROUP_HUGETLB) += hugetlb_cgroup.o obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o diff --git a/mm/hwpoison-inject.c b/mm/hwpoison-inject.c index cc448bb983ba..3a61efc518d5 100644 --- a/mm/hwpoison-inject.c +++ b/mm/hwpoison-inject.c @@ -123,7 +123,7 @@ static int pfn_inject_init(void) if (!dentry) goto fail; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP dentry = debugfs_create_u64("corrupt-filter-memcg", 0600, hwpoison_dir, &hwpoison_filter_memcg); if (!dentry) diff --git a/mm/memcontrol.c b/mm/memcontrol.c index a2677e0a6387..55a85e1a342f 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -61,12 +61,12 @@ struct cgroup_subsys mem_cgroup_subsys __read_mostly; #define MEM_CGROUP_RECLAIM_RETRIES 5 static struct mem_cgroup *root_mem_cgroup __read_mostly; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP /* Turned on only when memory cgroup is enabled && really_do_swap_account = 1 */ int do_swap_account __read_mostly; /* for remember boot option*/ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP_ENABLED +#ifdef CONFIG_MEMCG_SWAP_ENABLED static int really_do_swap_account __initdata = 1; #else static int really_do_swap_account __initdata = 0; @@ -407,7 +407,7 @@ static void mem_cgroup_get(struct mem_cgroup *memcg); static void mem_cgroup_put(struct mem_cgroup *memcg); /* Writing them here to avoid exposing memcg's inner layout */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM #include #include @@ -466,9 +466,9 @@ struct cg_proto *tcp_proto_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(tcp_proto_cgroup); #endif /* CONFIG_INET */ -#endif /* CONFIG_CGROUP_MEM_RES_CTLR_KMEM */ +#endif /* CONFIG_MEMCG_KMEM */ -#if defined(CONFIG_INET) && defined(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) +#if defined(CONFIG_INET) && defined(CONFIG_MEMCG_KMEM) static void disarm_sock_keys(struct mem_cgroup *memcg) { if (!memcg_proto_activated(&memcg->tcp_mem.cg_proto)) @@ -3085,7 +3085,7 @@ mem_cgroup_uncharge_swapcache(struct page *page, swp_entry_t ent, bool swapout) } #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP /* * called from swap_entry_free(). remove record in swap_cgroup and * uncharge "memsw" account. @@ -4518,7 +4518,7 @@ static int mem_cgroup_oom_control_write(struct cgroup *cgrp, return 0; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM static int memcg_init_kmem(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { return mem_cgroup_sockets_init(memcg, ss); @@ -4608,7 +4608,7 @@ static struct cftype mem_cgroup_files[] = { .read_seq_string = mem_control_numa_stat_show, }, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP { .name = "memsw.usage_in_bytes", .private = MEMFILE_PRIVATE(_MEMSWAP, RES_USAGE), @@ -4795,7 +4795,7 @@ struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg) } EXPORT_SYMBOL(parent_mem_cgroup); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static void __init enable_swap_cgroup(void) { if (!mem_cgroup_disabled() && really_do_swap_account) @@ -5526,7 +5526,7 @@ struct cgroup_subsys mem_cgroup_subsys = { .__DEPRECATED_clear_css_refs = true, }; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static int __init enable_swap_account(char *s) { /* consider enabled if no parameter or 1 is given */ diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b04ff2d6f73d..a6e2141a6610 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -128,7 +128,7 @@ static int hwpoison_filter_flags(struct page *p) * can only guarantee that the page either belongs to the memcg tasks, or is * a freed page. */ -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP u64 hwpoison_filter_memcg; EXPORT_SYMBOL_GPL(hwpoison_filter_memcg); static int hwpoison_filter_task(struct page *p) diff --git a/mm/mmzone.c b/mm/mmzone.c index 6830eab5bf09..3cef80f6ac79 100644 --- a/mm/mmzone.c +++ b/mm/mmzone.c @@ -96,7 +96,7 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone) for_each_lru(lru) INIT_LIST_HEAD(&lruvec->lists[lru]); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG lruvec->zone = zone; #endif } diff --git a/mm/oom_kill.c b/mm/oom_kill.c index c82ede69bf3f..e6c10640e56b 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -541,7 +541,7 @@ static void check_panic_on_oom(enum oom_constraint constraint, gfp_t gfp_mask, sysctl_panic_on_oom == 2 ? "compulsory" : "system-wide"); } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask, int order) { diff --git a/mm/page_cgroup.c b/mm/page_cgroup.c index eb750f851395..5ddad0c6daa6 100644 --- a/mm/page_cgroup.c +++ b/mm/page_cgroup.c @@ -317,7 +317,7 @@ void __meminit pgdat_page_cgroup_init(struct pglist_data *pgdat) #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_SWAP +#ifdef CONFIG_MEMCG_SWAP static DEFINE_MUTEX(swap_cgroup_mutex); struct swap_cgroup_ctrl { diff --git a/mm/vmscan.c b/mm/vmscan.c index 347b3ff2a478..6b1f89a91212 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -133,7 +133,7 @@ long vm_total_pages; /* The total number of pages which the VM controls */ static LIST_HEAD(shrinker_list); static DECLARE_RWSEM(shrinker_rwsem); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG static bool global_reclaim(struct scan_control *sc) { return !sc->target_mem_cgroup; @@ -2142,7 +2142,7 @@ unsigned long try_to_free_pages(struct zonelist *zonelist, int order, return nr_reclaimed; } -#ifdef CONFIG_CGROUP_MEM_RES_CTLR +#ifdef CONFIG_MEMCG unsigned long mem_cgroup_shrink_node_zone(struct mem_cgroup *memcg, gfp_t gfp_mask, bool noswap, diff --git a/net/core/sock.c b/net/core/sock.c index 2676a88f533e..a67b06280e4c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -142,7 +142,7 @@ static DEFINE_MUTEX(proto_list_mutex); static LIST_HEAD(proto_list); -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM int mem_cgroup_sockets_init(struct mem_cgroup *memcg, struct cgroup_subsys *ss) { struct proto *proto; diff --git a/net/ipv4/Makefile b/net/ipv4/Makefile index ae2ccf2890e4..15ca63ec604e 100644 --- a/net/ipv4/Makefile +++ b/net/ipv4/Makefile @@ -49,7 +49,7 @@ obj-$(CONFIG_TCP_CONG_SCALABLE) += tcp_scalable.o obj-$(CONFIG_TCP_CONG_LP) += tcp_lp.o obj-$(CONFIG_TCP_CONG_YEAH) += tcp_yeah.o obj-$(CONFIG_TCP_CONG_ILLINOIS) += tcp_illinois.o -obj-$(CONFIG_CGROUP_MEM_RES_CTLR_KMEM) += tcp_memcontrol.o +obj-$(CONFIG_MEMCG_KMEM) += tcp_memcontrol.o obj-$(CONFIG_NETLABEL) += cipso_ipv4.o obj-$(CONFIG_XFRM) += xfrm4_policy.o xfrm4_state.o xfrm4_input.o \ diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 5840c3255721..ed7db3f1b6f2 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -184,7 +184,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, int ret; unsigned long vec[3]; struct net *net = current->nsproxy->net_ns; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM struct mem_cgroup *memcg; #endif @@ -203,7 +203,7 @@ static int ipv4_tcp_mem(ctl_table *ctl, int write, if (ret) return ret; -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM rcu_read_lock(); memcg = mem_cgroup_from_task(current); diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2fbd9921253f..4bc8f6769b57 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2633,7 +2633,7 @@ struct proto tcp_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .init_cgroup = tcp_init_cgroup, .destroy_cgroup = tcp_destroy_cgroup, .proto_cgroup = tcp_proto_cgroup, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 221224e72507..61c7b6d83176 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -2015,7 +2015,7 @@ struct proto tcpv6_prot = { .compat_setsockopt = compat_tcp_setsockopt, .compat_getsockopt = compat_tcp_getsockopt, #endif -#ifdef CONFIG_CGROUP_MEM_RES_CTLR_KMEM +#ifdef CONFIG_MEMCG_KMEM .proto_cgroup = tcp_proto_cgroup, #endif }; -- cgit v1.2.3 From 99a1dec70d5acbd8c6b3928cdebb4a2d1da676c8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 31 Jul 2012 16:44:14 -0700 Subject: net: introduce sk_gfp_atomic() to allow addition of GFP flags depending on the individual socket Introduce sk_gfp_atomic(), this function allows to inject sock specific flags to each sock related allocation. It is only used on allocation paths that may be required for writing pages back to network storage. [davem@davemloft.net: Use sk_gfp_atomic only when necessary] Signed-off-by: Peter Zijlstra Signed-off-by: Mel Gorman Acked-by: David S. Miller Cc: Neil Brown Cc: Mike Christie Cc: Eric B Munson Cc: Eric Dumazet Cc: Sebastian Andrzej Siewior Cc: Mel Gorman Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/net/sock.h | 5 +++++ net/ipv4/tcp_output.c | 12 +++++++----- net/ipv6/tcp_ipv6.c | 8 +++++--- 3 files changed, 17 insertions(+), 8 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/sock.h b/include/net/sock.h index cee528c119ca..11ccde65c4cf 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -658,6 +658,11 @@ static inline bool sock_flag(const struct sock *sk, enum sock_flags flag) return test_bit(flag, &sk->sk_flags); } +static inline gfp_t sk_gfp_atomic(struct sock *sk, gfp_t gfp_mask) +{ + return GFP_ATOMIC; +} + static inline void sk_acceptq_removed(struct sock *sk) { sk->sk_ack_backlog--; diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index 33cd065cfbd8..3f1bcff0b10b 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -2045,7 +2045,8 @@ void __tcp_push_pending_frames(struct sock *sk, unsigned int cur_mss, if (unlikely(sk->sk_state == TCP_CLOSE)) return; - if (tcp_write_xmit(sk, cur_mss, nonagle, 0, GFP_ATOMIC)) + if (tcp_write_xmit(sk, cur_mss, nonagle, 0, + sk_gfp_atomic(sk, GFP_ATOMIC))) tcp_check_probe_timer(sk); } @@ -2666,7 +2667,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, if (cvp != NULL && cvp->s_data_constant && cvp->s_data_desired) s_data_desired = cvp->s_data_desired; - skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER + 15 + s_data_desired, + sk_gfp_atomic(sk, GFP_ATOMIC)); if (unlikely(!skb)) { dst_release(dst); return NULL; @@ -3064,7 +3066,7 @@ void tcp_send_ack(struct sock *sk) * tcp_transmit_skb() will set the ownership to this * sock. */ - buff = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + buff = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (buff == NULL) { inet_csk_schedule_ack(sk); inet_csk(sk)->icsk_ack.ato = TCP_ATO_MIN; @@ -3079,7 +3081,7 @@ void tcp_send_ack(struct sock *sk) /* Send it off, this clears delayed acks for us. */ TCP_SKB_CB(buff)->when = tcp_time_stamp; - tcp_transmit_skb(sk, buff, 0, GFP_ATOMIC); + tcp_transmit_skb(sk, buff, 0, sk_gfp_atomic(sk, GFP_ATOMIC)); } /* This routine sends a packet with an out of date sequence @@ -3099,7 +3101,7 @@ static int tcp_xmit_probe_skb(struct sock *sk, int urgent) struct sk_buff *skb; /* We don't queue it, tcp_transmit_skb() sets ownership. */ - skb = alloc_skb(MAX_TCP_HEADER, GFP_ATOMIC); + skb = alloc_skb(MAX_TCP_HEADER, sk_gfp_atomic(sk, GFP_ATOMIC)); if (skb == NULL) return -1; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 61c7b6d83176..c66b90f71c9b 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1299,7 +1299,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, /* Clone pktoptions received with SYN */ newnp->pktoptions = NULL; if (treq->pktopts != NULL) { - newnp->pktoptions = skb_clone(treq->pktopts, GFP_ATOMIC); + newnp->pktoptions = skb_clone(treq->pktopts, + sk_gfp_atomic(sk, GFP_ATOMIC)); consume_skb(treq->pktopts); treq->pktopts = NULL; if (newnp->pktoptions) @@ -1349,7 +1350,8 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, * across. Shucks. */ tcp_md5_do_add(newsk, (union tcp_md5_addr *)&newnp->daddr, - AF_INET6, key->key, key->keylen, GFP_ATOMIC); + AF_INET6, key->key, key->keylen, + sk_gfp_atomic(sk, GFP_ATOMIC)); } #endif @@ -1442,7 +1444,7 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) --ANK (980728) */ if (np->rxopt.all) - opt_skb = skb_clone(skb, GFP_ATOMIC); + opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ sock_rps_save_rxhash(sk, skb); -- cgit v1.2.3 From 5d299f3d3c8a2fbc732b1bf03af36333ccec3130 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Aug 2012 05:09:33 +0000 Subject: net: ipv6: fix TCP early demux IPv6 needs a cookie in dst_check() call. We need to add rx_dst_cookie and provide a family independent sk_rx_dst_set(sk, skb) method to properly support IPv6 TCP early demux. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/net/inet_connection_sock.h | 1 + include/net/inet_sock.h | 9 --------- net/ipv4/tcp_input.c | 4 +++- net/ipv4/tcp_ipv4.c | 13 ++++++++++--- net/ipv4/tcp_minisocks.c | 2 +- net/ipv6/tcp_ipv6.c | 27 +++++++++++++++++++++++++-- 7 files changed, 41 insertions(+), 16 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 379e433e15e0..879db26ec401 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -369,6 +369,7 @@ struct ipv6_pinfo { __u8 rcv_tclass; __u32 dst_cookie; + __u32 rx_dst_cookie; struct ipv6_mc_socklist __rcu *ipv6_mc_list; struct ipv6_ac_socklist *ipv6_ac_list; diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h index 5ee66f517b4f..ba1d3615acbb 100644 --- a/include/net/inet_connection_sock.h +++ b/include/net/inet_connection_sock.h @@ -39,6 +39,7 @@ struct inet_connection_sock_af_ops { int (*queue_xmit)(struct sk_buff *skb, struct flowi *fl); void (*send_check)(struct sock *sk, struct sk_buff *skb); int (*rebuild_header)(struct sock *sk); + void (*sk_rx_dst_set)(struct sock *sk, const struct sk_buff *skb); int (*conn_request)(struct sock *sk, struct sk_buff *skb); struct sock *(*syn_recv_sock)(struct sock *sk, struct sk_buff *skb, struct request_sock *req, diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 83b567fe1941..613cfa401672 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -249,13 +249,4 @@ static inline __u8 inet_sk_flowi_flags(const struct sock *sk) return flags; } -static inline void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; -} - #endif /* _INET_SOCK_H */ diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index 2fd2bc9e3c64..85308b90df80 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5392,6 +5392,8 @@ int tcp_rcv_established(struct sock *sk, struct sk_buff *skb, { struct tcp_sock *tp = tcp_sk(sk); + if (unlikely(sk->sk_rx_dst == NULL)) + inet_csk(sk)->icsk_af_ops->sk_rx_dst_set(sk, skb); /* * Header prediction. * The code loosely follows the one in the famous @@ -5605,7 +5607,7 @@ void tcp_finish_connect(struct sock *sk, struct sk_buff *skb) tcp_set_state(sk, TCP_ESTABLISHED); if (skb != NULL) { - inet_sk_rx_dst_set(sk, skb); + icsk->icsk_af_ops->sk_rx_dst_set(sk, skb); security_inet_conn_established(sk, skb); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a73092..272241f16fcb 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1627,9 +1627,6 @@ int tcp_v4_do_rcv(struct sock *sk, struct sk_buff *skb) sk->sk_rx_dst = NULL; } } - if (unlikely(sk->sk_rx_dst == NULL)) - inet_sk_rx_dst_set(sk, skb); - if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) { rsk = sk; goto reset; @@ -1872,10 +1869,20 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; +} + const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v4_conn_request, .syn_recv_sock = tcp_v4_syn_recv_sock, .net_header_len = sizeof(struct iphdr), diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 232a90c3ec86..d9c9dcef2de3 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,7 +387,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - inet_sk_rx_dst_set(newsk, skb); + newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9b..5a439e9a4c01 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1447,7 +1447,17 @@ static int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb) opt_skb = skb_clone(skb, sk_gfp_atomic(sk, GFP_ATOMIC)); if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */ + struct dst_entry *dst = sk->sk_rx_dst; + sock_rps_save_rxhash(sk, skb); + if (dst) { + if (inet_sk(sk)->rx_dst_ifindex != skb->skb_iif || + dst->ops->check(dst, np->rx_dst_cookie) == NULL) { + dst_release(dst); + sk->sk_rx_dst = NULL; + } + } + if (tcp_rcv_established(sk, skb, tcp_hdr(skb), skb->len)) goto reset; if (opt_skb) @@ -1705,9 +1715,9 @@ static void tcp_v6_early_demux(struct sk_buff *skb) struct dst_entry *dst = sk->sk_rx_dst; struct inet_sock *icsk = inet_sk(sk); if (dst) - dst = dst_check(dst, 0); + dst = dst_check(dst, inet6_sk(sk)->rx_dst_cookie); if (dst && - icsk->rx_dst_ifindex == inet6_iif(skb)) + icsk->rx_dst_ifindex == skb->skb_iif) skb_dst_set_noref(skb, dst); } } @@ -1719,10 +1729,23 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, .rebuild_header = inet6_sk_rebuild_header, + .sk_rx_dst_set = inet6_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct ipv6hdr), -- cgit v1.2.3 From a399a8053164ec8bcb06fed52be9941a26ecde11 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Aug 2012 21:13:53 +0000 Subject: time: jiffies_delta_to_clock_t() helper to the rescue MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various /proc/net files sometimes report crazy timer values, expressed in clock_t units. This happens when an expired timer delta (expires - jiffies) is passed to jiffies_to_clock_t(). This function has an overflow in : return div_u64((u64)x * TICK_NSEC, NSEC_PER_SEC / USER_HZ); commit cbbc719fccdb8cb (time: Change jiffies_to_clock_t() argument type to unsigned long) only got around the problem. As we cant output negative values in /proc/net/tcp without breaking various tools, I suggest adding a jiffies_delta_to_clock_t() wrapper that caps the negative delta to a 0 value. Signed-off-by: Eric Dumazet Reported-by: Maciej Żenczykowski Cc: Thomas Gleixner Cc: Paul Gortmaker Cc: Andrew Morton Cc: hank Signed-off-by: David S. Miller --- include/linux/jiffies.h | 6 ++++++ net/bridge/br_fdb.c | 2 +- net/bridge/br_stp_timer.c | 2 +- net/core/rtnetlink.c | 2 +- net/ipv4/igmp.c | 7 +++++-- net/ipv4/tcp_ipv4.c | 13 +++++-------- net/ipv6/tcp_ipv6.c | 9 +++------ 7 files changed, 22 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 265e2c3cbd1c..aded9b1ac5ca 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -303,7 +303,13 @@ extern void jiffies_to_timespec(const unsigned long jiffies, extern unsigned long timeval_to_jiffies(const struct timeval *value); extern void jiffies_to_timeval(const unsigned long jiffies, struct timeval *value); + extern clock_t jiffies_to_clock_t(unsigned long x); +static inline clock_t jiffies_delta_to_clock_t(long delta) +{ + return jiffies_to_clock_t(max(0L, delta)); +} + extern unsigned long clock_t_to_jiffies(unsigned long x); extern u64 jiffies_64_to_clock_t(u64 x); extern u64 nsec_to_clock_t(u64 x); diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index d21f32383517..9ce430b4657c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -312,7 +312,7 @@ int br_fdb_fillbuf(struct net_bridge *br, void *buf, fe->is_local = f->is_local; if (!f->is_static) - fe->ageing_timer_value = jiffies_to_clock_t(jiffies - f->updated); + fe->ageing_timer_value = jiffies_delta_to_clock_t(jiffies - f->updated); ++fe; ++num; } diff --git a/net/bridge/br_stp_timer.c b/net/bridge/br_stp_timer.c index a6747e673426..c3530a81a33b 100644 --- a/net/bridge/br_stp_timer.c +++ b/net/bridge/br_stp_timer.c @@ -170,5 +170,5 @@ void br_stp_port_timer_init(struct net_bridge_port *p) unsigned long br_timer_value(const struct timer_list *timer) { return timer_pending(timer) - ? jiffies_to_clock_t(timer->expires - jiffies) : 0; + ? jiffies_delta_to_clock_t(timer->expires - jiffies) : 0; } diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 2c5a0a06c4ce..db037c9a4c48 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -618,7 +618,7 @@ int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error) { struct rta_cacheinfo ci = { - .rta_lastuse = jiffies_to_clock_t(jiffies - dst->lastuse), + .rta_lastuse = jiffies_delta_to_clock_t(jiffies - dst->lastuse), .rta_used = dst->__use, .rta_clntref = atomic_read(&(dst->__refcnt)), .rta_error = error, diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 6699f23e6f55..0b5580c69f2d 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -2435,6 +2435,8 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) struct ip_mc_list *im = (struct ip_mc_list *)v; struct igmp_mc_iter_state *state = igmp_mc_seq_private(seq); char *querier; + long delta; + #ifdef CONFIG_IP_MULTICAST querier = IGMP_V1_SEEN(state->in_dev) ? "V1" : IGMP_V2_SEEN(state->in_dev) ? "V2" : @@ -2448,11 +2450,12 @@ static int igmp_mc_seq_show(struct seq_file *seq, void *v) state->dev->ifindex, state->dev->name, state->in_dev->mc_count, querier); } + delta = im->timer.expires - jiffies; seq_printf(seq, "\t\t\t\t%08X %5d %d:%08lX\t\t%d\n", im->multiaddr, im->users, - im->tm_running, im->tm_running ? - jiffies_to_clock_t(im->timer.expires-jiffies) : 0, + im->tm_running, + im->tm_running ? jiffies_delta_to_clock_t(delta) : 0, im->reporter); } return 0; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a73092..c660d2c19a2b 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2385,7 +2385,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, struct seq_file *f, int i, int uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); - int ttd = req->expires - jiffies; + long delta = req->expires - jiffies; seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %u %d %pK%n", @@ -2397,7 +2397,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, TCP_SYN_RECV, 0, 0, /* could print option size, but that is af dependent. */ 1, /* timers active (only the expire timer) */ - jiffies_to_clock_t(ttd), + jiffies_delta_to_clock_t(delta), req->retrans, uid, 0, /* non standard timer */ @@ -2448,7 +2448,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) tp->write_seq - tp->snd_una, rx_queue, timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sk), icsk->icsk_probes_out, @@ -2467,10 +2467,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, { __be32 dest, src; __u16 destp, srcp; - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = tw->tw_daddr; src = tw->tw_rcv_saddr; @@ -2480,7 +2477,7 @@ static void get_timewait4_sock(const struct inet_timewait_sock *tw, seq_printf(f, "%4d: %08X:%04X %08X:%04X" " %02X %08X:%08X %02X:%08lX %08X %5d %8d %d %d %pK%n", i, src, srcp, dest, destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw, len); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9b..aa41b0e6b163 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1875,7 +1875,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) tp->write_seq-tp->snd_una, (sp->sk_state == TCP_LISTEN) ? sp->sk_ack_backlog : (tp->rcv_nxt - tp->copied_seq), timer_active, - jiffies_to_clock_t(timer_expires - jiffies), + jiffies_delta_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, sock_i_uid(sp), icsk->icsk_probes_out, @@ -1895,10 +1895,7 @@ static void get_timewait6_sock(struct seq_file *seq, const struct in6_addr *dest, *src; __u16 destp, srcp; const struct inet6_timewait_sock *tw6 = inet6_twsk((struct sock *)tw); - int ttd = tw->tw_ttd - jiffies; - - if (ttd < 0) - ttd = 0; + long delta = tw->tw_ttd - jiffies; dest = &tw6->tw_v6_daddr; src = &tw6->tw_v6_rcv_saddr; @@ -1914,7 +1911,7 @@ static void get_timewait6_sock(struct seq_file *seq, dest->s6_addr32[0], dest->s6_addr32[1], dest->s6_addr32[2], dest->s6_addr32[3], destp, tw->tw_substate, 0, 0, - 3, jiffies_to_clock_t(ttd), 0, 0, 0, 0, + 3, jiffies_delta_to_clock_t(delta), 0, 0, 0, 0, atomic_read(&tw->tw_refcnt), tw); } -- cgit v1.2.3 From 1fb9489bf190ce2b3fc03891f3de4b2d30600e28 Mon Sep 17 00:00:00 2001 From: Pavel Emelyanov Date: Wed, 8 Aug 2012 21:53:36 +0000 Subject: net: Loopback ifindex is constant now As pointed out, there are places, that access net->loopback_dev->ifindex and after ifindex generation is made per-net this value becomes constant equals 1. So go ahead and introduce the LOOPBACK_IFINDEX constant and use it where appropriate. Signed-off-by: Pavel Emelyanov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- drivers/net/loopback.c | 1 + include/net/net_namespace.h | 7 +++++++ net/decnet/dn_route.c | 6 +++--- net/ipv4/fib_frontend.c | 2 +- net/ipv4/ipmr.c | 2 +- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv4/route.c | 6 +++--- net/ipv6/route.c | 2 +- 8 files changed, 18 insertions(+), 10 deletions(-) (limited to 'net/ipv6') diff --git a/drivers/net/loopback.c b/drivers/net/loopback.c index e2a06fd996d5..4a075babe193 100644 --- a/drivers/net/loopback.c +++ b/drivers/net/loopback.c @@ -197,6 +197,7 @@ static __net_init int loopback_net_init(struct net *net) if (err) goto out_free_netdev; + BUG_ON(dev->ifindex != LOOPBACK_IFINDEX); net->loopback_dev = dev; return 0; diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 6dc3db3466bf..97e441945c13 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -105,6 +105,13 @@ struct net { struct sock *diag_nlsk; }; +/* + * ifindex generation is per-net namespace, and loopback is + * always the 1st device in ns (see net_dev_init), thus any + * loopback device should get ifindex 1 + */ + +#define LOOPBACK_IFINDEX 1 #include diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index 85a3604c87c8..c855e8d0738f 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -961,7 +961,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o .saddr = oldflp->saddr, .flowidn_scope = RT_SCOPE_UNIVERSE, .flowidn_mark = oldflp->flowidn_mark, - .flowidn_iif = init_net.loopback_dev->ifindex, + .flowidn_iif = LOOPBACK_IFINDEX, .flowidn_oif = oldflp->flowidn_oif, }; struct dn_route *rt = NULL; @@ -979,7 +979,7 @@ static int dn_route_output_slow(struct dst_entry **pprt, const struct flowidn *o "dn_route_output_slow: dst=%04x src=%04x mark=%d" " iif=%d oif=%d\n", le16_to_cpu(oldflp->daddr), le16_to_cpu(oldflp->saddr), - oldflp->flowidn_mark, init_net.loopback_dev->ifindex, + oldflp->flowidn_mark, LOOPBACK_IFINDEX, oldflp->flowidn_oif); /* If we have an output interface, verify its a DECnet device */ @@ -1042,7 +1042,7 @@ source_ok: if (!fld.daddr) goto out; } - fld.flowidn_oif = init_net.loopback_dev->ifindex; + fld.flowidn_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; goto make_route; } diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index c43ae3fba792..7f073a38c87d 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -218,7 +218,7 @@ __be32 fib_compute_spec_dst(struct sk_buff *skb) scope = RT_SCOPE_UNIVERSE; if (!ipv4_is_zeronet(ip_hdr(skb)->saddr)) { fl4.flowi4_oif = 0; - fl4.flowi4_iif = net->loopback_dev->ifindex; + fl4.flowi4_iif = LOOPBACK_IFINDEX; fl4.daddr = ip_hdr(skb)->saddr; fl4.saddr = 0; fl4.flowi4_tos = RT_TOS(ip_hdr(skb)->tos); diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8eec8f4a0536..3a57570c8ee5 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -1798,7 +1798,7 @@ static struct mr_table *ipmr_rt_fib_lookup(struct net *net, struct sk_buff *skb) .flowi4_oif = (rt_is_output_route(rt) ? skb->dev->ifindex : 0), .flowi4_iif = (rt_is_output_route(rt) ? - net->loopback_dev->ifindex : + LOOPBACK_IFINDEX : skb->dev->ifindex), .flowi4_mark = skb->mark, }; diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 31371be8174b..c30130062cd6 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -85,7 +85,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) return ipv4_is_local_multicast(iph->daddr) ^ invert; flow.flowi4_iif = 0; } else { - flow.flowi4_iif = dev_net(par->in)->loopback_dev->ifindex; + flow.flowi4_iif = LOOPBACK_IFINDEX; } flow.daddr = iph->saddr; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 21ad369014c0..c58137391a3d 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1619,7 +1619,7 @@ static int ip_route_input_slow(struct sk_buff *skb, __be32 daddr, __be32 saddr, if (res.type == RTN_LOCAL) { err = fib_validate_source(skb, saddr, daddr, tos, - net->loopback_dev->ifindex, + LOOPBACK_IFINDEX, dev, in_dev, &itag); if (err < 0) goto martian_source_keep_err; @@ -1895,7 +1895,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) orig_oif = fl4->flowi4_oif; - fl4->flowi4_iif = net->loopback_dev->ifindex; + fl4->flowi4_iif = LOOPBACK_IFINDEX; fl4->flowi4_tos = tos & IPTOS_RT_MASK; fl4->flowi4_scope = ((tos & RTO_ONLINK) ? RT_SCOPE_LINK : RT_SCOPE_UNIVERSE); @@ -1984,7 +1984,7 @@ struct rtable *__ip_route_output_key(struct net *net, struct flowi4 *fl4) if (!fl4->daddr) fl4->daddr = fl4->saddr = htonl(INADDR_LOOPBACK); dev_out = net->loopback_dev; - fl4->flowi4_oif = net->loopback_dev->ifindex; + fl4->flowi4_oif = LOOPBACK_IFINDEX; res.type = RTN_LOCAL; flags |= RTCF_LOCAL; goto make_route; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e80fd279100..0ddf2d132e7f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -965,7 +965,7 @@ struct dst_entry * ip6_route_output(struct net *net, const struct sock *sk, { int flags = 0; - fl6->flowi6_iif = net->loopback_dev->ifindex; + fl6->flowi6_iif = LOOPBACK_IFINDEX; if ((sk && sk->sk_bound_dev_if) || rt6_need_strict(&fl6->daddr)) flags |= RT6_LOOKUP_F_IFACE; -- cgit v1.2.3 From 63d02d157ec4124990258d66517b6c11fd6df0cf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 9 Aug 2012 14:11:00 +0000 Subject: net: tcp: ipv6_mapped needs sk_rx_dst_set method commit 5d299f3d3c8a2fb (net: ipv6: fix TCP early demux) added a regression for ipv6_mapped case. [ 67.422369] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 67.449678] SELinux: initialized (dev autofs, type autofs), uses genfs_contexts [ 92.631060] BUG: unable to handle kernel NULL pointer dereference at (null) [ 92.631435] IP: [< (null)>] (null) [ 92.631645] PGD 0 [ 92.631846] Oops: 0010 [#1] SMP [ 92.632095] Modules linked in: autofs4 sunrpc ipv6 dm_mirror dm_region_hash dm_log dm_multipath dm_mod video sbs sbshc battery ac lp parport sg snd_hda_intel snd_hda_codec snd_seq_oss snd_seq_midi_event snd_seq snd_seq_device pcspkr snd_pcm_oss snd_mixer_oss snd_pcm snd_timer serio_raw button floppy snd i2c_i801 i2c_core soundcore snd_page_alloc shpchp ide_cd_mod cdrom microcode ehci_hcd ohci_hcd uhci_hcd [ 92.634294] CPU 0 [ 92.634294] Pid: 4469, comm: sendmail Not tainted 3.6.0-rc1 #3 [ 92.634294] RIP: 0010:[<0000000000000000>] [< (null)>] (null) [ 92.634294] RSP: 0018:ffff880245fc7cb0 EFLAGS: 00010282 [ 92.634294] RAX: ffffffffa01985f0 RBX: ffff88024827ad00 RCX: 0000000000000000 [ 92.634294] RDX: 0000000000000218 RSI: ffff880254735380 RDI: ffff88024827ad00 [ 92.634294] RBP: ffff880245fc7cc8 R08: 0000000000000001 R09: 0000000000000000 [ 92.634294] R10: 0000000000000000 R11: ffff880245fc7bf8 R12: ffff880254735380 [ 92.634294] R13: ffff880254735380 R14: 0000000000000000 R15: 7fffffffffff0218 [ 92.634294] FS: 00007f4516ccd6f0(0000) GS:ffff880256600000(0000) knlGS:0000000000000000 [ 92.634294] CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b [ 92.634294] CR2: 0000000000000000 CR3: 0000000245ed1000 CR4: 00000000000007f0 [ 92.634294] DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 [ 92.634294] DR3: 0000000000000000 DR6: 00000000ffff0ff0 DR7: 0000000000000400 [ 92.634294] Process sendmail (pid: 4469, threadinfo ffff880245fc6000, task ffff880254b8cac0) [ 92.634294] Stack: [ 92.634294] ffffffff813837a7 ffff88024827ad00 ffff880254b6b0e8 ffff880245fc7d68 [ 92.634294] ffffffff81385083 00000000001d2680 ffff8802547353a8 ffff880245fc7d18 [ 92.634294] ffffffff8105903a ffff88024827ad60 0000000000000002 00000000000000ff [ 92.634294] Call Trace: [ 92.634294] [] ? tcp_finish_connect+0x2c/0xfa [ 92.634294] [] tcp_rcv_state_process+0x2b6/0x9c6 [ 92.634294] [] ? sched_clock_cpu+0xc3/0xd1 [ 92.634294] [] ? local_clock+0x2b/0x3c [ 92.634294] [] tcp_v4_do_rcv+0x63a/0x670 [ 92.634294] [] release_sock+0x128/0x1bd [ 92.634294] [] __inet_stream_connect+0x1b1/0x352 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? wake_up_bit+0x25/0x25 [ 92.634294] [] ? lock_sock_nested+0x74/0x7f [ 92.634294] [] ? inet_stream_connect+0x22/0x4b [ 92.634294] [] inet_stream_connect+0x33/0x4b [ 92.634294] [] sys_connect+0x78/0x9e [ 92.634294] [] ? sysret_check+0x1b/0x56 [ 92.634294] [] ? __audit_syscall_entry+0x195/0x1c8 [ 92.634294] [] ? trace_hardirqs_on_thunk+0x3a/0x3f [ 92.634294] [] system_call_fastpath+0x16/0x1b [ 92.634294] Code: Bad RIP value. [ 92.634294] RIP [< (null)>] (null) [ 92.634294] RSP [ 92.634294] CR2: 0000000000000000 [ 92.648982] ---[ end trace 24e2bed94314c8d9 ]--- [ 92.649146] Kernel panic - not syncing: Fatal exception in interrupt Fix this using inet_sk_rx_dst_set(), and export this function in case IPv6 is modular. Reported-by: Andrew Morton Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_ipv4.c | 3 ++- net/ipv6/tcp_ipv6.c | 1 + 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/include/net/tcp.h b/include/net/tcp.h index e19124b84cd2..1f000ffe7075 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -464,6 +464,7 @@ extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); void tcp_finish_connect(struct sock *sk, struct sk_buff *skb); int tcp_send_rcvq(struct sock *sk, struct msghdr *msg, size_t size); +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb); /* From syncookies.c */ extern __u32 syncookie_secret[2][16-4+SHA_DIGEST_WORDS]; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 272241f16fcb..767823764016 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1869,7 +1869,7 @@ static struct timewait_sock_ops tcp_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) { struct dst_entry *dst = skb_dst(skb); @@ -1877,6 +1877,7 @@ static void inet_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) sk->sk_rx_dst = dst; inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; } +EXPORT_SYMBOL(inet_sk_rx_dst_set); const struct inet_connection_sock_af_ops ipv4_specific = { .queue_xmit = ip_queue_xmit, diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5a439e9a4c01..bb9ce2b2f377 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1777,6 +1777,7 @@ static const struct inet_connection_sock_af_ops ipv6_mapped = { .queue_xmit = ip_queue_xmit, .send_check = tcp_v4_send_check, .rebuild_header = inet_sk_rebuild_header, + .sk_rx_dst_set = inet_sk_rx_dst_set, .conn_request = tcp_v6_conn_request, .syn_recv_sock = tcp_v6_syn_recv_sock, .net_header_len = sizeof(struct iphdr), -- cgit v1.2.3 From 19e303d67dc2e68a7f14b0baf7949195d7327145 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sat, 28 Jul 2012 14:45:50 +0000 Subject: netfilter: PTR_RET can be used This quiets the coccinelle warnings: net/bridge/netfilter/ebtable_filter.c:107:1-3: WARNING: PTR_RET can be used net/bridge/netfilter/ebtable_nat.c:107:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_filter.c:65:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_mangle.c:100:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_raw.c:44:1-3: WARNING: PTR_RET can be used net/ipv6/netfilter/ip6table_security.c:62:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_filter.c:72:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_mangle.c:107:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_raw.c:51:1-3: WARNING: PTR_RET can be used net/ipv4/netfilter/iptable_security.c:70:1-3: WARNING: PTR_RET can be used Signed-off-by: Fengguang Wu Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtable_filter.c | 4 +--- net/bridge/netfilter/ebtable_nat.c | 4 +--- net/ipv4/netfilter/iptable_filter.c | 4 +--- net/ipv4/netfilter/iptable_mangle.c | 4 +--- net/ipv4/netfilter/iptable_raw.c | 4 +--- net/ipv4/netfilter/iptable_security.c | 5 +---- net/ipv6/netfilter/ip6table_filter.c | 4 +--- net/ipv6/netfilter/ip6table_mangle.c | 4 +--- net/ipv6/netfilter/ip6table_raw.c | 4 +--- net/ipv6/netfilter/ip6table_security.c | 5 +---- 10 files changed, 10 insertions(+), 32 deletions(-) (limited to 'net/ipv6') diff --git a/net/bridge/netfilter/ebtable_filter.c b/net/bridge/netfilter/ebtable_filter.c index 42e6bd094574..3c2e9dced9e0 100644 --- a/net/bridge/netfilter/ebtable_filter.c +++ b/net/bridge/netfilter/ebtable_filter.c @@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_filter[] __read_mostly = { static int __net_init frame_filter_net_init(struct net *net) { net->xt.frame_filter = ebt_register_table(net, &frame_filter); - if (IS_ERR(net->xt.frame_filter)) - return PTR_ERR(net->xt.frame_filter); - return 0; + return PTR_RET(net->xt.frame_filter); } static void __net_exit frame_filter_net_exit(struct net *net) diff --git a/net/bridge/netfilter/ebtable_nat.c b/net/bridge/netfilter/ebtable_nat.c index 6dc2f878ae05..10871bc77908 100644 --- a/net/bridge/netfilter/ebtable_nat.c +++ b/net/bridge/netfilter/ebtable_nat.c @@ -100,9 +100,7 @@ static struct nf_hook_ops ebt_ops_nat[] __read_mostly = { static int __net_init frame_nat_net_init(struct net *net) { net->xt.frame_nat = ebt_register_table(net, &frame_nat); - if (IS_ERR(net->xt.frame_nat)) - return PTR_ERR(net->xt.frame_nat); - return 0; + return PTR_RET(net->xt.frame_nat); } static void __net_exit frame_nat_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_filter.c b/net/ipv4/netfilter/iptable_filter.c index 851acec852d2..d20cc374025c 100644 --- a/net/ipv4/netfilter/iptable_filter.c +++ b/net/ipv4/netfilter/iptable_filter.c @@ -69,9 +69,7 @@ static int __net_init iptable_filter_net_init(struct net *net) net->ipv4.iptable_filter = ipt_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_filter)) - return PTR_ERR(net->ipv4.iptable_filter); - return 0; + return PTR_RET(net->ipv4.iptable_filter); } static void __net_exit iptable_filter_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_mangle.c b/net/ipv4/netfilter/iptable_mangle.c index aef5d1fbe77d..f38b94288cf5 100644 --- a/net/ipv4/netfilter/iptable_mangle.c +++ b/net/ipv4/netfilter/iptable_mangle.c @@ -104,9 +104,7 @@ static int __net_init iptable_mangle_net_init(struct net *net) net->ipv4.iptable_mangle = ipt_register_table(net, &packet_mangler, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_mangle)) - return PTR_ERR(net->ipv4.iptable_mangle); - return 0; + return PTR_RET(net->ipv4.iptable_mangle); } static void __net_exit iptable_mangle_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_raw.c b/net/ipv4/netfilter/iptable_raw.c index 07fb710cd722..b21e2191e2f5 100644 --- a/net/ipv4/netfilter/iptable_raw.c +++ b/net/ipv4/netfilter/iptable_raw.c @@ -48,9 +48,7 @@ static int __net_init iptable_raw_net_init(struct net *net) net->ipv4.iptable_raw = ipt_register_table(net, &packet_raw, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_raw)) - return PTR_ERR(net->ipv4.iptable_raw); - return 0; + return PTR_RET(net->ipv4.iptable_raw); } static void __net_exit iptable_raw_net_exit(struct net *net) diff --git a/net/ipv4/netfilter/iptable_security.c b/net/ipv4/netfilter/iptable_security.c index be45bdc4c602..b283d8e2601a 100644 --- a/net/ipv4/netfilter/iptable_security.c +++ b/net/ipv4/netfilter/iptable_security.c @@ -66,10 +66,7 @@ static int __net_init iptable_security_net_init(struct net *net) net->ipv4.iptable_security = ipt_register_table(net, &security_table, repl); kfree(repl); - if (IS_ERR(net->ipv4.iptable_security)) - return PTR_ERR(net->ipv4.iptable_security); - - return 0; + return PTR_RET(net->ipv4.iptable_security); } static void __net_exit iptable_security_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_filter.c b/net/ipv6/netfilter/ip6table_filter.c index 325e59a0224f..beb5777d2043 100644 --- a/net/ipv6/netfilter/ip6table_filter.c +++ b/net/ipv6/netfilter/ip6table_filter.c @@ -61,9 +61,7 @@ static int __net_init ip6table_filter_net_init(struct net *net) net->ipv6.ip6table_filter = ip6t_register_table(net, &packet_filter, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_filter)) - return PTR_ERR(net->ipv6.ip6table_filter); - return 0; + return PTR_RET(net->ipv6.ip6table_filter); } static void __net_exit ip6table_filter_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_mangle.c b/net/ipv6/netfilter/ip6table_mangle.c index 4d782405f125..7431121b87de 100644 --- a/net/ipv6/netfilter/ip6table_mangle.c +++ b/net/ipv6/netfilter/ip6table_mangle.c @@ -97,9 +97,7 @@ static int __net_init ip6table_mangle_net_init(struct net *net) net->ipv6.ip6table_mangle = ip6t_register_table(net, &packet_mangler, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_mangle)) - return PTR_ERR(net->ipv6.ip6table_mangle); - return 0; + return PTR_RET(net->ipv6.ip6table_mangle); } static void __net_exit ip6table_mangle_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_raw.c b/net/ipv6/netfilter/ip6table_raw.c index 5b9926a011bd..60d1bddff7a0 100644 --- a/net/ipv6/netfilter/ip6table_raw.c +++ b/net/ipv6/netfilter/ip6table_raw.c @@ -40,9 +40,7 @@ static int __net_init ip6table_raw_net_init(struct net *net) net->ipv6.ip6table_raw = ip6t_register_table(net, &packet_raw, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_raw)) - return PTR_ERR(net->ipv6.ip6table_raw); - return 0; + return PTR_RET(net->ipv6.ip6table_raw); } static void __net_exit ip6table_raw_net_exit(struct net *net) diff --git a/net/ipv6/netfilter/ip6table_security.c b/net/ipv6/netfilter/ip6table_security.c index 91aa2b4d83c9..db155351339c 100644 --- a/net/ipv6/netfilter/ip6table_security.c +++ b/net/ipv6/netfilter/ip6table_security.c @@ -58,10 +58,7 @@ static int __net_init ip6table_security_net_init(struct net *net) net->ipv6.ip6table_security = ip6t_register_table(net, &security_table, repl); kfree(repl); - if (IS_ERR(net->ipv6.ip6table_security)) - return PTR_ERR(net->ipv6.ip6table_security); - - return 0; + return PTR_RET(net->ipv6.ip6table_security); } static void __net_exit ip6table_security_net_exit(struct net *net) -- cgit v1.2.3 From c12b395a46646bab69089ce7016ac78177f6001f Mon Sep 17 00:00:00 2001 From: "xeb@mail.ru" Date: Fri, 10 Aug 2012 00:51:50 +0000 Subject: gre: Support GRE over IPv6 GRE over IPv6 implementation. Signed-off-by: Dmitry Kozlov Signed-off-by: David S. Miller --- include/linux/if_arp.h | 1 + include/linux/if_tunnel.h | 3 + include/linux/ip6_tunnel.h | 17 + include/net/ip6_tunnel.h | 41 +- include/net/ipv6.h | 1 + net/ipv6/Kconfig | 16 + net/ipv6/Makefile | 1 + net/ipv6/ip6_gre.c | 1790 ++++++++++++++++++++++++++++++++++++++++++++ net/ipv6/ip6_tunnel.c | 89 ++- 9 files changed, 1933 insertions(+), 26 deletions(-) create mode 100644 net/ipv6/ip6_gre.c (limited to 'net/ipv6') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index f0e69c6e8208..9adcc29f084a 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -92,6 +92,7 @@ #define ARPHRD_PHONET 820 /* PhoNet media type */ #define ARPHRD_PHONET_PIPE 821 /* PhoNet pipe header */ #define ARPHRD_CAIF 822 /* CAIF media type */ +#define ARPHRD_IP6GRE 823 /* GRE over IPv6 */ #define ARPHRD_VOID 0xFFFF /* Void type, nothing is known */ #define ARPHRD_NONE 0xFFFE /* zero header length */ diff --git a/include/linux/if_tunnel.h b/include/linux/if_tunnel.h index 5efff60b6f56..8c5035ac3142 100644 --- a/include/linux/if_tunnel.h +++ b/include/linux/if_tunnel.h @@ -75,6 +75,9 @@ enum { IFLA_GRE_TTL, IFLA_GRE_TOS, IFLA_GRE_PMTUDISC, + IFLA_GRE_ENCAP_LIMIT, + IFLA_GRE_FLOWINFO, + IFLA_GRE_FLAGS, __IFLA_GRE_MAX, }; diff --git a/include/linux/ip6_tunnel.h b/include/linux/ip6_tunnel.h index bf22b0317902..48af63c9a48d 100644 --- a/include/linux/ip6_tunnel.h +++ b/include/linux/ip6_tunnel.h @@ -31,4 +31,21 @@ struct ip6_tnl_parm { struct in6_addr raddr; /* remote tunnel end-point address */ }; +struct ip6_tnl_parm2 { + char name[IFNAMSIZ]; /* name of tunnel device */ + int link; /* ifindex of underlying L2 interface */ + __u8 proto; /* tunnel protocol */ + __u8 encap_limit; /* encapsulation limit for tunnel */ + __u8 hop_limit; /* hop limit for tunnel */ + __be32 flowinfo; /* traffic class and flowlabel for tunnel */ + __u32 flags; /* tunnel flags */ + struct in6_addr laddr; /* local tunnel end-point address */ + struct in6_addr raddr; /* remote tunnel end-point address */ + + __be16 i_flags; + __be16 o_flags; + __be32 i_key; + __be32 o_key; +}; + #endif diff --git a/include/net/ip6_tunnel.h b/include/net/ip6_tunnel.h index 358fb86f57eb..e03047f7090b 100644 --- a/include/net/ip6_tunnel.h +++ b/include/net/ip6_tunnel.h @@ -5,6 +5,8 @@ #include #include +#define IP6TUNNEL_ERR_TIMEO (30*HZ) + /* capable of sending packets */ #define IP6_TNL_F_CAP_XMIT 0x10000 /* capable of receiving packets */ @@ -12,15 +14,40 @@ /* determine capability on a per-packet basis */ #define IP6_TNL_F_CAP_PER_PACKET 0x40000 -/* IPv6 tunnel */ +struct __ip6_tnl_parm { + char name[IFNAMSIZ]; /* name of tunnel device */ + int link; /* ifindex of underlying L2 interface */ + __u8 proto; /* tunnel protocol */ + __u8 encap_limit; /* encapsulation limit for tunnel */ + __u8 hop_limit; /* hop limit for tunnel */ + __be32 flowinfo; /* traffic class and flowlabel for tunnel */ + __u32 flags; /* tunnel flags */ + struct in6_addr laddr; /* local tunnel end-point address */ + struct in6_addr raddr; /* remote tunnel end-point address */ + + __be16 i_flags; + __be16 o_flags; + __be32 i_key; + __be32 o_key; +}; +/* IPv6 tunnel */ struct ip6_tnl { struct ip6_tnl __rcu *next; /* next tunnel in list */ struct net_device *dev; /* virtual device associated with tunnel */ - struct ip6_tnl_parm parms; /* tunnel configuration parameters */ + struct __ip6_tnl_parm parms; /* tunnel configuration parameters */ struct flowi fl; /* flowi template for xmit */ struct dst_entry *dst_cache; /* cached dst */ u32 dst_cookie; + + int err_count; + unsigned long err_time; + + /* These fields used only by GRE */ + __u32 i_seqno; /* The last seen seqno */ + __u32 o_seqno; /* The last output seqno */ + int hlen; /* Precalculated GRE header length */ + int mlink; }; /* Tunnel encapsulation limit destination sub-option */ @@ -31,4 +58,14 @@ struct ipv6_tlv_tnl_enc_lim { __u8 encap_limit; /* tunnel encapsulation limit */ } __packed; +struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t); +void ip6_tnl_dst_reset(struct ip6_tnl *t); +void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst); +int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, + const struct in6_addr *raddr); +int ip6_tnl_xmit_ctl(struct ip6_tnl *t); +__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw); +__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, + const struct in6_addr *raddr); + #endif diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 01c34b363a34..6d01fb00ff2b 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -34,6 +34,7 @@ #define NEXTHDR_IPV6 41 /* IPv6 in IPv6 */ #define NEXTHDR_ROUTING 43 /* Routing header. */ #define NEXTHDR_FRAGMENT 44 /* Fragmentation/reassembly header. */ +#define NEXTHDR_GRE 47 /* GRE header. */ #define NEXTHDR_ESP 50 /* Encapsulating security payload. */ #define NEXTHDR_AUTH 51 /* Authentication header. */ #define NEXTHDR_ICMP 58 /* ICMP for IPv6. */ diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig index 5728695b5449..4f7fe7270e37 100644 --- a/net/ipv6/Kconfig +++ b/net/ipv6/Kconfig @@ -201,6 +201,22 @@ config IPV6_TUNNEL If unsure, say N. +config IPV6_GRE + tristate "IPv6: GRE tunnel" + select IPV6_TUNNEL + ---help--- + Tunneling means encapsulating data of one protocol type within + another protocol and sending it over a channel that understands the + encapsulating protocol. This particular tunneling driver implements + GRE (Generic Routing Encapsulation) and at this time allows + encapsulating of IPv4 or IPv6 over existing IPv6 infrastructure. + This driver is useful if the other endpoint is a Cisco router: Cisco + likes GRE much better than the other Linux tunneling driver ("IP + tunneling" above). In addition, GRE allows multicast redistribution + through the tunnel. + + Saying M here will produce a module called ip6_gre. If unsure, say N. + config IPV6_MULTIPLE_TABLES bool "IPv6: Multiple Routing Tables" depends on EXPERIMENTAL diff --git a/net/ipv6/Makefile b/net/ipv6/Makefile index 686934acfac1..b6d3f79151e2 100644 --- a/net/ipv6/Makefile +++ b/net/ipv6/Makefile @@ -36,6 +36,7 @@ obj-$(CONFIG_NETFILTER) += netfilter/ obj-$(CONFIG_IPV6_SIT) += sit.o obj-$(CONFIG_IPV6_TUNNEL) += ip6_tunnel.o +obj-$(CONFIG_IPV6_GRE) += ip6_gre.o obj-y += addrconf_core.o exthdrs_core.o diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c new file mode 100644 index 000000000000..a84ad5dc4fcf --- /dev/null +++ b/net/ipv6/ip6_gre.c @@ -0,0 +1,1790 @@ +/* + * GRE over IPv6 protocol decoder. + * + * Authors: Dmitry Kozlov (xeb@mail.ru) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include + + +#define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) +#define IPV6_TCLASS_SHIFT 20 + +#define HASH_SIZE_SHIFT 5 +#define HASH_SIZE (1 << HASH_SIZE_SHIFT) + +static int ip6gre_net_id __read_mostly; +struct ip6gre_net { + struct ip6_tnl __rcu *tunnels[4][HASH_SIZE]; + + struct net_device *fb_tunnel_dev; +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly; +static int ip6gre_tunnel_init(struct net_device *dev); +static void ip6gre_tunnel_setup(struct net_device *dev); +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t); +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu); + +/* Tunnel hash table */ + +/* + 4 hash tables: + + 3: (remote,local) + 2: (remote,*) + 1: (*,local) + 0: (*,*) + + We require exact key match i.e. if a key is present in packet + it will match only tunnel with the same key; if it is not present, + it will match only keyless tunnel. + + All keysless packets, if not matched configured keyless tunnels + will match fallback tunnel. + */ + +#define HASH_KEY(key) (((__force u32)key^((__force u32)key>>4))&(HASH_SIZE - 1)) +static u32 HASH_ADDR(const struct in6_addr *addr) +{ + u32 hash = ipv6_addr_hash(addr); + + return hash_32(hash, HASH_SIZE_SHIFT); +} + +#define tunnels_r_l tunnels[3] +#define tunnels_r tunnels[2] +#define tunnels_l tunnels[1] +#define tunnels_wc tunnels[0] +/* + * Locking : hash tables are protected by RCU and RTNL + */ + +#define for_each_ip_tunnel_rcu(start) \ + for (t = rcu_dereference(start); t; t = rcu_dereference(t->next)) + +/* often modified stats are per cpu, other are shared (netdev->stats) */ +struct pcpu_tstats { + u64 rx_packets; + u64 rx_bytes; + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; + +static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *tot) +{ + int i; + + for_each_possible_cpu(i) { + const struct pcpu_tstats *tstats = per_cpu_ptr(dev->tstats, i); + u64 rx_packets, rx_bytes, tx_packets, tx_bytes; + unsigned int start; + + do { + start = u64_stats_fetch_begin_bh(&tstats->syncp); + rx_packets = tstats->rx_packets; + tx_packets = tstats->tx_packets; + rx_bytes = tstats->rx_bytes; + tx_bytes = tstats->tx_bytes; + } while (u64_stats_fetch_retry_bh(&tstats->syncp, start)); + + tot->rx_packets += rx_packets; + tot->tx_packets += tx_packets; + tot->rx_bytes += rx_bytes; + tot->tx_bytes += tx_bytes; + } + + tot->multicast = dev->stats.multicast; + tot->rx_crc_errors = dev->stats.rx_crc_errors; + tot->rx_fifo_errors = dev->stats.rx_fifo_errors; + tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; + tot->tx_carrier_errors = dev->stats.tx_carrier_errors; + tot->tx_dropped = dev->stats.tx_dropped; + tot->tx_aborted_errors = dev->stats.tx_aborted_errors; + tot->tx_errors = dev->stats.tx_errors; + + return tot; +} + +/* Given src, dst and key, find appropriate for input tunnel. */ + +static struct ip6_tnl *ip6gre_tunnel_lookup(struct net_device *dev, + const struct in6_addr *remote, const struct in6_addr *local, + __be32 key, __be16 gre_proto) +{ + struct net *net = dev_net(dev); + int link = dev->ifindex; + unsigned int h0 = HASH_ADDR(remote); + unsigned int h1 = HASH_KEY(key); + struct ip6_tnl *t, *cand = NULL; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int dev_type = (gre_proto == htons(ETH_P_TEB)) ? + ARPHRD_ETHER : ARPHRD_IP6GRE; + int score, cand_score = 4; + + for_each_ip_tunnel_rcu(ign->tunnels_r_l[h0 ^ h1]) { + if (!ipv6_addr_equal(local, &t->parms.laddr) || + !ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_r[h0 ^ h1]) { + if (!ipv6_addr_equal(remote, &t->parms.raddr) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_l[h1]) { + if ((!ipv6_addr_equal(local, &t->parms.laddr) && + (!ipv6_addr_equal(local, &t->parms.raddr) || + !ipv6_addr_is_multicast(local))) || + key != t->parms.i_key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + for_each_ip_tunnel_rcu(ign->tunnels_wc[h1]) { + if (t->parms.i_key != key || + !(t->dev->flags & IFF_UP)) + continue; + + if (t->dev->type != ARPHRD_IP6GRE && + t->dev->type != dev_type) + continue; + + score = 0; + if (t->parms.link != link) + score |= 1; + if (t->dev->type != dev_type) + score |= 2; + if (score == 0) + return t; + + if (score < cand_score) { + cand = t; + cand_score = score; + } + } + + if (cand != NULL) + return cand; + + dev = ign->fb_tunnel_dev; + if (dev->flags & IFF_UP) + return netdev_priv(dev); + + return NULL; +} + +static struct ip6_tnl __rcu **__ip6gre_bucket(struct ip6gre_net *ign, + const struct __ip6_tnl_parm *p) +{ + const struct in6_addr *remote = &p->raddr; + const struct in6_addr *local = &p->laddr; + unsigned int h = HASH_KEY(p->i_key); + int prio = 0; + + if (!ipv6_addr_any(local)) + prio |= 1; + if (!ipv6_addr_any(remote) && !ipv6_addr_is_multicast(remote)) { + prio |= 2; + h ^= HASH_ADDR(remote); + } + + return &ign->tunnels[prio][h]; +} + +static inline struct ip6_tnl __rcu **ip6gre_bucket(struct ip6gre_net *ign, + const struct ip6_tnl *t) +{ + return __ip6gre_bucket(ign, &t->parms); +} + +static void ip6gre_tunnel_link(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp = ip6gre_bucket(ign, t); + + rcu_assign_pointer(t->next, rtnl_dereference(*tp)); + rcu_assign_pointer(*tp, t); +} + +static void ip6gre_tunnel_unlink(struct ip6gre_net *ign, struct ip6_tnl *t) +{ + struct ip6_tnl __rcu **tp; + struct ip6_tnl *iter; + + for (tp = ip6gre_bucket(ign, t); + (iter = rtnl_dereference(*tp)) != NULL; + tp = &iter->next) { + if (t == iter) { + rcu_assign_pointer(*tp, t->next); + break; + } + } +} + +static struct ip6_tnl *ip6gre_tunnel_find(struct net *net, + const struct __ip6_tnl_parm *parms, + int type) +{ + const struct in6_addr *remote = &parms->raddr; + const struct in6_addr *local = &parms->laddr; + __be32 key = parms->i_key; + int link = parms->link; + struct ip6_tnl *t; + struct ip6_tnl __rcu **tp; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + for (tp = __ip6gre_bucket(ign, parms); + (t = rtnl_dereference(*tp)) != NULL; + tp = &t->next) + if (ipv6_addr_equal(local, &t->parms.laddr) && + ipv6_addr_equal(remote, &t->parms.raddr) && + key == t->parms.i_key && + link == t->parms.link && + type == t->dev->type) + break; + + return t; +} + +static struct ip6_tnl *ip6gre_tunnel_locate(struct net *net, + const struct __ip6_tnl_parm *parms, int create) +{ + struct ip6_tnl *t, *nt; + struct net_device *dev; + char name[IFNAMSIZ]; + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + t = ip6gre_tunnel_find(net, parms, ARPHRD_IP6GRE); + if (t || !create) + return t; + + if (parms->name[0]) + strlcpy(name, parms->name, IFNAMSIZ); + else + strcpy(name, "ip6gre%d"); + + dev = alloc_netdev(sizeof(*t), name, ip6gre_tunnel_setup); + if (!dev) + return NULL; + + dev_net_set(dev, net); + + nt = netdev_priv(dev); + nt->parms = *parms; + dev->rtnl_link_ops = &ip6gre_link_ops; + + nt->dev = dev; + ip6gre_tnl_link_config(nt, 1); + + if (register_netdevice(dev) < 0) + goto failed_free; + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + return nt; + +failed_free: + free_netdev(dev); + return NULL; +} + +static void ip6gre_tunnel_uninit(struct net_device *dev) +{ + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + ip6gre_tunnel_unlink(ign, netdev_priv(dev)); + dev_put(dev); +} + + +static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, + u8 type, u8 code, int offset, __be32 info) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; + __be16 *p = (__be16 *)(ipv6h + 1); + int grehlen = sizeof(ipv6h) + 4; + struct ip6_tnl *t; + __be16 flags; + + flags = p[0]; + if (flags&(GRE_CSUM|GRE_KEY|GRE_SEQ|GRE_ROUTING|GRE_VERSION)) { + if (flags&(GRE_VERSION|GRE_ROUTING)) + return; + if (flags&GRE_KEY) { + grehlen += 4; + if (flags&GRE_CSUM) + grehlen += 4; + } + } + + /* If only 8 bytes returned, keyed message will be dropped here */ + if (skb_headlen(skb) < grehlen) + return; + + rcu_read_lock(); + + t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, + flags & GRE_KEY ? + *(((__be32 *)p) + (grehlen / 4) - 1) : 0, + p[1]); + if (t == NULL) + goto out; + + switch (type) { + __u32 teli; + struct ipv6_tlv_tnl_enc_lim *tel; + __u32 mtu; + case ICMPV6_DEST_UNREACH: + net_warn_ratelimited("%s: Path to destination invalid or inactive!\n", + t->parms.name); + break; + case ICMPV6_TIME_EXCEED: + if (code == ICMPV6_EXC_HOPLIMIT) { + net_warn_ratelimited("%s: Too small hop limit or routing loop in tunnel!\n", + t->parms.name); + } + break; + case ICMPV6_PARAMPROB: + teli = 0; + if (code == ICMPV6_HDR_FIELD) + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); + + if (teli && teli == info - 2) { + tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; + if (tel->encap_limit == 0) { + net_warn_ratelimited("%s: Too small encapsulation limit or routing loop in tunnel!\n", + t->parms.name); + } + } else { + net_warn_ratelimited("%s: Recipient unable to parse tunneled packet!\n", + t->parms.name); + } + break; + case ICMPV6_PKT_TOOBIG: + mtu = info - offset; + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + t->dev->mtu = mtu; + break; + } + + if (time_before(jiffies, t->err_time + IP6TUNNEL_ERR_TIMEO)) + t->err_count++; + else + t->err_count = 1; + t->err_time = jiffies; +out: + rcu_read_unlock(); +} + +static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, struct sk_buff *skb) +{ + __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; + + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); + + if (INET_ECN_is_ce(dsfield)) + IP_ECN_set_ce(ip_hdr(skb)); +} + +static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t, + const struct ipv6hdr *ipv6h, struct sk_buff *skb) +{ + if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) + ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); + + if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) + IP6_ECN_set_ce(ipv6_hdr(skb)); +} + +static int ip6gre_rcv(struct sk_buff *skb) +{ + const struct ipv6hdr *ipv6h; + u8 *h; + __be16 flags; + __sum16 csum = 0; + __be32 key = 0; + u32 seqno = 0; + struct ip6_tnl *tunnel; + int offset = 4; + __be16 gre_proto; + + if (!pskb_may_pull(skb, sizeof(struct in6_addr))) + goto drop_nolock; + + ipv6h = ipv6_hdr(skb); + h = skb->data; + flags = *(__be16 *)h; + + if (flags&(GRE_CSUM|GRE_KEY|GRE_ROUTING|GRE_SEQ|GRE_VERSION)) { + /* - Version must be 0. + - We do not support routing headers. + */ + if (flags&(GRE_VERSION|GRE_ROUTING)) + goto drop_nolock; + + if (flags&GRE_CSUM) { + switch (skb->ip_summed) { + case CHECKSUM_COMPLETE: + csum = csum_fold(skb->csum); + if (!csum) + break; + /* fall through */ + case CHECKSUM_NONE: + skb->csum = 0; + csum = __skb_checksum_complete(skb); + skb->ip_summed = CHECKSUM_COMPLETE; + } + offset += 4; + } + if (flags&GRE_KEY) { + key = *(__be32 *)(h + offset); + offset += 4; + } + if (flags&GRE_SEQ) { + seqno = ntohl(*(__be32 *)(h + offset)); + offset += 4; + } + } + + gre_proto = *(__be16 *)(h + 2); + + rcu_read_lock(); + tunnel = ip6gre_tunnel_lookup(skb->dev, + &ipv6h->saddr, &ipv6h->daddr, key, + gre_proto); + if (tunnel) { + struct pcpu_tstats *tstats; + + if (!xfrm6_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; + + if (!ip6_tnl_rcv_ctl(tunnel, &ipv6h->daddr, &ipv6h->saddr)) { + tunnel->dev->stats.rx_dropped++; + goto drop; + } + + secpath_reset(skb); + + skb->protocol = gre_proto; + /* WCCP version 1 and 2 protocol decoding. + * - Change protocol to IP + * - When dealing with WCCPv2, Skip extra 4 bytes in GRE header + */ + if (flags == 0 && gre_proto == htons(ETH_P_WCCP)) { + skb->protocol = htons(ETH_P_IP); + if ((*(h + offset) & 0xF0) != 0x40) + offset += 4; + } + + skb->mac_header = skb->network_header; + __pskb_pull(skb, offset); + skb_postpull_rcsum(skb, skb_transport_header(skb), offset); + skb->pkt_type = PACKET_HOST; + + if (((flags&GRE_CSUM) && csum) || + (!(flags&GRE_CSUM) && tunnel->parms.i_flags&GRE_CSUM)) { + tunnel->dev->stats.rx_crc_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + if (tunnel->parms.i_flags&GRE_SEQ) { + if (!(flags&GRE_SEQ) || + (tunnel->i_seqno && + (s32)(seqno - tunnel->i_seqno) < 0)) { + tunnel->dev->stats.rx_fifo_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + tunnel->i_seqno = seqno + 1; + } + + /* Warning: All skb pointers will be invalidated! */ + if (tunnel->dev->type == ARPHRD_ETHER) { + if (!pskb_may_pull(skb, ETH_HLEN)) { + tunnel->dev->stats.rx_length_errors++; + tunnel->dev->stats.rx_errors++; + goto drop; + } + + ipv6h = ipv6_hdr(skb); + skb->protocol = eth_type_trans(skb, tunnel->dev); + skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); + } + + tstats = this_cpu_ptr(tunnel->dev->tstats); + u64_stats_update_begin(&tstats->syncp); + tstats->rx_packets++; + tstats->rx_bytes += skb->len; + u64_stats_update_end(&tstats->syncp); + + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + if (skb->protocol == htons(ETH_P_IP)) + ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb); + else if (skb->protocol == htons(ETH_P_IPV6)) + ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb); + + netif_rx(skb); + + rcu_read_unlock(); + return 0; + } + icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); + +drop: + rcu_read_unlock(); +drop_nolock: + kfree_skb(skb); + return 0; +} + +struct ipv6_tel_txoption { + struct ipv6_txoptions ops; + __u8 dst_opt[8]; +}; + +static void init_tel_txopt(struct ipv6_tel_txoption *opt, __u8 encap_limit) +{ + memset(opt, 0, sizeof(struct ipv6_tel_txoption)); + + opt->dst_opt[2] = IPV6_TLV_TNL_ENCAP_LIMIT; + opt->dst_opt[3] = 1; + opt->dst_opt[4] = encap_limit; + opt->dst_opt[5] = IPV6_TLV_PADN; + opt->dst_opt[6] = 1; + + opt->ops.dst0opt = (struct ipv6_opt_hdr *) opt->dst_opt; + opt->ops.opt_nflen = 8; +} + +static netdev_tx_t ip6gre_xmit2(struct sk_buff *skb, + struct net_device *dev, + __u8 dsfield, + struct flowi6 *fl6, + int encap_limit, + __u32 *pmtu) +{ + struct net *net = dev_net(dev); + struct ip6_tnl *tunnel = netdev_priv(dev); + struct net_device *tdev; /* Device to other host */ + struct ipv6hdr *ipv6h; /* Our new IP header */ + unsigned int max_headroom; /* The extra header space needed */ + int gre_hlen; + struct ipv6_tel_txoption opt; + int mtu; + struct dst_entry *dst = NULL, *ndst = NULL; + struct net_device_stats *stats = &tunnel->dev->stats; + int err = -1; + u8 proto; + int pkt_len; + struct sk_buff *new_skb; + + if (dev->type == ARPHRD_ETHER) + IPCB(skb)->flags = 0; + + if (dev->header_ops && dev->type == ARPHRD_IP6GRE) { + gre_hlen = 0; + ipv6h = (struct ipv6hdr *)skb->data; + fl6->daddr = ipv6h->daddr; + } else { + gre_hlen = tunnel->hlen; + fl6->daddr = tunnel->parms.raddr; + } + + if (!fl6->flowi6_mark) + dst = ip6_tnl_dst_check(tunnel); + + if (!dst) { + ndst = ip6_route_output(net, NULL, fl6); + + if (ndst->error) + goto tx_err_link_failure; + ndst = xfrm_lookup(net, ndst, flowi6_to_flowi(fl6), NULL, 0); + if (IS_ERR(ndst)) { + err = PTR_ERR(ndst); + ndst = NULL; + goto tx_err_link_failure; + } + dst = ndst; + } + + tdev = dst->dev; + + if (tdev == dev) { + stats->collisions++; + net_warn_ratelimited("%s: Local routing loop detected!\n", + tunnel->parms.name); + goto tx_err_dst_release; + } + + mtu = dst_mtu(dst) - sizeof(*ipv6h); + if (encap_limit >= 0) { + max_headroom += 8; + mtu -= 8; + } + if (mtu < IPV6_MIN_MTU) + mtu = IPV6_MIN_MTU; + if (skb_dst(skb)) + skb_dst(skb)->ops->update_pmtu(skb_dst(skb), NULL, skb, mtu); + if (skb->len > mtu) { + *pmtu = mtu; + err = -EMSGSIZE; + goto tx_err_dst_release; + } + + if (tunnel->err_count > 0) { + if (time_before(jiffies, + tunnel->err_time + IP6TUNNEL_ERR_TIMEO)) { + tunnel->err_count--; + + dst_link_failure(skb); + } else + tunnel->err_count = 0; + } + + max_headroom = LL_RESERVED_SPACE(tdev) + gre_hlen + dst->header_len; + + if (skb_headroom(skb) < max_headroom || skb_shared(skb) || + (skb_cloned(skb) && !skb_clone_writable(skb, 0))) { + new_skb = skb_realloc_headroom(skb, max_headroom); + if (max_headroom > dev->needed_headroom) + dev->needed_headroom = max_headroom; + if (!new_skb) + goto tx_err_dst_release; + + if (skb->sk) + skb_set_owner_w(new_skb, skb->sk); + consume_skb(skb); + skb = new_skb; + } + + skb_dst_drop(skb); + + if (fl6->flowi6_mark) { + skb_dst_set(skb, dst); + ndst = NULL; + } else { + skb_dst_set_noref(skb, dst); + } + + skb->transport_header = skb->network_header; + + proto = NEXTHDR_GRE; + if (encap_limit >= 0) { + init_tel_txopt(&opt, encap_limit); + ipv6_push_nfrag_opts(skb, &opt.ops, &proto, NULL); + } + + skb_push(skb, gre_hlen); + skb_reset_network_header(skb); + + /* + * Push down and install the IP header. + */ + ipv6h = ipv6_hdr(skb); + *(__be32 *)ipv6h = fl6->flowlabel | htonl(0x60000000); + dsfield = INET_ECN_encapsulate(0, dsfield); + ipv6_change_dsfield(ipv6h, ~INET_ECN_MASK, dsfield); + ipv6h->hop_limit = tunnel->parms.hop_limit; + ipv6h->nexthdr = proto; + ipv6h->saddr = fl6->saddr; + ipv6h->daddr = fl6->daddr; + + ((__be16 *)(ipv6h + 1))[0] = tunnel->parms.o_flags; + ((__be16 *)(ipv6h + 1))[1] = (dev->type == ARPHRD_ETHER) ? + htons(ETH_P_TEB) : skb->protocol; + + if (tunnel->parms.o_flags&(GRE_KEY|GRE_CSUM|GRE_SEQ)) { + __be32 *ptr = (__be32 *)(((u8 *)ipv6h) + tunnel->hlen - 4); + + if (tunnel->parms.o_flags&GRE_SEQ) { + ++tunnel->o_seqno; + *ptr = htonl(tunnel->o_seqno); + ptr--; + } + if (tunnel->parms.o_flags&GRE_KEY) { + *ptr = tunnel->parms.o_key; + ptr--; + } + if (tunnel->parms.o_flags&GRE_CSUM) { + *ptr = 0; + *(__sum16 *)ptr = ip_compute_csum((void *)(ipv6h+1), + skb->len - sizeof(struct ipv6hdr)); + } + } + + nf_reset(skb); + pkt_len = skb->len; + err = ip6_local_out(skb); + + if (net_xmit_eval(err) == 0) { + struct pcpu_tstats *tstats = this_cpu_ptr(tunnel->dev->tstats); + + tstats->tx_bytes += pkt_len; + tstats->tx_packets++; + } else { + stats->tx_errors++; + stats->tx_aborted_errors++; + } + + if (ndst) + ip6_tnl_dst_store(tunnel, ndst); + + return 0; +tx_err_link_failure: + stats->tx_carrier_errors++; + dst_link_failure(skb); +tx_err_dst_release: + dst_release(ndst); + return err; +} + +static inline int ip6gre_xmit_ipv4(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + const struct iphdr *iph = ip_hdr(skb); + int encap_limit = -1; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPIP; + + dsfield = ipv4_get_dsfield(iph); + + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= htonl((__u32)iph->tos << IPV6_TCLASS_SHIFT) + & IPV6_TCLASS_MASK; + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + /* XXX: send ICMP error even if DF is not set. */ + if (err == -EMSGSIZE) + icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED, + htonl(mtu)); + return -1; + } + + return 0; +} + +static inline int ip6gre_xmit_ipv6(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + int encap_limit = -1; + __u16 offset; + struct flowi6 fl6; + __u8 dsfield; + __u32 mtu; + int err; + + if (ipv6_addr_equal(&t->parms.raddr, &ipv6h->saddr)) + return -1; + + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); + if (offset > 0) { + struct ipv6_tlv_tnl_enc_lim *tel; + tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; + if (tel->encap_limit == 0) { + icmpv6_send(skb, ICMPV6_PARAMPROB, + ICMPV6_HDR_FIELD, offset + 2); + return -1; + } + encap_limit = tel->encap_limit - 1; + } else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = IPPROTO_IPV6; + + dsfield = ipv6_get_dsfield(ipv6h); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_TCLASS_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL) + fl6.flowlabel |= (*(__be32 *) ipv6h & IPV6_FLOWLABEL_MASK); + if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK) + fl6.flowi6_mark = skb->mark; + + err = ip6gre_xmit2(skb, dev, dsfield, &fl6, encap_limit, &mtu); + if (err != 0) { + if (err == -EMSGSIZE) + icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); + return -1; + } + + return 0; +} + +/** + * ip6_tnl_addr_conflict - compare packet addresses to tunnel's own + * @t: the outgoing tunnel device + * @hdr: IPv6 header from the incoming packet + * + * Description: + * Avoid trivial tunneling loop by checking that tunnel exit-point + * doesn't match source of incoming packet. + * + * Return: + * 1 if conflict, + * 0 else + **/ + +static inline bool ip6gre_tnl_addr_conflict(const struct ip6_tnl *t, + const struct ipv6hdr *hdr) +{ + return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); +} + +static int ip6gre_xmit_other(struct sk_buff *skb, struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + int encap_limit = -1; + struct flowi6 fl6; + __u32 mtu; + int err; + + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + encap_limit = t->parms.encap_limit; + + memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6)); + fl6.flowi6_proto = skb->protocol; + + err = ip6gre_xmit2(skb, dev, 0, &fl6, encap_limit, &mtu); + + return err; +} + +static netdev_tx_t ip6gre_tunnel_xmit(struct sk_buff *skb, + struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct net_device_stats *stats = &t->dev->stats; + int ret; + + if (!ip6_tnl_xmit_ctl(t)) + return -1; + + switch (skb->protocol) { + case htons(ETH_P_IP): + ret = ip6gre_xmit_ipv4(skb, dev); + break; + case htons(ETH_P_IPV6): + ret = ip6gre_xmit_ipv6(skb, dev); + break; + default: + ret = ip6gre_xmit_other(skb, dev); + break; + } + + if (ret < 0) + goto tx_err; + + return NETDEV_TX_OK; + +tx_err: + stats->tx_errors++; + stats->tx_dropped++; + kfree_skb(skb); + return NETDEV_TX_OK; +} + +static void ip6gre_tnl_link_config(struct ip6_tnl *t, int set_mtu) +{ + struct net_device *dev = t->dev; + struct __ip6_tnl_parm *p = &t->parms; + struct flowi6 *fl6 = &t->fl.u.ip6; + int addend = sizeof(struct ipv6hdr) + 4; + + if (dev->type != ARPHRD_ETHER) { + memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &p->raddr, sizeof(struct in6_addr)); + } + + /* Set up flowi template */ + fl6->saddr = p->laddr; + fl6->daddr = p->raddr; + fl6->flowi6_oif = p->link; + fl6->flowlabel = 0; + + if (!(p->flags&IP6_TNL_F_USE_ORIG_TCLASS)) + fl6->flowlabel |= IPV6_TCLASS_MASK & p->flowinfo; + if (!(p->flags&IP6_TNL_F_USE_ORIG_FLOWLABEL)) + fl6->flowlabel |= IPV6_FLOWLABEL_MASK & p->flowinfo; + + p->flags &= ~(IP6_TNL_F_CAP_XMIT|IP6_TNL_F_CAP_RCV|IP6_TNL_F_CAP_PER_PACKET); + p->flags |= ip6_tnl_get_cap(t, &p->laddr, &p->raddr); + + if (p->flags&IP6_TNL_F_CAP_XMIT && + p->flags&IP6_TNL_F_CAP_RCV && dev->type != ARPHRD_ETHER) + dev->flags |= IFF_POINTOPOINT; + else + dev->flags &= ~IFF_POINTOPOINT; + + dev->iflink = p->link; + + /* Precalculate GRE options length */ + if (t->parms.o_flags&(GRE_CSUM|GRE_KEY|GRE_SEQ)) { + if (t->parms.o_flags&GRE_CSUM) + addend += 4; + if (t->parms.o_flags&GRE_KEY) + addend += 4; + if (t->parms.o_flags&GRE_SEQ) + addend += 4; + } + + if (p->flags & IP6_TNL_F_CAP_XMIT) { + int strict = (ipv6_addr_type(&p->raddr) & + (IPV6_ADDR_MULTICAST|IPV6_ADDR_LINKLOCAL)); + + struct rt6_info *rt = rt6_lookup(dev_net(dev), + &p->raddr, &p->laddr, + p->link, strict); + + if (rt == NULL) + return; + + if (rt->dst.dev) { + dev->hard_header_len = rt->dst.dev->hard_header_len + addend; + + if (set_mtu) { + dev->mtu = rt->dst.dev->mtu - addend; + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + + if (dev->mtu < IPV6_MIN_MTU) + dev->mtu = IPV6_MIN_MTU; + } + } + dst_release(&rt->dst); + } + + t->hlen = addend; +} + +static int ip6gre_tnl_change(struct ip6_tnl *t, + const struct __ip6_tnl_parm *p, int set_mtu) +{ + t->parms.laddr = p->laddr; + t->parms.raddr = p->raddr; + t->parms.flags = p->flags; + t->parms.hop_limit = p->hop_limit; + t->parms.encap_limit = p->encap_limit; + t->parms.flowinfo = p->flowinfo; + t->parms.link = p->link; + t->parms.proto = p->proto; + t->parms.i_key = p->i_key; + t->parms.o_key = p->o_key; + t->parms.i_flags = p->i_flags; + t->parms.o_flags = p->o_flags; + ip6_tnl_dst_reset(t); + ip6gre_tnl_link_config(t, set_mtu); + return 0; +} + +static void ip6gre_tnl_parm_from_user(struct __ip6_tnl_parm *p, + const struct ip6_tnl_parm2 *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->i_key = u->i_key; + p->o_key = u->o_key; + p->i_flags = u->i_flags; + p->o_flags = u->o_flags; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void ip6gre_tnl_parm_to_user(struct ip6_tnl_parm2 *u, + const struct __ip6_tnl_parm *p) +{ + u->proto = IPPROTO_GRE; + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->i_key = p->i_key; + u->o_key = p->o_key; + u->i_flags = p->i_flags; + u->o_flags = p->o_flags; + memcpy(u->name, p->name, sizeof(u->name)); +} + +static int ip6gre_tunnel_ioctl(struct net_device *dev, + struct ifreq *ifr, int cmd) +{ + int err = 0; + struct ip6_tnl_parm2 p; + struct __ip6_tnl_parm p1; + struct ip6_tnl *t; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + + switch (cmd) { + case SIOCGETTUNNEL: + t = NULL; + if (dev == ign->fb_tunnel_dev) { + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) { + err = -EFAULT; + break; + } + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + } + if (t == NULL) + t = netdev_priv(dev); + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + break; + + case SIOCADDTUNNEL: + case SIOCCHGTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + + err = -EINVAL; + if ((p.i_flags|p.o_flags)&(GRE_VERSION|GRE_ROUTING)) + goto done; + + if (!(p.i_flags&GRE_KEY)) + p.i_key = 0; + if (!(p.o_flags&GRE_KEY)) + p.o_key = 0; + + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, cmd == SIOCADDTUNNEL); + + if (dev != ign->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) { + if (t != NULL) { + if (t->dev != dev) { + err = -EEXIST; + break; + } + } else { + t = netdev_priv(dev); + + ip6gre_tunnel_unlink(ign, t); + synchronize_net(); + ip6gre_tnl_change(t, &p1, 1); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + } + + if (t) { + err = 0; + + ip6gre_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) + err = -EFAULT; + } else + err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT); + break; + + case SIOCDELTUNNEL: + err = -EPERM; + if (!capable(CAP_NET_ADMIN)) + goto done; + + if (dev == ign->fb_tunnel_dev) { + err = -EFAULT; + if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) + goto done; + err = -ENOENT; + ip6gre_tnl_parm_from_user(&p1, &p); + t = ip6gre_tunnel_locate(net, &p1, 0); + if (t == NULL) + goto done; + err = -EPERM; + if (t == netdev_priv(ign->fb_tunnel_dev)) + goto done; + dev = t->dev; + } + unregister_netdevice(dev); + err = 0; + break; + + default: + err = -EINVAL; + } + +done: + return err; +} + +static int ip6gre_tunnel_change_mtu(struct net_device *dev, int new_mtu) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + if (new_mtu < 68 || + new_mtu > 0xFFF8 - dev->hard_header_len - tunnel->hlen) + return -EINVAL; + dev->mtu = new_mtu; + return 0; +} + +static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, + unsigned short type, + const void *daddr, const void *saddr, unsigned int len) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct ipv6hdr *ipv6h = (struct ipv6hdr *)skb_push(skb, t->hlen); + __be16 *p = (__be16 *)(ipv6h+1); + + *(__be32 *)ipv6h = t->fl.u.ip6.flowlabel | htonl(0x60000000); + ipv6h->hop_limit = t->parms.hop_limit; + ipv6h->nexthdr = NEXTHDR_GRE; + ipv6h->saddr = t->parms.laddr; + ipv6h->daddr = t->parms.raddr; + + p[0] = t->parms.o_flags; + p[1] = htons(type); + + /* + * Set the source hardware address. + */ + + if (saddr) + memcpy(&ipv6h->saddr, saddr, sizeof(struct in6_addr)); + if (daddr) + memcpy(&ipv6h->daddr, daddr, sizeof(struct in6_addr)); + if (!ipv6_addr_any(&ipv6h->daddr)) + return t->hlen; + + return -t->hlen; +} + +static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr) +{ + const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb); + memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr)); + return sizeof(struct in6_addr); +} + +static const struct header_ops ip6gre_header_ops = { + .create = ip6gre_header, + .parse = ip6gre_header_parse, +}; + +static const struct net_device_ops ip6gre_netdev_ops = { + .ndo_init = ip6gre_tunnel_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_do_ioctl = ip6gre_tunnel_ioctl, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_dev_free(struct net_device *dev) +{ + free_percpu(dev->tstats); + free_netdev(dev); +} + +static void ip6gre_tunnel_setup(struct net_device *dev) +{ + struct ip6_tnl *t; + + dev->netdev_ops = &ip6gre_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->type = ARPHRD_IP6GRE; + dev->hard_header_len = LL_MAX_HEADER + sizeof(struct ipv6hdr) + 4; + dev->mtu = ETH_DATA_LEN - sizeof(struct ipv6hdr) - 4; + t = netdev_priv(dev); + if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) + dev->mtu -= 8; + dev->flags |= IFF_NOARP; + dev->iflink = 0; + dev->addr_len = sizeof(struct in6_addr); + dev->features |= NETIF_F_NETNS_LOCAL; + dev->priv_flags &= ~IFF_XMIT_DST_RELEASE; +} + +static int ip6gre_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + memcpy(dev->dev_addr, &tunnel->parms.laddr, sizeof(struct in6_addr)); + memcpy(dev->broadcast, &tunnel->parms.raddr, sizeof(struct in6_addr)); + + if (ipv6_addr_any(&tunnel->parms.raddr)) + dev->header_ops = &ip6gre_header_ops; + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static void ip6gre_fb_tunnel_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + tunnel->hlen = sizeof(struct ipv6hdr) + 4; + + dev_hold(dev); +} + + +static struct inet6_protocol ip6gre_protocol __read_mostly = { + .handler = ip6gre_rcv, + .err_handler = ip6gre_err, + .flags = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL, +}; + +static void ip6gre_destroy_tunnels(struct ip6gre_net *ign, + struct list_head *head) +{ + int prio; + + for (prio = 0; prio < 4; prio++) { + int h; + for (h = 0; h < HASH_SIZE; h++) { + struct ip6_tnl *t; + + t = rtnl_dereference(ign->tunnels[prio][h]); + + while (t != NULL) { + unregister_netdevice_queue(t->dev, head); + t = rtnl_dereference(t->next); + } + } + } +} + +static int __net_init ip6gre_init_net(struct net *net) +{ + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + ign->fb_tunnel_dev = alloc_netdev(sizeof(struct ip6_tnl), "ip6gre0", + ip6gre_tunnel_setup); + if (!ign->fb_tunnel_dev) { + err = -ENOMEM; + goto err_alloc_dev; + } + dev_net_set(ign->fb_tunnel_dev, net); + + ip6gre_fb_tunnel_init(ign->fb_tunnel_dev); + ign->fb_tunnel_dev->rtnl_link_ops = &ip6gre_link_ops; + + err = register_netdev(ign->fb_tunnel_dev); + if (err) + goto err_reg_dev; + + rcu_assign_pointer(ign->tunnels_wc[0], + netdev_priv(ign->fb_tunnel_dev)); + return 0; + +err_reg_dev: + ip6gre_dev_free(ign->fb_tunnel_dev); +err_alloc_dev: + return err; +} + +static void __net_exit ip6gre_exit_net(struct net *net) +{ + struct ip6gre_net *ign; + LIST_HEAD(list); + + ign = net_generic(net, ip6gre_net_id); + rtnl_lock(); + ip6gre_destroy_tunnels(ign, &list); + unregister_netdevice_many(&list); + rtnl_unlock(); +} + +static struct pernet_operations ip6gre_net_ops = { + .init = ip6gre_init_net, + .exit = ip6gre_exit_net, + .id = &ip6gre_net_id, + .size = sizeof(struct ip6gre_net), +}; + +static int ip6gre_tunnel_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + __be16 flags; + + if (!data) + return 0; + + flags = 0; + if (data[IFLA_GRE_IFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_IFLAGS]); + if (data[IFLA_GRE_OFLAGS]) + flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]); + if (flags & (GRE_VERSION|GRE_ROUTING)) + return -EINVAL; + + return 0; +} + +static int ip6gre_tap_validate(struct nlattr *tb[], struct nlattr *data[]) +{ + struct in6_addr daddr; + + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + + if (!data) + goto out; + + if (data[IFLA_GRE_REMOTE]) { + nla_memcpy(&daddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + if (ipv6_addr_any(&daddr)) + return -EINVAL; + } + +out: + return ip6gre_tunnel_validate(tb, data); +} + + +static void ip6gre_netlink_parms(struct nlattr *data[], + struct __ip6_tnl_parm *parms) +{ + memset(parms, 0, sizeof(*parms)); + + if (!data) + return; + + if (data[IFLA_GRE_LINK]) + parms->link = nla_get_u32(data[IFLA_GRE_LINK]); + + if (data[IFLA_GRE_IFLAGS]) + parms->i_flags = nla_get_be16(data[IFLA_GRE_IFLAGS]); + + if (data[IFLA_GRE_OFLAGS]) + parms->o_flags = nla_get_be16(data[IFLA_GRE_OFLAGS]); + + if (data[IFLA_GRE_IKEY]) + parms->i_key = nla_get_be32(data[IFLA_GRE_IKEY]); + + if (data[IFLA_GRE_OKEY]) + parms->o_key = nla_get_be32(data[IFLA_GRE_OKEY]); + + if (data[IFLA_GRE_LOCAL]) + nla_memcpy(&parms->laddr, data[IFLA_GRE_LOCAL], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_REMOTE]) + nla_memcpy(&parms->raddr, data[IFLA_GRE_REMOTE], sizeof(struct in6_addr)); + + if (data[IFLA_GRE_TTL]) + parms->hop_limit = nla_get_u8(data[IFLA_GRE_TTL]); + + if (data[IFLA_GRE_ENCAP_LIMIT]) + parms->encap_limit = nla_get_u8(data[IFLA_GRE_ENCAP_LIMIT]); + + if (data[IFLA_GRE_FLOWINFO]) + parms->flowinfo = nla_get_u32(data[IFLA_GRE_FLOWINFO]); + + if (data[IFLA_GRE_FLAGS]) + parms->flags = nla_get_u32(data[IFLA_GRE_FLAGS]); +} + +static int ip6gre_tap_init(struct net_device *dev) +{ + struct ip6_tnl *tunnel; + + tunnel = netdev_priv(dev); + + tunnel->dev = dev; + strcpy(tunnel->parms.name, dev->name); + + ip6gre_tnl_link_config(tunnel, 1); + + dev->tstats = alloc_percpu(struct pcpu_tstats); + if (!dev->tstats) + return -ENOMEM; + + return 0; +} + +static const struct net_device_ops ip6gre_tap_netdev_ops = { + .ndo_init = ip6gre_tap_init, + .ndo_uninit = ip6gre_tunnel_uninit, + .ndo_start_xmit = ip6gre_tunnel_xmit, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = ip6gre_tunnel_change_mtu, + .ndo_get_stats64 = ip6gre_get_stats64, +}; + +static void ip6gre_tap_setup(struct net_device *dev) +{ + + ether_setup(dev); + + dev->netdev_ops = &ip6gre_tap_netdev_ops; + dev->destructor = ip6gre_dev_free; + + dev->iflink = 0; + dev->features |= NETIF_F_NETNS_LOCAL; +} + +static int ip6gre_newlink(struct net *src_net, struct net_device *dev, + struct nlattr *tb[], struct nlattr *data[]) +{ + struct ip6_tnl *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + int err; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &nt->parms); + + if (ip6gre_tunnel_find(net, &nt->parms, dev->type)) + return -EEXIST; + + if (dev->type == ARPHRD_ETHER && !tb[IFLA_ADDRESS]) + eth_hw_addr_random(dev); + + nt->dev = dev; + ip6gre_tnl_link_config(nt, !tb[IFLA_MTU]); + + /* Can use a lockless transmit, unless we generate output sequences */ + if (!(nt->parms.o_flags & GRE_SEQ)) + dev->features |= NETIF_F_LLTX; + + err = register_netdevice(dev); + if (err) + goto out; + + dev_hold(dev); + ip6gre_tunnel_link(ign, nt); + +out: + return err; +} + +static int ip6gre_changelink(struct net_device *dev, struct nlattr *tb[], + struct nlattr *data[]) +{ + struct ip6_tnl *t, *nt; + struct net *net = dev_net(dev); + struct ip6gre_net *ign = net_generic(net, ip6gre_net_id); + struct __ip6_tnl_parm p; + + if (dev == ign->fb_tunnel_dev) + return -EINVAL; + + nt = netdev_priv(dev); + ip6gre_netlink_parms(data, &p); + + t = ip6gre_tunnel_locate(net, &p, 0); + + if (t) { + if (t->dev != dev) + return -EEXIST; + } else { + t = nt; + + ip6gre_tunnel_unlink(ign, t); + ip6gre_tnl_change(t, &p, !tb[IFLA_MTU]); + ip6gre_tunnel_link(ign, t); + netdev_state_change(dev); + } + + return 0; +} + +static size_t ip6gre_get_size(const struct net_device *dev) +{ + return + /* IFLA_GRE_LINK */ + nla_total_size(4) + + /* IFLA_GRE_IFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_OFLAGS */ + nla_total_size(2) + + /* IFLA_GRE_IKEY */ + nla_total_size(4) + + /* IFLA_GRE_OKEY */ + nla_total_size(4) + + /* IFLA_GRE_LOCAL */ + nla_total_size(4) + + /* IFLA_GRE_REMOTE */ + nla_total_size(4) + + /* IFLA_GRE_TTL */ + nla_total_size(1) + + /* IFLA_GRE_TOS */ + nla_total_size(1) + + /* IFLA_GRE_ENCAP_LIMIT */ + nla_total_size(1) + + /* IFLA_GRE_FLOWINFO */ + nla_total_size(4) + + /* IFLA_GRE_FLAGS */ + nla_total_size(4) + + 0; +} + +static int ip6gre_fill_info(struct sk_buff *skb, const struct net_device *dev) +{ + struct ip6_tnl *t = netdev_priv(dev); + struct __ip6_tnl_parm *p = &t->parms; + + if (nla_put_u32(skb, IFLA_GRE_LINK, p->link) || + nla_put_be16(skb, IFLA_GRE_IFLAGS, p->i_flags) || + nla_put_be16(skb, IFLA_GRE_OFLAGS, p->o_flags) || + nla_put_be32(skb, IFLA_GRE_IKEY, p->i_key) || + nla_put_be32(skb, IFLA_GRE_OKEY, p->o_key) || + nla_put(skb, IFLA_GRE_LOCAL, sizeof(struct in6_addr), &p->raddr) || + nla_put(skb, IFLA_GRE_REMOTE, sizeof(struct in6_addr), &p->laddr) || + nla_put_u8(skb, IFLA_GRE_TTL, p->hop_limit) || + /*nla_put_u8(skb, IFLA_GRE_TOS, t->priority) ||*/ + nla_put_u8(skb, IFLA_GRE_ENCAP_LIMIT, p->encap_limit) || + nla_put_be32(skb, IFLA_GRE_FLOWINFO, p->flowinfo) || + nla_put_u32(skb, IFLA_GRE_FLAGS, p->flags)) + goto nla_put_failure; + return 0; + +nla_put_failure: + return -EMSGSIZE; +} + +static const struct nla_policy ip6gre_policy[IFLA_GRE_MAX + 1] = { + [IFLA_GRE_LINK] = { .type = NLA_U32 }, + [IFLA_GRE_IFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_OFLAGS] = { .type = NLA_U16 }, + [IFLA_GRE_IKEY] = { .type = NLA_U32 }, + [IFLA_GRE_OKEY] = { .type = NLA_U32 }, + [IFLA_GRE_LOCAL] = { .len = FIELD_SIZEOF(struct ipv6hdr, saddr) }, + [IFLA_GRE_REMOTE] = { .len = FIELD_SIZEOF(struct ipv6hdr, daddr) }, + [IFLA_GRE_TTL] = { .type = NLA_U8 }, + [IFLA_GRE_ENCAP_LIMIT] = { .type = NLA_U8 }, + [IFLA_GRE_FLOWINFO] = { .type = NLA_U32 }, + [IFLA_GRE_FLAGS] = { .type = NLA_U32 }, +}; + +static struct rtnl_link_ops ip6gre_link_ops __read_mostly = { + .kind = "ip6gre", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tunnel_setup, + .validate = ip6gre_tunnel_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +static struct rtnl_link_ops ip6gre_tap_ops __read_mostly = { + .kind = "ip6gretap", + .maxtype = IFLA_GRE_MAX, + .policy = ip6gre_policy, + .priv_size = sizeof(struct ip6_tnl), + .setup = ip6gre_tap_setup, + .validate = ip6gre_tap_validate, + .newlink = ip6gre_newlink, + .changelink = ip6gre_changelink, + .get_size = ip6gre_get_size, + .fill_info = ip6gre_fill_info, +}; + +/* + * And now the modules code and kernel interface. + */ + +static int __init ip6gre_init(void) +{ + int err; + + pr_info("GRE over IPv6 tunneling driver\n"); + + err = register_pernet_device(&ip6gre_net_ops); + if (err < 0) + return err; + + err = inet6_add_protocol(&ip6gre_protocol, IPPROTO_GRE); + if (err < 0) { + pr_info("%s: can't add protocol\n", __func__); + goto add_proto_failed; + } + + err = rtnl_link_register(&ip6gre_link_ops); + if (err < 0) + goto rtnl_link_failed; + + err = rtnl_link_register(&ip6gre_tap_ops); + if (err < 0) + goto tap_ops_failed; + +out: + return err; + +tap_ops_failed: + rtnl_link_unregister(&ip6gre_link_ops); +rtnl_link_failed: + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); +add_proto_failed: + unregister_pernet_device(&ip6gre_net_ops); + goto out; +} + +static void __exit ip6gre_fini(void) +{ + rtnl_link_unregister(&ip6gre_tap_ops); + rtnl_link_unregister(&ip6gre_link_ops); + inet6_del_protocol(&ip6gre_protocol, IPPROTO_GRE); + unregister_pernet_device(&ip6gre_net_ops); +} + +module_init(ip6gre_init); +module_exit(ip6gre_fini); +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("D. Kozlov (xeb@mail.ru)"); +MODULE_DESCRIPTION("GRE over IPv6 tunneling device"); +MODULE_ALIAS_RTNL_LINK("ip6gre"); +MODULE_ALIAS_NETDEV("ip6gre0"); diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 9a1d5fe6aef8..33d2a0e6712d 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -126,7 +126,7 @@ static struct net_device_stats *ip6_get_stats(struct net_device *dev) * Locking : hash tables are protected by RCU and RTNL */ -static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) +struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) { struct dst_entry *dst = t->dst_cache; @@ -139,20 +139,23 @@ static inline struct dst_entry *ip6_tnl_dst_check(struct ip6_tnl *t) return dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_check); -static inline void ip6_tnl_dst_reset(struct ip6_tnl *t) +void ip6_tnl_dst_reset(struct ip6_tnl *t) { dst_release(t->dst_cache); t->dst_cache = NULL; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_reset); -static inline void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) +void ip6_tnl_dst_store(struct ip6_tnl *t, struct dst_entry *dst) { struct rt6_info *rt = (struct rt6_info *) dst; t->dst_cookie = rt->rt6i_node ? rt->rt6i_node->fn_sernum : 0; dst_release(t->dst_cache); t->dst_cache = dst; } +EXPORT_SYMBOL_GPL(ip6_tnl_dst_store); /** * ip6_tnl_lookup - fetch tunnel matching the end-point addresses @@ -200,7 +203,7 @@ ip6_tnl_lookup(struct net *net, const struct in6_addr *remote, const struct in6_ **/ static struct ip6_tnl __rcu ** -ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct ip6_tnl_parm *p) +ip6_tnl_bucket(struct ip6_tnl_net *ip6n, const struct __ip6_tnl_parm *p) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -267,7 +270,7 @@ static void ip6_dev_free(struct net_device *dev) * created tunnel or NULL **/ -static struct ip6_tnl *ip6_tnl_create(struct net *net, struct ip6_tnl_parm *p) +static struct ip6_tnl *ip6_tnl_create(struct net *net, struct __ip6_tnl_parm *p) { struct net_device *dev; struct ip6_tnl *t; @@ -322,7 +325,7 @@ failed: **/ static struct ip6_tnl *ip6_tnl_locate(struct net *net, - struct ip6_tnl_parm *p, int create) + struct __ip6_tnl_parm *p, int create) { const struct in6_addr *remote = &p->raddr; const struct in6_addr *local = &p->laddr; @@ -374,8 +377,7 @@ ip6_tnl_dev_uninit(struct net_device *dev) * else index to encapsulation limit **/ -static __u16 -parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) +__u16 ip6_tnl_parse_tlv_enc_lim(struct sk_buff *skb, __u8 *raw) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *) raw; __u8 nexthdr = ipv6h->nexthdr; @@ -425,6 +427,7 @@ parse_tlv_tnl_enc_lim(struct sk_buff *skb, __u8 * raw) } return 0; } +EXPORT_SYMBOL(ip6_tnl_parse_tlv_enc_lim); /** * ip6_tnl_err - tunnel error handler @@ -480,7 +483,7 @@ ip6_tnl_err(struct sk_buff *skb, __u8 ipproto, struct inet6_skb_parm *opt, case ICMPV6_PARAMPROB: teli = 0; if ((*code) == ICMPV6_HDR_FIELD) - teli = parse_tlv_tnl_enc_lim(skb, skb->data); + teli = ip6_tnl_parse_tlv_enc_lim(skb, skb->data); if (teli && teli == *info - 2) { tel = (struct ipv6_tlv_tnl_enc_lim *) &skb->data[teli]; @@ -693,11 +696,11 @@ static void ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t, IP6_ECN_set_ce(ipv6_hdr(skb)); } -static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, +__u32 ip6_tnl_get_cap(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ltype = ipv6_addr_type(laddr); int rtype = ipv6_addr_type(raddr); __u32 flags = 0; @@ -715,13 +718,14 @@ static __u32 ip6_tnl_get_cap(struct ip6_tnl *t, } return flags; } +EXPORT_SYMBOL(ip6_tnl_get_cap); /* called with rcu_read_lock() */ -static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, +int ip6_tnl_rcv_ctl(struct ip6_tnl *t, const struct in6_addr *laddr, const struct in6_addr *raddr) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -740,6 +744,7 @@ static inline int ip6_tnl_rcv_ctl(struct ip6_tnl *t, } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_rcv_ctl); /** * ip6_tnl_rcv - decapsulate IPv6 packet and retransmit it locally @@ -859,9 +864,9 @@ ip6_tnl_addr_conflict(const struct ip6_tnl *t, const struct ipv6hdr *hdr) return ipv6_addr_equal(&t->parms.raddr, &hdr->saddr); } -static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) +int ip6_tnl_xmit_ctl(struct ip6_tnl *t) { - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; int ret = 0; struct net *net = dev_net(t->dev); @@ -885,6 +890,8 @@ static inline int ip6_tnl_xmit_ctl(struct ip6_tnl *t) } return ret; } +EXPORT_SYMBOL_GPL(ip6_tnl_xmit_ctl); + /** * ip6_tnl_xmit2 - encapsulate packet and send * @skb: the outgoing socket buffer @@ -1085,7 +1092,7 @@ ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev) !ip6_tnl_xmit_ctl(t) || ip6_tnl_addr_conflict(t, ipv6h)) return -1; - offset = parse_tlv_tnl_enc_lim(skb, skb_network_header(skb)); + offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb)); if (offset > 0) { struct ipv6_tlv_tnl_enc_lim *tel; tel = (struct ipv6_tlv_tnl_enc_lim *)&skb_network_header(skb)[offset]; @@ -1152,7 +1159,7 @@ tx_err: static void ip6_tnl_link_config(struct ip6_tnl *t) { struct net_device *dev = t->dev; - struct ip6_tnl_parm *p = &t->parms; + struct __ip6_tnl_parm *p = &t->parms; struct flowi6 *fl6 = &t->fl.u.ip6; memcpy(dev->dev_addr, &p->laddr, sizeof(struct in6_addr)); @@ -1215,7 +1222,7 @@ static void ip6_tnl_link_config(struct ip6_tnl *t) **/ static int -ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) +ip6_tnl_change(struct ip6_tnl *t, const struct __ip6_tnl_parm *p) { t->parms.laddr = p->laddr; t->parms.raddr = p->raddr; @@ -1230,6 +1237,34 @@ ip6_tnl_change(struct ip6_tnl *t, struct ip6_tnl_parm *p) return 0; } +static void +ip6_tnl_parm_from_user(struct __ip6_tnl_parm *p, const struct ip6_tnl_parm *u) +{ + p->laddr = u->laddr; + p->raddr = u->raddr; + p->flags = u->flags; + p->hop_limit = u->hop_limit; + p->encap_limit = u->encap_limit; + p->flowinfo = u->flowinfo; + p->link = u->link; + p->proto = u->proto; + memcpy(p->name, u->name, sizeof(u->name)); +} + +static void +ip6_tnl_parm_to_user(struct ip6_tnl_parm *u, const struct __ip6_tnl_parm *p) +{ + u->laddr = p->laddr; + u->raddr = p->raddr; + u->flags = p->flags; + u->hop_limit = p->hop_limit; + u->encap_limit = p->encap_limit; + u->flowinfo = p->flowinfo; + u->link = p->link; + u->proto = p->proto; + memcpy(u->name, p->name, sizeof(u->name)); +} + /** * ip6_tnl_ioctl - configure ipv6 tunnels from userspace * @dev: virtual device associated with tunnel @@ -1263,6 +1298,7 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) { int err = 0; struct ip6_tnl_parm p; + struct __ip6_tnl_parm p1; struct ip6_tnl *t = NULL; struct net *net = dev_net(dev); struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id); @@ -1274,11 +1310,12 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) err = -EFAULT; break; } - t = ip6_tnl_locate(net, &p, 0); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); } if (t == NULL) t = netdev_priv(dev); - memcpy(&p, &t->parms, sizeof (p)); + ip6_tnl_parm_to_user(&p, &t->parms); if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof (p))) { err = -EFAULT; } @@ -1295,7 +1332,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (p.proto != IPPROTO_IPV6 && p.proto != IPPROTO_IPIP && p.proto != 0) break; - t = ip6_tnl_locate(net, &p, cmd == SIOCADDTUNNEL); + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, cmd == SIOCADDTUNNEL); if (dev != ip6n->fb_tnl_dev && cmd == SIOCCHGTUNNEL) { if (t != NULL) { if (t->dev != dev) { @@ -1307,13 +1345,14 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) ip6_tnl_unlink(ip6n, t); synchronize_net(); - err = ip6_tnl_change(t, &p); + err = ip6_tnl_change(t, &p1); ip6_tnl_link(ip6n, t); netdev_state_change(dev); } if (t) { err = 0; - if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof (p))) + ip6_tnl_parm_to_user(&p, &t->parms); + if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p))) err = -EFAULT; } else @@ -1329,7 +1368,9 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof (p))) break; err = -ENOENT; - if ((t = ip6_tnl_locate(net, &p, 0)) == NULL) + ip6_tnl_parm_from_user(&p1, &p); + t = ip6_tnl_locate(net, &p1, 0); + if (t == NULL) break; err = -EPERM; if (t->dev == ip6n->fb_tnl_dev) -- cgit v1.2.3 From 4855d6f3116e891b66198838b683dce3dcf6e874 Mon Sep 17 00:00:00 2001 From: Igor Maravic Date: Sun, 12 Aug 2012 22:31:58 +0000 Subject: net: ipv6: proc: Fix error handling Fix error handling in case making of dir dev_snmp6 failes Signed-off-by: Igor Maravic Signed-off-by: David S. Miller --- net/ipv6/proc.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/proc.c b/net/ipv6/proc.c index da2e92d05c15..745a32042950 100644 --- a/net/ipv6/proc.c +++ b/net/ipv6/proc.c @@ -307,10 +307,10 @@ static int __net_init ipv6_proc_init_net(struct net *net) goto proc_dev_snmp6_fail; return 0; +proc_dev_snmp6_fail: + proc_net_remove(net, "snmp6"); proc_snmp6_fail: proc_net_remove(net, "sockstat6"); -proc_dev_snmp6_fail: - proc_net_remove(net, "dev_snmp6"); return -ENOMEM; } -- cgit v1.2.3 From 4acd4945cd1e1f92b20d14e349c6c6a52acbd42d Mon Sep 17 00:00:00 2001 From: Ben Hutchings Date: Tue, 14 Aug 2012 08:54:51 +0000 Subject: ipv6: addrconf: Avoid calling netdevice notifiers with RCU read-side lock Cong Wang reports that lockdep detected suspicious RCU usage while enabling IPV6 forwarding: [ 1123.310275] =============================== [ 1123.442202] [ INFO: suspicious RCU usage. ] [ 1123.558207] 3.6.0-rc1+ #109 Not tainted [ 1123.665204] ------------------------------- [ 1123.768254] include/linux/rcupdate.h:430 Illegal context switch in RCU read-side critical section! [ 1123.992320] [ 1123.992320] other info that might help us debug this: [ 1123.992320] [ 1124.307382] [ 1124.307382] rcu_scheduler_active = 1, debug_locks = 0 [ 1124.522220] 2 locks held by sysctl/5710: [ 1124.648364] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_trylock+0x15/0x17 [ 1124.882211] #1: (rcu_read_lock){.+.+.+}, at: [] rcu_lock_acquire+0x0/0x29 [ 1125.085209] [ 1125.085209] stack backtrace: [ 1125.332213] Pid: 5710, comm: sysctl Not tainted 3.6.0-rc1+ #109 [ 1125.441291] Call Trace: [ 1125.545281] [] lockdep_rcu_suspicious+0x109/0x112 [ 1125.667212] [] rcu_preempt_sleep_check+0x45/0x47 [ 1125.781838] [] __might_sleep+0x1e/0x19b [...] [ 1127.445223] [] call_netdevice_notifiers+0x4a/0x4f [...] [ 1127.772188] [] dev_disable_lro+0x32/0x6b [ 1127.885174] [] dev_forward_change+0x30/0xcb [ 1128.013214] [] addrconf_forward_change+0x85/0xc5 [...] addrconf_forward_change() uses RCU iteration over the netdev list, which is unnecessary since it already holds the RTNL lock. We also cannot reasonably require netdevice notifier functions not to sleep. Reported-by: Cong Wang Signed-off-by: Ben Hutchings Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 79181819a24f..6bc85f7c31e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -494,8 +494,7 @@ static void addrconf_forward_change(struct net *net, __s32 newf) struct net_device *dev; struct inet6_dev *idev; - rcu_read_lock(); - for_each_netdev_rcu(net, dev) { + for_each_netdev(net, dev) { idev = __in6_dev_get(dev); if (idev) { int changed = (!idev->cnf.forwarding) ^ (!newf); @@ -504,7 +503,6 @@ static void addrconf_forward_change(struct net *net, __s32 newf) dev_forward_change(idev); } } - rcu_read_unlock(); } static int addrconf_fixup_forwarding(struct ctl_table *table, int *p, int newf) -- cgit v1.2.3 From a7cb5a49bf64ba64864ae16a6be028f8b0d3cc06 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 01:10:10 -0600 Subject: userns: Print out socket uids in a user namespace aware fashion. Cc: Alexey Kuznetsov Cc: James Morris Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Arnaldo Carvalho de Melo Cc: Sridhar Samudrala Acked-by: Vlad Yasevich Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/net/tcp.h | 3 ++- init/Kconfig | 6 ------ net/appletalk/atalk_proc.c | 3 ++- net/ipv4/ping.c | 4 +++- net/ipv4/raw.c | 4 +++- net/ipv4/tcp_ipv4.c | 6 +++--- net/ipv4/udp.c | 4 +++- net/ipv6/raw.c | 3 ++- net/ipv6/tcp_ipv6.c | 6 +++--- net/ipv6/udp.c | 3 ++- net/ipx/ipx_proc.c | 3 ++- net/key/af_key.c | 2 +- net/llc/llc_proc.c | 2 +- net/packet/af_packet.c | 2 +- net/phonet/socket.c | 6 ++++-- net/sctp/proc.c | 6 ++++-- 16 files changed, 36 insertions(+), 27 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/tcp.h b/include/net/tcp.h index e19124b84cd2..91e746736a8f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1509,7 +1509,8 @@ struct tcp_iter_state { sa_family_t family; enum tcp_seq_states state; struct sock *syn_wait_sk; - int bucket, offset, sbucket, num, uid; + int bucket, offset, sbucket, num; + kuid_t uid; loff_t last_pos; }; diff --git a/init/Kconfig b/init/Kconfig index 80fae193e3a1..25a6ebb50c64 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -942,10 +942,7 @@ config UIDGID_CONVERTED depends on PROC_EVENTS = n # Networking - depends on PACKET = n depends on NET_9P = n - depends on IPX = n - depends on PHONET = n depends on NET_CLS_FLOW = n depends on NETFILTER_XT_MATCH_OWNER = n depends on NETFILTER_XT_MATCH_RECENT = n @@ -953,14 +950,11 @@ config UIDGID_CONVERTED depends on NETFILTER_NETLINK_LOG = n depends on INET = n depends on IPV6 = n - depends on IP_SCTP = n depends on AF_RXRPC = n - depends on LLC2 = n depends on NET_KEY = n depends on INET_DIAG = n depends on DNS_RESOLVER = n depends on AX25 = n - depends on ATALK = n # Filesystems depends on USB_GADGETFS = n diff --git a/net/appletalk/atalk_proc.c b/net/appletalk/atalk_proc.c index b5b1a221c242..c30f3a0717fb 100644 --- a/net/appletalk/atalk_proc.c +++ b/net/appletalk/atalk_proc.c @@ -183,7 +183,8 @@ static int atalk_seq_socket_show(struct seq_file *seq, void *v) ntohs(at->dest_net), at->dest_node, at->dest_port, sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), - s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); + s->sk_state, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); out: return 0; } diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 6232d476f37e..bee5eeb676f8 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -845,7 +845,9 @@ static void ping_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index ff0f071969ea..f2425785d40a 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -992,7 +992,9 @@ static void raw_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) i, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 42b2a6a73092..642be8a4c6a3 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2382,7 +2382,7 @@ void tcp_proc_unregister(struct net *net, struct tcp_seq_afinfo *afinfo) EXPORT_SYMBOL(tcp_proc_unregister); static void get_openreq4(const struct sock *sk, const struct request_sock *req, - struct seq_file *f, int i, int uid, int *len) + struct seq_file *f, int i, kuid_t uid, int *len) { const struct inet_request_sock *ireq = inet_rsk(req); int ttd = req->expires - jiffies; @@ -2399,7 +2399,7 @@ static void get_openreq4(const struct sock *sk, const struct request_sock *req, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->retrans, - uid, + from_kuid_munged(seq_user_ns(f), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ atomic_read(&sk->sk_refcnt), @@ -2450,7 +2450,7 @@ static void get_tcp4_sock(struct sock *sk, struct seq_file *f, int i, int *len) timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sk), + from_kuid_munged(seq_user_ns(f), sock_i_uid(sk)), icsk->icsk_probes_out, sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index b4c3582a991f..53b89817c008 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -2110,7 +2110,9 @@ static void udp4_format_sock(struct sock *sp, struct seq_file *f, bucket, src, srcp, dest, destp, sp->sk_state, sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), - 0, 0L, 0, sock_i_uid(sp), 0, sock_i_ino(sp), + 0, 0L, 0, + from_kuid_munged(seq_user_ns(f), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops), len); } diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca6..7af88ef01657 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -1251,7 +1251,8 @@ static void raw6_sock_seq_show(struct seq_file *seq, struct sock *sp, int i) sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - sock_i_uid(sp), 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); } diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index c66b90f71c9b..4b5b335ebde1 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1803,7 +1803,7 @@ static void tcp_v6_destroy_sock(struct sock *sk) #ifdef CONFIG_PROC_FS /* Proc filesystem TCPv6 sock list dumping. */ static void get_openreq6(struct seq_file *seq, - const struct sock *sk, struct request_sock *req, int i, int uid) + const struct sock *sk, struct request_sock *req, int i, kuid_t uid) { int ttd = req->expires - jiffies; const struct in6_addr *src = &inet6_rsk(req)->loc_addr; @@ -1827,7 +1827,7 @@ static void get_openreq6(struct seq_file *seq, 1, /* timers active (only the expire timer) */ jiffies_to_clock_t(ttd), req->retrans, - uid, + from_kuid_munged(seq_user_ns(seq), uid), 0, /* non standard timer */ 0, /* open_requests have no inode */ 0, req); @@ -1877,7 +1877,7 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) timer_active, jiffies_to_clock_t(timer_expires - jiffies), icsk->icsk_retransmits, - sock_i_uid(sp), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), icsk->icsk_probes_out, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 99d0077b56b8..bbdff07eebe1 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -1458,7 +1458,8 @@ static void udp6_sock_seq_show(struct seq_file *seq, struct sock *sp, int bucket sk_wmem_alloc_get(sp), sk_rmem_alloc_get(sp), 0, 0L, 0, - sock_i_uid(sp), 0, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sp)), + 0, sock_i_ino(sp), atomic_read(&sp->sk_refcnt), sp, atomic_read(&sp->sk_drops)); diff --git a/net/ipx/ipx_proc.c b/net/ipx/ipx_proc.c index f8ba30dfecae..02ff7f2f60d4 100644 --- a/net/ipx/ipx_proc.c +++ b/net/ipx/ipx_proc.c @@ -217,7 +217,8 @@ static int ipx_seq_socket_show(struct seq_file *seq, void *v) seq_printf(seq, "%08X %08X %02X %03d\n", sk_wmem_alloc_get(s), sk_rmem_alloc_get(s), - s->sk_state, SOCK_INODE(s->sk_socket)->i_uid); + s->sk_state, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s))); out: return 0; } diff --git a/net/key/af_key.c b/net/key/af_key.c index 34e418508a67..0481d4b51476 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -3661,7 +3661,7 @@ static int pfkey_seq_show(struct seq_file *f, void *v) atomic_read(&s->sk_refcnt), sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(f), sock_i_uid(s)), sock_i_ino(s) ); return 0; diff --git a/net/llc/llc_proc.c b/net/llc/llc_proc.c index a1839c004357..7b4799cfbf8d 100644 --- a/net/llc/llc_proc.c +++ b/net/llc/llc_proc.c @@ -151,7 +151,7 @@ static int llc_seq_socket_show(struct seq_file *seq, void *v) sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk) - llc->copied_seq, sk->sk_state, - sk->sk_socket ? SOCK_INODE(sk->sk_socket)->i_uid : -1, + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), llc->link); out: return 0; diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index ceaca7c134a0..d147317ce9ea 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -3846,7 +3846,7 @@ static int packet_seq_show(struct seq_file *seq, void *v) po->ifindex, po->running, atomic_read(&s->sk_rmem_alloc), - sock_i_uid(s), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(s)), sock_i_ino(s)); } diff --git a/net/phonet/socket.c b/net/phonet/socket.c index 0acc943f713a..b7e982782255 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -612,7 +612,8 @@ static int pn_sock_seq_show(struct seq_file *seq, void *v) sk->sk_protocol, pn->sobject, pn->dobject, pn->resource, sk->sk_state, sk_wmem_alloc_get(sk), sk_rmem_alloc_get(sk), - sock_i_uid(sk), sock_i_ino(sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), atomic_read(&sk->sk_refcnt), sk, atomic_read(&sk->sk_drops), &len); } @@ -796,7 +797,8 @@ static int pn_res_seq_show(struct seq_file *seq, void *v) struct sock *sk = *psk; seq_printf(seq, "%02X %5d %lu%n", - (int) (psk - pnres.sk), sock_i_uid(sk), + (int) (psk - pnres.sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), sock_i_ino(sk), &len); } seq_printf(seq, "%*s\n", 63 - len, ""); diff --git a/net/sctp/proc.c b/net/sctp/proc.c index 1e2eee88c3ea..dc12febc977a 100644 --- a/net/sctp/proc.c +++ b/net/sctp/proc.c @@ -216,7 +216,8 @@ static int sctp_eps_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%8pK %8pK %-3d %-3d %-4d %-5d %5d %5lu ", ep, sk, sctp_sk(sk)->type, sk->sk_state, hash, epb->bind_addr.port, - sock_i_uid(sk), sock_i_ino(sk)); + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk)); sctp_seq_dump_local_addrs(seq, epb); seq_printf(seq, "\n"); @@ -324,7 +325,8 @@ static int sctp_assocs_seq_show(struct seq_file *seq, void *v) assoc->assoc_id, assoc->sndbuf_used, atomic_read(&assoc->rmem_alloc), - sock_i_uid(sk), sock_i_ino(sk), + from_kuid_munged(seq_user_ns(seq), sock_i_uid(sk)), + sock_i_ino(sk), epb->bind_addr.port, assoc->peer.port); seq_printf(seq, " "); -- cgit v1.2.3 From 4f82f45730c68fdaf9b0472495a965188404866e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 24 May 2012 10:37:59 -0600 Subject: net ip6 flowlabel: Make owner a union of struct pid * and kuid_t Correct a long standing omission and use struct pid in the owner field of struct ip6_flowlabel when the share type is IPV6_FL_S_PROCESS. This guarantees we don't have issues when pid wraparound occurs. Use a kuid_t in the owner field of struct ip6_flowlabel when the share type is IPV6_FL_S_USER to add user namespace support. In /proc/net/ip6_flowlabel capture the current pid namespace when opening the file and release the pid namespace when the file is closed ensuring we print the pid owner value that is meaning to the reader of the file. Similarly use from_kuid_munged to print uid values that are meaningful to the reader of the file. This requires exporting pid_nr_ns so that ipv6 can continue to built as a module. Yoiks what silliness Acked-by: David S. Miller Acked-by: Serge Hallyn Signed-off-by: Eric W. Biederman --- include/net/ipv6.h | 5 ++++- init/Kconfig | 1 - kernel/pid.c | 1 + net/ipv6/ip6_flowlabel.c | 50 +++++++++++++++++++++++++++++++++++++++++------- 4 files changed, 48 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 01c34b363a34..c8a202436e01 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -222,7 +222,10 @@ struct ip6_flowlabel { struct ipv6_txoptions *opt; unsigned long linger; u8 share; - u32 owner; + union { + struct pid *pid; + kuid_t uid; + } owner; unsigned long lastuse; unsigned long expires; struct net *fl_net; diff --git a/init/Kconfig b/init/Kconfig index f857f97bcef3..64ff9ce59443 100644 --- a/init/Kconfig +++ b/init/Kconfig @@ -948,7 +948,6 @@ config UIDGID_CONVERTED depends on NETFILTER_XT_MATCH_RECENT = n depends on NETFILTER_XT_TARGET_LOG = n depends on NETFILTER_NETLINK_LOG = n - depends on IPV6 = n depends on AF_RXRPC = n depends on NET_KEY = n depends on INET_DIAG = n diff --git a/kernel/pid.c b/kernel/pid.c index e86b291ad834..aebd4f5aaf41 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -479,6 +479,7 @@ pid_t pid_nr_ns(struct pid *pid, struct pid_namespace *ns) } return nr; } +EXPORT_SYMBOL_GPL(pid_nr_ns); pid_t pid_vnr(struct pid *pid) { diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index 9772fbd8a3f5..c836a6a20a34 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -90,6 +91,11 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) static void fl_free(struct ip6_flowlabel *fl) { + switch (fl->share) { + case IPV6_FL_S_PROCESS: + put_pid(fl->owner.pid); + break; + } if (fl) { release_net(fl->fl_net); kfree(fl->opt); @@ -394,10 +400,10 @@ fl_create(struct net *net, struct sock *sk, struct in6_flowlabel_req *freq, case IPV6_FL_S_ANY: break; case IPV6_FL_S_PROCESS: - fl->owner = current->pid; + fl->owner.pid = get_task_pid(current, PIDTYPE_PID); break; case IPV6_FL_S_USER: - fl->owner = current_euid(); + fl->owner.uid = current_euid(); break; default: err = -EINVAL; @@ -561,7 +567,10 @@ recheck: err = -EPERM; if (fl1->share == IPV6_FL_S_EXCL || fl1->share != fl->share || - fl1->owner != fl->owner) + ((fl1->share == IPV6_FL_S_PROCESS) && + (fl1->owner.pid == fl->owner.pid)) || + ((fl1->share == IPV6_FL_S_USER) && + uid_eq(fl1->owner.uid, fl->owner.uid))) goto release; err = -EINVAL; @@ -621,6 +630,7 @@ done: struct ip6fl_iter_state { struct seq_net_private p; + struct pid_namespace *pid_ns; int bucket; }; @@ -699,6 +709,7 @@ static void ip6fl_seq_stop(struct seq_file *seq, void *v) static int ip6fl_seq_show(struct seq_file *seq, void *v) { + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); if (v == SEQ_START_TOKEN) seq_printf(seq, "%-5s %-1s %-6s %-6s %-6s %-8s %-32s %s\n", "Label", "S", "Owner", "Users", "Linger", "Expires", "Dst", "Opt"); @@ -708,7 +719,11 @@ static int ip6fl_seq_show(struct seq_file *seq, void *v) "%05X %-1d %-6d %-6d %-6ld %-8ld %pi6 %-4d\n", (unsigned int)ntohl(fl->label), fl->share, - (int)fl->owner, + ((fl->share == IPV6_FL_S_PROCESS) ? + pid_nr_ns(fl->owner.pid, state->pid_ns) : + ((fl->share == IPV6_FL_S_USER) ? + from_kuid_munged(seq_user_ns(seq), fl->owner.uid) : + 0)), atomic_read(&fl->users), fl->linger/HZ, (long)(fl->expires - jiffies)/HZ, @@ -727,8 +742,29 @@ static const struct seq_operations ip6fl_seq_ops = { static int ip6fl_seq_open(struct inode *inode, struct file *file) { - return seq_open_net(inode, file, &ip6fl_seq_ops, - sizeof(struct ip6fl_iter_state)); + struct seq_file *seq; + struct ip6fl_iter_state *state; + int err; + + err = seq_open_net(inode, file, &ip6fl_seq_ops, + sizeof(struct ip6fl_iter_state)); + + if (!err) { + seq = file->private_data; + state = ip6fl_seq_private(seq); + rcu_read_lock(); + state->pid_ns = get_pid_ns(task_active_pid_ns(current)); + rcu_read_unlock(); + } + return err; +} + +static int ip6fl_seq_release(struct inode *inode, struct file *file) +{ + struct seq_file *seq = file->private_data; + struct ip6fl_iter_state *state = ip6fl_seq_private(seq); + put_pid_ns(state->pid_ns); + return seq_release_net(inode, file); } static const struct file_operations ip6fl_seq_fops = { @@ -736,7 +772,7 @@ static const struct file_operations ip6fl_seq_fops = { .open = ip6fl_seq_open, .read = seq_read, .llseek = seq_lseek, - .release = seq_release_net, + .release = ip6fl_seq_release, }; static int __net_init ip6_flowlabel_proc_init(struct net *net) -- cgit v1.2.3 From 898132ae76d1aeb52301f10e8795c34fbb54e853 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Aug 2012 16:15:02 +0300 Subject: ipv6: move dereference after check in fl_free() There is a dereference before checking for NULL bug here. Generally free() functions should accept NULL pointers. For example, fl_create() can pass a NULL pointer to fl_free() on the error path. Signed-off-by: Dan Carpenter Signed-off-by: Eric W. Biederman --- net/ipv6/ip6_flowlabel.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_flowlabel.c b/net/ipv6/ip6_flowlabel.c index c836a6a20a34..90bbefb57943 100644 --- a/net/ipv6/ip6_flowlabel.c +++ b/net/ipv6/ip6_flowlabel.c @@ -91,12 +91,9 @@ static struct ip6_flowlabel *fl_lookup(struct net *net, __be32 label) static void fl_free(struct ip6_flowlabel *fl) { - switch (fl->share) { - case IPV6_FL_S_PROCESS: - put_pid(fl->owner.pid); - break; - } if (fl) { + if (fl->share == IPV6_FL_S_PROCESS) + put_pid(fl->owner.pid); release_net(fl->fl_net); kfree(fl->opt); } -- cgit v1.2.3 From 5ef5d6c569f80cf716d75fa88e9b5ee72f0986b2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 16 Aug 2012 03:14:04 +0000 Subject: gre: information leak in ip6_tnl_ioctl() There is a one byte hole between p->hop_limit and p->flowinfo where stack memory is leaked to the user. This was introduced in c12b395a46 "gre: Support GRE over IPv6". Signed-off-by: Dan Carpenter --- net/ipv6/ip6_tunnel.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c index 33d2a0e6712d..cb7e2ded6f08 100644 --- a/net/ipv6/ip6_tunnel.c +++ b/net/ipv6/ip6_tunnel.c @@ -1312,6 +1312,8 @@ ip6_tnl_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd) } ip6_tnl_parm_from_user(&p1, &p); t = ip6_tnl_locate(net, &p1, 0); + } else { + memset(&p, 0, sizeof(p)); } if (t == NULL) t = netdev_priv(dev); -- cgit v1.2.3 From 9d7b0fc1ef1f17aff57c0dc9a59453d8fca255c3 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Mon, 20 Aug 2012 02:56:56 -0700 Subject: net: ipv6: fix oops in inet_putpeer() Commit 97bab73f (inet: Hide route peer accesses behind helpers.) introduced a bug in xfrm6_policy_destroy(). The xfrm_dst's _rt6i_peer member is not initialized, causing a false positive result from inetpeer_ptr_is_peer(), which in turn causes a NULL pointer dereference in inet_putpeer(). Pid: 314, comm: kworker/0:1 Not tainted 3.6.0-rc1+ #17 To Be Filled By O.E.M. To Be Filled By O.E.M./P4S800D-X EIP: 0060:[] EFLAGS: 00010246 CPU: 0 EIP is at inet_putpeer+0xe/0x16 EAX: 00000000 EBX: f3481700 ECX: 00000000 EDX: 000dd641 ESI: f3481700 EDI: c05e949c EBP: f551def4 ESP: f551def4 DS: 007b ES: 007b FS: 0000 GS: 00e0 SS: 0068 CR0: 8005003b CR2: 00000070 CR3: 3243d000 CR4: 00000750 DR0: 00000000 DR1: 00000000 DR2: 00000000 DR3: 00000000 DR6: ffff0ff0 DR7: 00000400 f551df04 c0423de1 00000000 f3481700 f551df18 c038d5f7 f254b9f8 f551df28 f34f85d8 f551df20 c03ef48d f551df3c c0396870 f30697e8 f24e1738 c05e98f4 f5509540 c05cd2b4 f551df7c c0142d2b c043feb5 f5509540 00000000 c05cd2e8 [] xfrm6_dst_destroy+0x42/0xdb [] dst_destroy+0x1d/0xa4 [] xfrm_bundle_flo_delete+0x2b/0x36 [] flow_cache_gc_task+0x85/0x9f [] process_one_work+0x122/0x441 [] ? apic_timer_interrupt+0x31/0x38 [] ? flow_cache_new_hashrnd+0x2b/0x2b [] worker_thread+0x113/0x3cc Fix by adding a init_dst() callback to struct xfrm_policy_afinfo to properly initialize the dst's peer pointer. Signed-off-by: Patrick McHardy Signed-off-by: David S. Miller --- include/net/xfrm.h | 2 ++ net/ipv6/xfrm6_policy.c | 8 ++++++++ net/xfrm/xfrm_policy.c | 2 ++ 3 files changed, 12 insertions(+) (limited to 'net/ipv6') diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 62b619e82a90..976a81abe1a2 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -292,6 +292,8 @@ struct xfrm_policy_afinfo { struct flowi *fl, int reverse); int (*get_tos)(const struct flowi *fl); + void (*init_dst)(struct net *net, + struct xfrm_dst *dst); int (*init_path)(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index ef39812107b1..f8c4c08ffb60 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -73,6 +73,13 @@ static int xfrm6_get_tos(const struct flowi *fl) return 0; } +static void xfrm6_init_dst(struct net *net, struct xfrm_dst *xdst) +{ + struct rt6_info *rt = (struct rt6_info *)xdst; + + rt6_init_peer(rt, net->ipv6.peers); +} + static int xfrm6_init_path(struct xfrm_dst *path, struct dst_entry *dst, int nfheader_len) { @@ -286,6 +293,7 @@ static struct xfrm_policy_afinfo xfrm6_policy_afinfo = { .get_saddr = xfrm6_get_saddr, .decode_session = _decode_session6, .get_tos = xfrm6_get_tos, + .init_dst = xfrm6_init_dst, .init_path = xfrm6_init_path, .fill_dst = xfrm6_fill_dst, .blackhole_route = ip6_blackhole_route, diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c5a5165a5927..5a2aa17e4d3c 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -1357,6 +1357,8 @@ static inline struct xfrm_dst *xfrm_alloc_dst(struct net *net, int family) memset(dst + 1, 0, sizeof(*xdst) - sizeof(*dst)); xdst->flo.ops = &xfrm_bundle_fc_ops; + if (afinfo->init_dst) + afinfo->init_dst(net, xdst); } else xdst = ERR_PTR(-ENOBUFS); -- cgit v1.2.3 From fae6ef87faeb8853896920c68ee703d715799d28 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sun, 19 Aug 2012 03:30:38 +0000 Subject: net: tcp: move sk_rx_dst_set call after tcp_create_openreq_child() This commit removes the sk_rx_dst_set calls from tcp_create_openreq_child(), because at that point the icsk_af_ops field of ipv6_mapped TCP sockets has not been set to its proper final value. Instead, to make sure we get the right sk_rx_dst_set variant appropriate for the address family of the new connection, we have tcp_v{4,6}_syn_recv_sock() directly call the appropriate function shortly after the call to tcp_create_openreq_child() returns. This also moves inet6_sk_rx_dst_set() to avoid a forward declaration with the new approach. Signed-off-by: Neal Cardwell Reported-by: Artem Savkov Cc: Eric Dumazet Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 1 + net/ipv4/tcp_minisocks.c | 2 -- net/ipv6/tcp_ipv6.c | 25 +++++++++++++------------ 3 files changed, 14 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 767823764016..5bf2040b25b1 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1462,6 +1462,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, goto exit_nonewsk; newsk->sk_gso_type = SKB_GSO_TCPV4; + inet_sk_rx_dst_set(newsk, skb); newtp = tcp_sk(newsk); newinet = inet_sk(newsk); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index d9c9dcef2de3..6ff7f10dce9d 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -387,8 +387,6 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, struct tcp_sock *oldtp = tcp_sk(sk); struct tcp_cookie_values *oldcvp = oldtp->cookie_values; - newicsk->icsk_af_ops->sk_rx_dst_set(newsk, skb); - /* TCP Cookie Transactions require space for the cookie pair, * as it differs for each connection. There is no need to * copy any s_data_payload stored at the original socket. diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index bb9ce2b2f377..a3e60cc04a8a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -94,6 +94,18 @@ static struct tcp_md5sig_key *tcp_v6_md5_do_lookup(struct sock *sk, } #endif +static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) +{ + struct dst_entry *dst = skb_dst(skb); + const struct rt6_info *rt = (const struct rt6_info *)dst; + + dst_hold(dst); + sk->sk_rx_dst = dst; + inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; + if (rt->rt6i_node) + inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; +} + static void tcp_v6_hash(struct sock *sk) { if (sk->sk_state != TCP_CLOSE) { @@ -1270,6 +1282,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newsk->sk_gso_type = SKB_GSO_TCPV6; __ip6_dst_store(newsk, dst, NULL, NULL); + inet6_sk_rx_dst_set(newsk, skb); newtcp6sk = (struct tcp6_sock *)newsk; inet_sk(newsk)->pinet6 = &newtcp6sk->inet6; @@ -1729,18 +1742,6 @@ static struct timewait_sock_ops tcp6_timewait_sock_ops = { .twsk_destructor= tcp_twsk_destructor, }; -static void inet6_sk_rx_dst_set(struct sock *sk, const struct sk_buff *skb) -{ - struct dst_entry *dst = skb_dst(skb); - const struct rt6_info *rt = (const struct rt6_info *)dst; - - dst_hold(dst); - sk->sk_rx_dst = dst; - inet_sk(sk)->rx_dst_ifindex = skb->skb_iif; - if (rt->rt6i_node) - inet6_sk(sk)->rx_dst_cookie = rt->rt6i_node->fn_sernum; -} - static const struct inet_connection_sock_af_ops ipv6_specific = { .queue_xmit = inet6_csk_xmit, .send_check = tcp_v6_send_check, -- cgit v1.2.3 From 0115e8e30d6fcdd4b8faa30d3ffd90859a591f51 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2012 17:19:46 +0000 Subject: net: remove delay at device dismantle I noticed extra one second delay in device dismantle, tracked down to a call to dst_dev_event() while some call_rcu() are still in RCU queues. These call_rcu() were posted by rt_free(struct rtable *rt) calls. We then wait a little (but one second) in netdev_wait_allrefs() before kicking again NETDEV_UNREGISTER. As the call_rcu() are now completed, dst_dev_event() can do the needed device swap on busy dst. To solve this problem, add a new NETDEV_UNREGISTER_FINAL, called after a rcu_barrier(), but outside of RTNL lock. Use NETDEV_UNREGISTER_FINAL with care ! Change dst_dev_event() handler to react to NETDEV_UNREGISTER_FINAL Also remove NETDEV_UNREGISTER_BATCH, as its not used anymore after IP cache removal. With help from Gao feng Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Cc: "Eric W. Biederman" Cc: Gao feng Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- net/core/dev.c | 22 ++++++++-------------- net/core/dst.c | 2 +- net/core/fib_rules.c | 3 ++- net/core/rtnetlink.c | 2 +- net/ipv4/devinet.c | 6 +++++- net/ipv4/fib_frontend.c | 8 ++++---- net/ipv6/addrconf.c | 6 +++++- 8 files changed, 27 insertions(+), 24 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4936f09a9333..9ad7fa8c10e0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1553,7 +1553,7 @@ struct packet_type { #define NETDEV_PRE_TYPE_CHANGE 0x000E #define NETDEV_POST_TYPE_CHANGE 0x000F #define NETDEV_POST_INIT 0x0010 -#define NETDEV_UNREGISTER_BATCH 0x0011 +#define NETDEV_UNREGISTER_FINAL 0x0011 #define NETDEV_RELEASE 0x0012 #define NETDEV_NOTIFY_PEERS 0x0013 #define NETDEV_JOIN 0x0014 diff --git a/net/core/dev.c b/net/core/dev.c index 088923fe4066..0640d2a859c6 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1406,7 +1406,6 @@ rollback: nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } @@ -1448,7 +1447,6 @@ int unregister_netdevice_notifier(struct notifier_block *nb) nb->notifier_call(nb, NETDEV_DOWN, dev); } nb->notifier_call(nb, NETDEV_UNREGISTER, dev); - nb->notifier_call(nb, NETDEV_UNREGISTER_BATCH, dev); } } unlock: @@ -1468,7 +1466,8 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - ASSERT_RTNL(); + if (val != NETDEV_UNREGISTER_FINAL) + ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -5331,10 +5330,6 @@ static void rollback_registered_many(struct list_head *head) netdev_unregister_kobject(dev); } - /* Process any work delayed until the end of the batch */ - dev = list_first_entry(head, struct net_device, unreg_list); - call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); - synchronize_net(); list_for_each_entry(dev, head, unreg_list) @@ -5787,9 +5782,8 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - /* don't resend NETDEV_UNREGISTER_BATCH, _BATCH users - * should have already handle it the first time */ - + rcu_barrier(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { /* We must not have linkwatch events @@ -5851,9 +5845,8 @@ void netdev_run_todo(void) __rtnl_unlock(); - /* Wait for rcu callbacks to finish before attempting to drain - * the device list. This usually avoids a 250ms wait. - */ + + /* Wait for rcu callbacks to finish before next phase */ if (!list_empty(&list)) rcu_barrier(); @@ -5862,6 +5855,8 @@ void netdev_run_todo(void) = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); + if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { pr_err("network todo '%s' but state %d\n", dev->name, dev->reg_state); @@ -6256,7 +6251,6 @@ int dev_change_net_namespace(struct net_device *dev, struct net *net, const char the device is just moving and can keep their slaves up. */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); - call_netdevice_notifiers(NETDEV_UNREGISTER_BATCH, dev); rtmsg_ifinfo(RTM_DELLINK, dev, ~0U); /* diff --git a/net/core/dst.c b/net/core/dst.c index 56d63612e1e4..f6593d238e9a 100644 --- a/net/core/dst.c +++ b/net/core/dst.c @@ -374,7 +374,7 @@ static int dst_dev_event(struct notifier_block *this, unsigned long event, struct dst_entry *dst, *last = NULL; switch (event) { - case NETDEV_UNREGISTER: + case NETDEV_UNREGISTER_FINAL: case NETDEV_DOWN: mutex_lock(&dst_gc_mutex); for (dst = dst_busy_list; dst; dst = dst->next) { diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..585093755c23 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -711,15 +711,16 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net *net = dev_net(dev); struct fib_rules_ops *ops; - ASSERT_RTNL(); switch (event) { case NETDEV_REGISTER: + ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: + ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 34d975b0f277..c64efcff8078 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2358,7 +2358,7 @@ static int rtnetlink_event(struct notifier_block *this, unsigned long event, voi case NETDEV_PRE_TYPE_CHANGE: case NETDEV_GOING_DOWN: case NETDEV_UNREGISTER: - case NETDEV_UNREGISTER_BATCH: + case NETDEV_UNREGISTER_FINAL: case NETDEV_RELEASE: case NETDEV_JOIN: break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index adf273f8ad2e..6a5e6e4b142c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1147,8 +1147,12 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev; + if (event == NETDEV_UNREGISTER_FINAL) + goto out; + + in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index 7f073a38c87d..fd7d9ae64f16 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1041,7 +1041,7 @@ static int fib_inetaddr_event(struct notifier_block *this, unsigned long event, static int fib_netdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev = __in_dev_get_rtnl(dev); + struct in_device *in_dev; struct net *net = dev_net(dev); if (event == NETDEV_UNREGISTER) { @@ -1050,9 +1050,11 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo return NOTIFY_DONE; } - if (!in_dev) + if (event == NETDEV_UNREGISTER_FINAL) return NOTIFY_DONE; + in_dev = __in_dev_get_rtnl(dev); + switch (event) { case NETDEV_UP: for_ifa(in_dev) { @@ -1071,8 +1073,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo case NETDEV_CHANGE: rt_cache_flush(dev_net(dev), 0); break; - case NETDEV_UNREGISTER_BATCH: - break; } return NOTIFY_DONE; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6bc85f7c31e3..e581009cb09e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2566,10 +2566,14 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *data) { struct net_device *dev = (struct net_device *) data; - struct inet6_dev *idev = __in6_dev_get(dev); + struct inet6_dev *idev; int run_pending = 0; int err; + if (event == NETDEV_UNREGISTER_FINAL) + return NOTIFY_DONE; + + idev = __in6_dev_get(dev); switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { -- cgit v1.2.3 From b87fb39e399137257a6db3224ea854117e9486e9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 19 Aug 2012 03:47:30 +0000 Subject: ipv6: gre: fix ip6gre_err() ip6gre_err() miscomputes grehlen (sizeof(ipv6h) is 4 or 8, not 40 as expected), and should take into account 'offset' parameter. Also uses pskb_may_pull() to cope with some fragged skbs Signed-off-by: Eric Dumazet Cc: Dmitry Kozlov Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index a84ad5dc4fcf..424d11a4e7ff 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -415,8 +415,8 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, u8 type, u8 code, int offset, __be32 info) { const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb->data; - __be16 *p = (__be16 *)(ipv6h + 1); - int grehlen = sizeof(ipv6h) + 4; + __be16 *p = (__be16 *)(skb->data + offset); + int grehlen = offset + 4; struct ip6_tnl *t; __be16 flags; @@ -432,8 +432,10 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } /* If only 8 bytes returned, keyed message will be dropped here */ - if (skb_headlen(skb) < grehlen) + if (!pskb_may_pull(skb, grehlen)) return; + ipv6h = (const struct ipv6hdr *)skb->data; + p = (__be16 *)(skb->data + offset); rcu_read_lock(); -- cgit v1.2.3 From 748e2d9396a18c3fd3d07d47c1b41320acf1fbf4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 22 Aug 2012 21:50:59 +0000 Subject: net: reinstate rtnl in call_netdevice_notifiers() Eric Biederman pointed out that not holding RTNL while calling call_netdevice_notifiers() was racy. This patch is a direct transcription his feedback against commit 0115e8e30d6fc (net: remove delay at device dismantle) Thanks Eric ! Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Cc: "Eric W. Biederman" Cc: Gao feng Acked-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/core/dev.c | 9 +++++++-- net/core/fib_rules.c | 3 +-- net/ipv4/devinet.c | 6 +----- net/ipv4/fib_frontend.c | 7 ++----- net/ipv6/addrconf.c | 6 +----- 5 files changed, 12 insertions(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/core/dev.c b/net/core/dev.c index 0640d2a859c6..bc857fead8c8 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1466,8 +1466,7 @@ EXPORT_SYMBOL(unregister_netdevice_notifier); int call_netdevice_notifiers(unsigned long val, struct net_device *dev) { - if (val != NETDEV_UNREGISTER_FINAL) - ASSERT_RTNL(); + ASSERT_RTNL(); return raw_notifier_call_chain(&netdev_chain, val, dev); } EXPORT_SYMBOL(call_netdevice_notifiers); @@ -5782,7 +5781,11 @@ static void netdev_wait_allrefs(struct net_device *dev) /* Rebroadcast unregister notification */ call_netdevice_notifiers(NETDEV_UNREGISTER, dev); + + __rtnl_unlock(); rcu_barrier(); + rtnl_lock(); + call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); if (test_bit(__LINK_STATE_LINKWATCH_PENDING, &dev->state)) { @@ -5855,7 +5858,9 @@ void netdev_run_todo(void) = list_first_entry(&list, struct net_device, todo_list); list_del(&dev->todo_list); + rtnl_lock(); call_netdevice_notifiers(NETDEV_UNREGISTER_FINAL, dev); + __rtnl_unlock(); if (unlikely(dev->reg_state != NETREG_UNREGISTERING)) { pr_err("network todo '%s' but state %d\n", diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index 585093755c23..ab7db83236c9 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -711,16 +711,15 @@ static int fib_rules_event(struct notifier_block *this, unsigned long event, struct net *net = dev_net(dev); struct fib_rules_ops *ops; + ASSERT_RTNL(); switch (event) { case NETDEV_REGISTER: - ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) attach_rules(&ops->rules_list, dev); break; case NETDEV_UNREGISTER: - ASSERT_RTNL(); list_for_each_entry(ops, &net->rules_ops, list) detach_rules(&ops->rules_list, dev); break; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 6a5e6e4b142c..adf273f8ad2e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -1147,12 +1147,8 @@ static int inetdev_event(struct notifier_block *this, unsigned long event, void *ptr) { struct net_device *dev = ptr; - struct in_device *in_dev; - - if (event == NETDEV_UNREGISTER_FINAL) - goto out; + struct in_device *in_dev = __in_dev_get_rtnl(dev); - in_dev = __in_dev_get_rtnl(dev); ASSERT_RTNL(); if (!in_dev) { diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index fd7d9ae64f16..acdee325d972 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -1050,9 +1050,6 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo return NOTIFY_DONE; } - if (event == NETDEV_UNREGISTER_FINAL) - return NOTIFY_DONE; - in_dev = __in_dev_get_rtnl(dev); switch (event) { @@ -1064,14 +1061,14 @@ static int fib_netdev_event(struct notifier_block *this, unsigned long event, vo fib_sync_up(dev); #endif atomic_inc(&net->ipv4.dev_addr_genid); - rt_cache_flush(dev_net(dev), -1); + rt_cache_flush(net, -1); break; case NETDEV_DOWN: fib_disable_ip(dev, 0, 0); break; case NETDEV_CHANGEMTU: case NETDEV_CHANGE: - rt_cache_flush(dev_net(dev), 0); + rt_cache_flush(net, 0); break; } return NOTIFY_DONE; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e581009cb09e..6bc85f7c31e3 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -2566,14 +2566,10 @@ static int addrconf_notify(struct notifier_block *this, unsigned long event, void *data) { struct net_device *dev = (struct net_device *) data; - struct inet6_dev *idev; + struct inet6_dev *idev = __in6_dev_get(dev); int run_pending = 0; int err; - if (event == NETDEV_UNREGISTER_FINAL) - return NOTIFY_DONE; - - idev = __in6_dev_get(dev); switch (event) { case NETDEV_REGISTER: if (!idev && dev->mtu >= IPV6_MIN_MTU) { -- cgit v1.2.3 From 4cdd34084d539c758d00c5dc7bf95db2e4f2bc70 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:13:58 +0200 Subject: netfilter: nf_conntrack_ipv6: improve fragmentation handling The IPv6 conntrack fragmentation currently has a couple of shortcomings. Fragmentes are collected in PREROUTING/OUTPUT, are defragmented, the defragmented packet is then passed to conntrack, the resulting conntrack information is attached to each original fragment and the fragments then continue their way through the stack. Helper invocation occurs in the POSTROUTING hook, at which point only the original fragments are available. The result of this is that fragmented packets are never passed to helpers. This patch improves the situation in the following way: - If a reassembled packet belongs to a connection that has a helper assigned, the reassembled packet is passed through the stack instead of the original fragments. - During defragmentation, the largest received fragment size is stored. On output, the packet is refragmented if required. If the largest received fragment size exceeds the outgoing MTU, a "packet too big" message is generated, thus behaving as if the original fragments were passed through the stack from an outside point of view. - The ipv6_helper() hook function can't receive fragments anymore for connections using a helper, so it is switched to use ipv6_skip_exthdr() instead of the netfilter specific nf_ct_ipv6_skip_exthdr() and the reassembled packets are passed to connection tracking helpers. The result of this is that we can properly track fragmented packets, but still generate ICMPv6 Packet too big messages if we would have before. This patch is also required as a precondition for IPv6 NAT, where NAT helpers might enlarge packets up to a point that they require fragmentation. In that case we can't generate Packet too big messages since the proper MTU can't be calculated in all cases (f.i. when changing textual representation of a variable amount of addresses), so the packet is transparently fragmented iff the original packet or fragments would have fit the outgoing MTU. IPVS parts by Jesper Dangaard Brouer . Signed-off-by: Patrick McHardy --- include/linux/ipv6.h | 1 + net/ipv6/ip6_output.c | 7 +++-- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 41 ++++++++++++++++++++------ net/ipv6/netfilter/nf_conntrack_reasm.c | 19 ++++++++++-- net/netfilter/ipvs/ip_vs_xmit.c | 9 +++++- 5 files changed, 62 insertions(+), 15 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 879db26ec401..0b94e91ed685 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -256,6 +256,7 @@ struct inet6_skb_parm { #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) __u16 dsthao; #endif + __u16 frag_max_size; #define IP6SKB_XFRM_TRANSFORMED 1 #define IP6SKB_FORWARDED 2 diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 5b2d63ed793e..a4f6263fddca 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -493,7 +493,8 @@ int ip6_forward(struct sk_buff *skb) if (mtu < IPV6_MIN_MTU) mtu = IPV6_MIN_MTU; - if (skb->len > mtu && !skb_is_gso(skb)) { + if ((!skb->local_df && skb->len > mtu && !skb_is_gso(skb)) || + (IP6CB(skb)->frag_max_size && IP6CB(skb)->frag_max_size > mtu)) { /* Again, force OUTPUT device used as source address */ skb->dev = dst->dev; icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu); @@ -636,7 +637,9 @@ int ip6_fragment(struct sk_buff *skb, int (*output)(struct sk_buff *)) /* We must not fragment if the socket is set to force MTU discovery * or if the skb it not generated by a local socket. */ - if (unlikely(!skb->local_df && skb->len > mtu)) { + if (unlikely(!skb->local_df && skb->len > mtu) || + (IP6CB(skb)->frag_max_size && + IP6CB(skb)->frag_max_size > mtu)) { if (skb->sk && dst_allfrag(skb_dst(skb))) sk_nocaps_add(skb->sk, NETIF_F_GSO_MASK); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 4794f96cf2e0..521ddca876f8 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -153,10 +153,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, const struct nf_conn_help *help; const struct nf_conntrack_helper *helper; enum ip_conntrack_info ctinfo; - unsigned int ret, protoff; - unsigned int extoff = (u8 *)(ipv6_hdr(skb) + 1) - skb->data; - unsigned char pnum = ipv6_hdr(skb)->nexthdr; - + unsigned int ret; + __be16 frag_off; + int protoff; + u8 nexthdr; /* This is where we call the helper: as the packet goes out. */ ct = nf_ct_get(skb, &ctinfo); @@ -171,9 +171,10 @@ static unsigned int ipv6_helper(unsigned int hooknum, if (!helper) return NF_ACCEPT; - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, - skb->len - extoff); - if (protoff > skb->len || pnum == NEXTHDR_FRAGMENT) { + nexthdr = ipv6_hdr(skb)->nexthdr; + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &nexthdr, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("proto header not found\n"); return NF_ACCEPT; } @@ -199,9 +200,14 @@ static unsigned int ipv6_confirm(unsigned int hooknum, static unsigned int __ipv6_conntrack_in(struct net *net, unsigned int hooknum, struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, int (*okfn)(struct sk_buff *)) { struct sk_buff *reasm = skb->nfct_reasm; + const struct nf_conn_help *help; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; /* This packet is fragmented and has reassembled packet. */ if (reasm) { @@ -213,6 +219,23 @@ static unsigned int __ipv6_conntrack_in(struct net *net, if (ret != NF_ACCEPT) return ret; } + + /* Conntrack helpers need the entire reassembled packet in the + * POST_ROUTING hook. + */ + ct = nf_ct_get(reasm, &ctinfo); + if (ct != NULL && !nf_ct_is_untracked(ct)) { + help = nfct_help(ct); + if (help && help->helper) { + nf_conntrack_get_reasm(skb); + NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, + (struct net_device *)in, + (struct net_device *)out, + okfn, NF_IP6_PRI_CONNTRACK + 1); + return NF_DROP_ERR(-ECANCELED); + } + } + nf_conntrack_get(reasm->nfct); skb->nfct = reasm->nfct; skb->nfctinfo = reasm->nfctinfo; @@ -228,7 +251,7 @@ static unsigned int ipv6_conntrack_in(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { - return __ipv6_conntrack_in(dev_net(in), hooknum, skb, okfn); + return __ipv6_conntrack_in(dev_net(in), hooknum, skb, in, out, okfn); } static unsigned int ipv6_conntrack_local(unsigned int hooknum, @@ -242,7 +265,7 @@ static unsigned int ipv6_conntrack_local(unsigned int hooknum, net_notice_ratelimited("ipv6_conntrack_local: packet too short\n"); return NF_ACCEPT; } - return __ipv6_conntrack_in(dev_net(out), hooknum, skb, okfn); + return __ipv6_conntrack_in(dev_net(out), hooknum, skb, in, out, okfn); } static struct nf_hook_ops ipv6_conntrack_ops[] __read_mostly = { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index c9c78c2e666b..f94fb3ac2a79 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -190,6 +190,7 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; + unsigned int payload_len; int offset, end; if (fq->q.last_in & INET_FRAG_COMPLETE) { @@ -197,8 +198,10 @@ static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, goto err; } + payload_len = ntohs(ipv6_hdr(skb)->payload_len); + offset = ntohs(fhdr->frag_off) & ~0x7; - end = offset + (ntohs(ipv6_hdr(skb)->payload_len) - + end = offset + (payload_len - ((u8 *)(fhdr + 1) - (u8 *)(ipv6_hdr(skb) + 1))); if ((unsigned int)end > IPV6_MAXPLEN) { @@ -307,6 +310,8 @@ found: skb->dev = NULL; fq->q.stamp = skb->tstamp; fq->q.meat += skb->len; + if (payload_len > fq->q.max_size) + fq->q.max_size = payload_len; atomic_add(skb->truesize, &nf_init_frags.mem); /* The first fragment. @@ -412,10 +417,12 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) } atomic_sub(head->truesize, &nf_init_frags.mem); + head->local_df = 1; head->next = NULL; head->dev = dev; head->tstamp = fq->q.stamp; ipv6_hdr(head)->payload_len = htons(payload_len); + IP6CB(head)->frag_max_size = sizeof(struct ipv6hdr) + fq->q.max_size; /* Yes, and fold redundant checksum back. 8) */ if (head->ip_summed == CHECKSUM_COMPLETE) @@ -592,6 +599,7 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, int (*okfn)(struct sk_buff *)) { struct sk_buff *s, *s2; + unsigned int ret = 0; for (s = NFCT_FRAG6_CB(skb)->orig; s;) { nf_conntrack_put_reasm(s->nfct_reasm); @@ -601,8 +609,13 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, s2 = s->next; s->next = NULL; - NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, in, out, okfn, - NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + if (ret != -ECANCELED) + ret = NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, s, + in, out, okfn, + NF_IP6_PRI_CONNTRACK_DEFRAG + 1); + else + kfree_skb(s); + s = s2; } nf_conntrack_put_reasm(skb); diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index 67a39786b0a1..56f6d5d81a77 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -88,7 +88,14 @@ __ip_vs_dst_check(struct ip_vs_dest *dest, u32 rtos) static inline bool __mtu_check_toobig_v6(const struct sk_buff *skb, u32 mtu) { - if (skb->len > mtu && !skb_is_gso(skb)) { + if (IP6CB(skb)->frag_max_size) { + /* frag_max_size tell us that, this packet have been + * defragmented by netfilter IPv6 conntrack module. + */ + if (IP6CB(skb)->frag_max_size > mtu) + return true; /* largest fragment violate MTU */ + } + else if (skb->len > mtu && !skb_is_gso(skb)) { return true; /* Packet size violate MTU size */ } return false; -- cgit v1.2.3 From 2b60af017880f7dc35d1fac65f48fc94f8a3c1ec Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:13:59 +0200 Subject: netfilter: nf_conntrack_ipv6: fix tracking of ICMPv6 error messages containing fragments ICMPv6 error messages are tracked by extracting the conntrack tuple of the inner packet and looking up the corresponding conntrack entry. Tuple extraction uses the ->get_l4proto() callback, which in case of fragments returns NEXTHDR_FRAGMENT instead of the upper protocol, even for the first fragment when the entire next header is present, resulting in a failure to find the correct connection tracking entry. This patch changes ipv6_get_l4proto() to use ipv6_skip_exthdr() instead of nf_ct_ipv6_skip_exthdr() in order to skip fragment headers when the fragment offset is zero. Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 63 +++----------------------- 1 file changed, 6 insertions(+), 57 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index 521ddca876f8..dcf6010f68d9 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -64,82 +64,31 @@ static int ipv6_print_tuple(struct seq_file *s, tuple->src.u3.ip6, tuple->dst.u3.ip6); } -/* - * Based on ipv6_skip_exthdr() in net/ipv6/exthdr.c - * - * This function parses (probably truncated) exthdr set "hdr" - * of length "len". "nexthdrp" initially points to some place, - * where type of the first header can be found. - * - * It skips all well-known exthdrs, and returns pointer to the start - * of unparsable area i.e. the first header with unknown type. - * if success, *nexthdr is updated by type/protocol of this header. - * - * NOTES: - it may return pointer pointing beyond end of packet, - * if the last recognized header is truncated in the middle. - * - if packet is truncated, so that all parsed headers are skipped, - * it returns -1. - * - if packet is fragmented, return pointer of the fragment header. - * - ESP is unparsable for now and considered like - * normal payload protocol. - * - Note also special handling of AUTH header. Thanks to IPsec wizards. - */ - -static int nf_ct_ipv6_skip_exthdr(const struct sk_buff *skb, int start, - u8 *nexthdrp, int len) -{ - u8 nexthdr = *nexthdrp; - - while (ipv6_ext_hdr(nexthdr)) { - struct ipv6_opt_hdr hdr; - int hdrlen; - - if (len < (int)sizeof(struct ipv6_opt_hdr)) - return -1; - if (nexthdr == NEXTHDR_NONE) - break; - if (nexthdr == NEXTHDR_FRAGMENT) - break; - if (skb_copy_bits(skb, start, &hdr, sizeof(hdr))) - BUG(); - if (nexthdr == NEXTHDR_AUTH) - hdrlen = (hdr.hdrlen+2)<<2; - else - hdrlen = ipv6_optlen(&hdr); - - nexthdr = hdr.nexthdr; - len -= hdrlen; - start += hdrlen; - } - - *nexthdrp = nexthdr; - return start; -} - static int ipv6_get_l4proto(const struct sk_buff *skb, unsigned int nhoff, unsigned int *dataoff, u_int8_t *protonum) { unsigned int extoff = nhoff + sizeof(struct ipv6hdr); - unsigned char pnum; + __be16 frag_off; int protoff; + u8 nexthdr; if (skb_copy_bits(skb, nhoff + offsetof(struct ipv6hdr, nexthdr), - &pnum, sizeof(pnum)) != 0) { + &nexthdr, sizeof(nexthdr)) != 0) { pr_debug("ip6_conntrack_core: can't get nexthdr\n"); return -NF_ACCEPT; } - protoff = nf_ct_ipv6_skip_exthdr(skb, extoff, &pnum, skb->len - extoff); + protoff = ipv6_skip_exthdr(skb, extoff, &nexthdr, &frag_off); /* * (protoff == skb->len) mean that the packet doesn't have no data * except of IPv6 & ext headers. but it's tracked anyway. - YK */ - if ((protoff < 0) || (protoff > skb->len)) { + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { pr_debug("ip6_conntrack_core: can't find proto in pkt\n"); return -NF_ACCEPT; } *dataoff = protoff; - *protonum = pnum; + *protonum = nexthdr; return NF_ACCEPT; } -- cgit v1.2.3 From 0ad352cb433ed2b05921a32b5ee20410512e2320 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:08 +0200 Subject: netfilter: ipv6: expand skb head in ip6_route_me_harder after oif change Expand the skb headroom if the oif changed due to rerouting similar to how IPv4 packets are handled. Signed-off-by: Patrick McHardy --- net/ipv6/netfilter.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter.c b/net/ipv6/netfilter.c index db31561cc8df..429089cb073d 100644 --- a/net/ipv6/netfilter.c +++ b/net/ipv6/netfilter.c @@ -15,6 +15,7 @@ int ip6_route_me_harder(struct sk_buff *skb) { struct net *net = dev_net(skb_dst(skb)->dev); const struct ipv6hdr *iph = ipv6_hdr(skb); + unsigned int hh_len; struct dst_entry *dst; struct flowi6 fl6 = { .flowi6_oif = skb->sk ? skb->sk->sk_bound_dev_if : 0, @@ -47,6 +48,13 @@ int ip6_route_me_harder(struct sk_buff *skb) } #endif + /* Change in oif may mean change in hh_len. */ + hh_len = skb_dst(skb)->dev->hard_header_len; + if (skb_headroom(skb) < hh_len && + pskb_expand_head(skb, HH_DATA_ALIGN(hh_len - skb_headroom(skb)), + 0, GFP_ATOMIC)) + return -1; + return 0; } EXPORT_SYMBOL(ip6_route_me_harder); -- cgit v1.2.3 From 58a317f1061c894d2344c0b6a18ab4a64b69b815 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:12 +0200 Subject: netfilter: ipv6: add IPv6 NAT support Signed-off-by: Patrick McHardy --- include/linux/netfilter/nfnetlink_conntrack.h | 2 + include/net/netfilter/nf_nat_l3proto.h | 5 + include/net/netfilter/nf_nat_l4proto.h | 1 + include/net/netns/ipv6.h | 1 + net/core/secure_seq.c | 1 + net/ipv6/netfilter/Kconfig | 12 + net/ipv6/netfilter/Makefile | 4 + net/ipv6/netfilter/ip6table_nat.c | 321 +++++++++++++++++++++++++ net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c | 37 ++- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 287 ++++++++++++++++++++++ net/ipv6/netfilter/nf_nat_proto_icmpv6.c | 90 +++++++ net/netfilter/nf_nat_core.c | 2 + net/netfilter/xt_nat.c | 3 + 13 files changed, 764 insertions(+), 2 deletions(-) create mode 100644 net/ipv6/netfilter/ip6table_nat.c create mode 100644 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c create mode 100644 net/ipv6/netfilter/nf_nat_proto_icmpv6.c (limited to 'net/ipv6') diff --git a/include/linux/netfilter/nfnetlink_conntrack.h b/include/linux/netfilter/nfnetlink_conntrack.h index 68920eab287c..43bfe3e1685b 100644 --- a/include/linux/netfilter/nfnetlink_conntrack.h +++ b/include/linux/netfilter/nfnetlink_conntrack.h @@ -147,6 +147,8 @@ enum ctattr_nat { CTA_NAT_V4_MAXIP, #define CTA_NAT_MAXIP CTA_NAT_V4_MAXIP CTA_NAT_PROTO, + CTA_NAT_V6_MINIP, + CTA_NAT_V6_MAXIP, __CTA_NAT_MAX }; #define CTA_NAT_MAX (__CTA_NAT_MAX - 1) diff --git a/include/net/netfilter/nf_nat_l3proto.h b/include/net/netfilter/nf_nat_l3proto.h index beed96961fa7..bd3b97e02c82 100644 --- a/include/net/netfilter/nf_nat_l3proto.h +++ b/include/net/netfilter/nf_nat_l3proto.h @@ -43,5 +43,10 @@ extern int nf_nat_icmp_reply_translation(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, unsigned int hooknum); +extern int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen); #endif /* _NF_NAT_L3PROTO_H */ diff --git a/include/net/netfilter/nf_nat_l4proto.h b/include/net/netfilter/nf_nat_l4proto.h index 1f0a4f018fcf..24feb68d1bcc 100644 --- a/include/net/netfilter/nf_nat_l4proto.h +++ b/include/net/netfilter/nf_nat_l4proto.h @@ -51,6 +51,7 @@ extern const struct nf_nat_l4proto *__nf_nat_l4proto_find(u8 l3proto, u8 l4proto extern const struct nf_nat_l4proto nf_nat_l4proto_tcp; extern const struct nf_nat_l4proto nf_nat_l4proto_udp; extern const struct nf_nat_l4proto nf_nat_l4proto_icmp; +extern const struct nf_nat_l4proto nf_nat_l4proto_icmpv6; extern const struct nf_nat_l4proto nf_nat_l4proto_unknown; extern bool nf_nat_l4proto_in_range(const struct nf_conntrack_tuple *tuple, diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index df0a5456a3fd..0318104a9458 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -42,6 +42,7 @@ struct netns_ipv6 { #ifdef CONFIG_SECURITY struct xt_table *ip6table_security; #endif + struct xt_table *ip6table_nat; #endif struct rt6_info *ip6_null_entry; struct rt6_statistics *rt6_stats; diff --git a/net/core/secure_seq.c b/net/core/secure_seq.c index 99b2596531bb..e61a8bb7fce7 100644 --- a/net/core/secure_seq.c +++ b/net/core/secure_seq.c @@ -76,6 +76,7 @@ u32 secure_ipv6_port_ephemeral(const __be32 *saddr, const __be32 *daddr, return hash[0]; } +EXPORT_SYMBOL(secure_ipv6_port_ephemeral); #endif #ifdef CONFIG_INET diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 10135342799e..b27e0ad4b738 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,6 +25,18 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in ip6tables, see the man page for ip6tables(8). + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 534d3f216f7b..76779376da4c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_IP6_NF_FILTER) += ip6table_filter.o obj-$(CONFIG_IP6_NF_MANGLE) += ip6table_mangle.o obj-$(CONFIG_IP6_NF_RAW) += ip6table_raw.o obj-$(CONFIG_IP6_NF_SECURITY) += ip6table_security.o +obj-$(CONFIG_NF_NAT_IPV6) += ip6table_nat.o # objects for l3 independent conntrack nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o @@ -15,6 +16,9 @@ nf_conntrack_ipv6-y := nf_conntrack_l3proto_ipv6.o nf_conntrack_proto_icmpv6.o # l3 independent conntrack obj-$(CONFIG_NF_CONNTRACK_IPV6) += nf_conntrack_ipv6.o nf_defrag_ipv6.o +nf_nat_ipv6-y := nf_nat_l3proto_ipv6.o nf_nat_proto_icmpv6.o +obj-$(CONFIG_NF_NAT_IPV6) += nf_nat_ipv6.o + # defrag nf_defrag_ipv6-y := nf_defrag_ipv6_hooks.o nf_conntrack_reasm.o obj-$(CONFIG_NF_DEFRAG_IPV6) += nf_defrag_ipv6.o diff --git a/net/ipv6/netfilter/ip6table_nat.c b/net/ipv6/netfilter/ip6table_nat.c new file mode 100644 index 000000000000..e418bd6350a4 --- /dev/null +++ b/net/ipv6/netfilter/ip6table_nat.c @@ -0,0 +1,321 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 NAT code. Development of IPv6 NAT + * funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include + +#include +#include +#include + +static const struct xt_table nf_nat_ipv6_table = { + .name = "nat", + .valid_hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .me = THIS_MODULE, + .af = NFPROTO_IPV6, +}; + +static unsigned int alloc_null_binding(struct nf_conn *ct, unsigned int hooknum) +{ + /* Force range to this IP; let proto decide mapping for + * per-proto parts (hence not IP_NAT_RANGE_PROTO_SPECIFIED). + */ + struct nf_nat_range range; + + range.flags = 0; + pr_debug("Allocating NULL binding for %p (%pI6)\n", ct, + HOOK2MANIP(hooknum) == NF_NAT_MANIP_SRC ? + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.dst.u3.ip6 : + &ct->tuplehash[IP_CT_DIR_REPLY].tuple.src.u3.ip6); + + return nf_nat_setup_info(ct, &range, HOOK2MANIP(hooknum)); +} + +static unsigned int nf_nat_rule_find(struct sk_buff *skb, unsigned int hooknum, + const struct net_device *in, + const struct net_device *out, + struct nf_conn *ct) +{ + struct net *net = nf_ct_net(ct); + unsigned int ret; + + ret = ip6t_do_table(skb, hooknum, in, out, net->ipv6.ip6table_nat); + if (ret == NF_ACCEPT) { + if (!nf_nat_initialized(ct, HOOK2MANIP(hooknum))) + ret = alloc_null_binding(ct, hooknum); + } + return ret; +} + +static unsigned int +nf_nat_ipv6_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + struct nf_conn_nat *nat; + enum nf_nat_manip_type maniptype = HOOK2MANIP(hooknum); + __be16 frag_off; + int hdrlen; + u8 nexthdr; + + ct = nf_ct_get(skb, &ctinfo); + /* Can't track? It's not due to stress, or conntrack would + * have dropped it. Hence it's the user's responsibilty to + * packet filter it out, or implement conntrack/NAT for that + * protocol. 8) --RR + */ + if (!ct) + return NF_ACCEPT; + + /* Don't try to NAT if this packet is not conntracked */ + if (nf_ct_is_untracked(ct)) + return NF_ACCEPT; + + nat = nfct_nat(ct); + if (!nat) { + /* NAT module was loaded late. */ + if (nf_ct_is_confirmed(ct)) + return NF_ACCEPT; + nat = nf_ct_ext_add(ct, NF_CT_EXT_NAT, GFP_ATOMIC); + if (nat == NULL) { + pr_debug("failed to add NAT extension\n"); + return NF_ACCEPT; + } + } + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + nexthdr = ipv6_hdr(skb)->nexthdr; + hdrlen = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, ctinfo, + hooknum, hdrlen)) + return NF_DROP; + else + return NF_ACCEPT; + } + /* Fall thru... (Only ICMPs can be IP_CT_IS_REPLY) */ + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + unsigned int ret; + + ret = nf_nat_rule_find(skb, hooknum, in, out, ct); + if (ret != NF_ACCEPT) + return ret; + } else + pr_debug("Already setup manip %s for ct %p\n", + maniptype == NF_NAT_MANIP_SRC ? "SRC" : "DST", + ct); + break; + + default: + /* ESTABLISHED */ + NF_CT_ASSERT(ctinfo == IP_CT_ESTABLISHED || + ctinfo == IP_CT_ESTABLISHED_REPLY); + } + + return nf_nat_packet(ct, ctinfo, hooknum, skb); +} + +static unsigned int +nf_nat_ipv6_in(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + unsigned int ret; + struct in6_addr daddr = ipv6_hdr(skb)->daddr; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + ipv6_addr_cmp(&daddr, &ipv6_hdr(skb)->daddr)) + skb_dst_drop(skb); + + return ret; +} + +static unsigned int +nf_nat_ipv6_out(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ +#ifdef CONFIG_XFRM + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; +#endif + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); +#ifdef CONFIG_XFRM + if (ret != NF_DROP && ret != NF_STOLEN && + !(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.src.u3, + &ct->tuplehash[!dir].tuple.dst.u3) || + (ct->tuplehash[dir].tuple.src.u.all != + ct->tuplehash[!dir].tuple.dst.u.all)) + if (nf_xfrm_me_harder(skb, AF_INET6) < 0) + ret = NF_DROP; + } +#endif + return ret; +} + +static unsigned int +nf_nat_ipv6_local_fn(unsigned int hooknum, + struct sk_buff *skb, + const struct net_device *in, + const struct net_device *out, + int (*okfn)(struct sk_buff *)) +{ + const struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned int ret; + + /* root is playing with raw sockets. */ + if (skb->len < sizeof(struct ipv6hdr)) + return NF_ACCEPT; + + ret = nf_nat_ipv6_fn(hooknum, skb, in, out, okfn); + if (ret != NF_DROP && ret != NF_STOLEN && + (ct = nf_ct_get(skb, &ctinfo)) != NULL) { + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + + if (!nf_inet_addr_cmp(&ct->tuplehash[dir].tuple.dst.u3, + &ct->tuplehash[!dir].tuple.src.u3)) { + if (ip6_route_me_harder(skb)) + ret = NF_DROP; + } +#ifdef CONFIG_XFRM + else if (!(IP6CB(skb)->flags & IP6SKB_XFRM_TRANSFORMED) && + ct->tuplehash[dir].tuple.dst.u.all != + ct->tuplehash[!dir].tuple.src.u.all) + if (nf_xfrm_me_harder(skb, AF_INET6)) + ret = NF_DROP; +#endif + } + return ret; +} + +static struct nf_hook_ops nf_nat_ipv6_ops[] __read_mostly = { + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_in, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_PRE_ROUTING, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_out, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_POST_ROUTING, + .priority = NF_IP6_PRI_NAT_SRC, + }, + /* Before packet filtering, change destination */ + { + .hook = nf_nat_ipv6_local_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_OUT, + .priority = NF_IP6_PRI_NAT_DST, + }, + /* After packet filtering, change source */ + { + .hook = nf_nat_ipv6_fn, + .owner = THIS_MODULE, + .pf = NFPROTO_IPV6, + .hooknum = NF_INET_LOCAL_IN, + .priority = NF_IP6_PRI_NAT_SRC, + }, +}; + +static int __net_init ip6table_nat_net_init(struct net *net) +{ + struct ip6t_replace *repl; + + repl = ip6t_alloc_initial_table(&nf_nat_ipv6_table); + if (repl == NULL) + return -ENOMEM; + net->ipv6.ip6table_nat = ip6t_register_table(net, &nf_nat_ipv6_table, repl); + kfree(repl); + if (IS_ERR(net->ipv6.ip6table_nat)) + return PTR_ERR(net->ipv6.ip6table_nat); + return 0; +} + +static void __net_exit ip6table_nat_net_exit(struct net *net) +{ + ip6t_unregister_table(net, net->ipv6.ip6table_nat); +} + +static struct pernet_operations ip6table_nat_net_ops = { + .init = ip6table_nat_net_init, + .exit = ip6table_nat_net_exit, +}; + +static int __init ip6table_nat_init(void) +{ + int err; + + err = register_pernet_subsys(&ip6table_nat_net_ops); + if (err < 0) + goto err1; + + err = nf_register_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + if (err < 0) + goto err2; + return 0; + +err2: + unregister_pernet_subsys(&ip6table_nat_net_ops); +err1: + return err; +} + +static void __exit ip6table_nat_exit(void) +{ + nf_unregister_hooks(nf_nat_ipv6_ops, ARRAY_SIZE(nf_nat_ipv6_ops)); + unregister_pernet_subsys(&ip6table_nat_net_ops); +} + +module_init(ip6table_nat_init); +module_exit(ip6table_nat_exit); + +MODULE_LICENSE("GPL"); diff --git a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c index dcf6010f68d9..8860d23e61cf 100644 --- a/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_conntrack_l3proto_ipv6.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include @@ -142,6 +143,36 @@ static unsigned int ipv6_confirm(unsigned int hooknum, const struct net_device *out, int (*okfn)(struct sk_buff *)) { + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + unsigned char pnum = ipv6_hdr(skb)->nexthdr; + int protoff; + __be16 frag_off; + + ct = nf_ct_get(skb, &ctinfo); + if (!ct || ctinfo == IP_CT_RELATED_REPLY) + goto out; + + protoff = ipv6_skip_exthdr(skb, sizeof(struct ipv6hdr), &pnum, + &frag_off); + if (protoff < 0 || (frag_off & htons(~0x7)) != 0) { + pr_debug("proto header not found\n"); + goto out; + } + + /* adjust seqs for loopback traffic only in outgoing direction */ + if (test_bit(IPS_SEQ_ADJUST_BIT, &ct->status) && + !nf_is_loopback_packet(skb)) { + typeof(nf_nat_seq_adjust_hook) seq_adjust; + + seq_adjust = rcu_dereference(nf_nat_seq_adjust_hook); + if (!seq_adjust || + !seq_adjust(skb, ct, ctinfo, protoff)) { + NF_CT_STAT_INC_ATOMIC(nf_ct_net(ct), drop); + return NF_DROP; + } + } +out: /* We've seen it coming out the other side: confirm it */ return nf_conntrack_confirm(skb); } @@ -170,12 +201,14 @@ static unsigned int __ipv6_conntrack_in(struct net *net, } /* Conntrack helpers need the entire reassembled packet in the - * POST_ROUTING hook. + * POST_ROUTING hook. In case of unconfirmed connections NAT + * might reassign a helper, so the entire packet is also + * required. */ ct = nf_ct_get(reasm, &ctinfo); if (ct != NULL && !nf_ct_is_untracked(ct)) { help = nfct_help(ct); - if (help && help->helper) { + if ((help && help->helper) || !nf_ct_is_confirmed(ct)) { nf_conntrack_get_reasm(skb); NF_HOOK_THRESH(NFPROTO_IPV6, hooknum, reasm, (struct net_device *)in, diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c new file mode 100644 index 000000000000..81a2d1c3da8e --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -0,0 +1,287 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Development of IPv6 NAT funded by Astaro. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include +#include +#include +#include +#include + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6; + +#ifdef CONFIG_XFRM +static void nf_nat_ipv6_decode_session(struct sk_buff *skb, + const struct nf_conn *ct, + enum ip_conntrack_dir dir, + unsigned long statusbit, + struct flowi *fl) +{ + const struct nf_conntrack_tuple *t = &ct->tuplehash[dir].tuple; + struct flowi6 *fl6 = &fl->u.ip6; + + if (ct->status & statusbit) { + fl6->daddr = t->dst.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_dport = t->dst.u.all; + } + + statusbit ^= IPS_NAT_MASK; + + if (ct->status & statusbit) { + fl6->saddr = t->src.u3.in6; + if (t->dst.protonum == IPPROTO_TCP || + t->dst.protonum == IPPROTO_UDP || + t->dst.protonum == IPPROTO_UDPLITE || + t->dst.protonum == IPPROTO_DCCP || + t->dst.protonum == IPPROTO_SCTP) + fl6->fl6_sport = t->src.u.all; + } +} +#endif + +static bool nf_nat_ipv6_in_range(const struct nf_conntrack_tuple *t, + const struct nf_nat_range *range) +{ + return ipv6_addr_cmp(&t->src.u3.in6, &range->min_addr.in6) >= 0 && + ipv6_addr_cmp(&t->src.u3.in6, &range->max_addr.in6) <= 0; +} + +static u32 nf_nat_ipv6_secure_port(const struct nf_conntrack_tuple *t, + __be16 dport) +{ + return secure_ipv6_port_ephemeral(t->src.u3.ip6, t->dst.u3.ip6, dport); +} + +static bool nf_nat_ipv6_manip_pkt(struct sk_buff *skb, + unsigned int iphdroff, + const struct nf_nat_l4proto *l4proto, + const struct nf_conntrack_tuple *target, + enum nf_nat_manip_type maniptype) +{ + struct ipv6hdr *ipv6h; + __be16 frag_off; + int hdroff; + u8 nexthdr; + + if (!skb_make_writable(skb, iphdroff + sizeof(*ipv6h))) + return false; + + ipv6h = (void *)skb->data + iphdroff; + nexthdr = ipv6h->nexthdr; + hdroff = ipv6_skip_exthdr(skb, iphdroff + sizeof(*ipv6h), + &nexthdr, &frag_off); + if (hdroff < 0) + goto manip_addr; + + if ((frag_off & htons(~0x7)) == 0 && + !l4proto->manip_pkt(skb, &nf_nat_l3proto_ipv6, iphdroff, hdroff, + target, maniptype)) + return false; +manip_addr: + if (maniptype == NF_NAT_MANIP_SRC) + ipv6h->saddr = target->src.u3.in6; + else + ipv6h->daddr = target->dst.u3.in6; + + return true; +} + +static void nf_nat_ipv6_csum_update(struct sk_buff *skb, + unsigned int iphdroff, __sum16 *check, + const struct nf_conntrack_tuple *t, + enum nf_nat_manip_type maniptype) +{ + const struct ipv6hdr *ipv6h = (struct ipv6hdr *)(skb->data + iphdroff); + const struct in6_addr *oldip, *newip; + + if (maniptype == NF_NAT_MANIP_SRC) { + oldip = &ipv6h->saddr; + newip = &t->src.u3.in6; + } else { + oldip = &ipv6h->daddr; + newip = &t->dst.u3.in6; + } + inet_proto_csum_replace16(check, skb, oldip->s6_addr32, + newip->s6_addr32, 1); +} + +static void nf_nat_ipv6_csum_recalc(struct sk_buff *skb, + u8 proto, void *data, __sum16 *check, + int datalen, int oldlen) +{ + const struct ipv6hdr *ipv6h = ipv6_hdr(skb); + struct rt6_info *rt = (struct rt6_info *)skb_dst(skb); + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + if (!(rt->rt6i_flags & RTF_LOCAL) && + (!skb->dev || skb->dev->features & NETIF_F_V6_CSUM)) { + skb->ip_summed = CHECKSUM_PARTIAL; + skb->csum_start = skb_headroom(skb) + + skb_network_offset(skb) + + (data - (void *)skb->data); + skb->csum_offset = (void *)check - data; + *check = ~csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, 0); + } else { + *check = 0; + *check = csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + datalen, proto, + csum_partial(data, datalen, + 0)); + if (proto == IPPROTO_UDP && !*check) + *check = CSUM_MANGLED_0; + } + } else + inet_proto_csum_replace2(check, skb, + htons(oldlen), htons(datalen), 1); +} + +static int nf_nat_ipv6_nlattr_to_range(struct nlattr *tb[], + struct nf_nat_range *range) +{ + if (tb[CTA_NAT_V6_MINIP]) { + nla_memcpy(&range->min_addr.ip6, tb[CTA_NAT_V6_MINIP], + sizeof(struct in6_addr)); + range->flags |= NF_NAT_RANGE_MAP_IPS; + } + + if (tb[CTA_NAT_V6_MAXIP]) + nla_memcpy(&range->max_addr.ip6, tb[CTA_NAT_V6_MAXIP], + sizeof(struct in6_addr)); + else + range->max_addr = range->min_addr; + + return 0; +} + +static const struct nf_nat_l3proto nf_nat_l3proto_ipv6 = { + .l3proto = NFPROTO_IPV6, + .secure_port = nf_nat_ipv6_secure_port, + .in_range = nf_nat_ipv6_in_range, + .manip_pkt = nf_nat_ipv6_manip_pkt, + .csum_update = nf_nat_ipv6_csum_update, + .csum_recalc = nf_nat_ipv6_csum_recalc, + .nlattr_to_range = nf_nat_ipv6_nlattr_to_range, +#ifdef CONFIG_XFRM + .decode_session = nf_nat_ipv6_decode_session, +#endif +}; + +int nf_nat_icmpv6_reply_translation(struct sk_buff *skb, + struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + unsigned int hooknum, + unsigned int hdrlen) +{ + struct { + struct icmp6hdr icmp6; + struct ipv6hdr ip6; + } *inside; + enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo); + enum nf_nat_manip_type manip = HOOK2MANIP(hooknum); + const struct nf_nat_l4proto *l4proto; + struct nf_conntrack_tuple target; + unsigned long statusbit; + + NF_CT_ASSERT(ctinfo == IP_CT_RELATED || ctinfo == IP_CT_RELATED_REPLY); + + if (!skb_make_writable(skb, hdrlen + sizeof(*inside))) + return 0; + if (nf_ip6_checksum(skb, hooknum, hdrlen, IPPROTO_ICMPV6)) + return 0; + + inside = (void *)skb->data + hdrlen; + if (inside->icmp6.icmp6_type == NDISC_REDIRECT) { + if ((ct->status & IPS_NAT_DONE_MASK) != IPS_NAT_DONE_MASK) + return 0; + if (ct->status & IPS_NAT_MASK) + return 0; + } + + if (manip == NF_NAT_MANIP_SRC) + statusbit = IPS_SRC_NAT; + else + statusbit = IPS_DST_NAT; + + /* Invert if this is reply direction */ + if (dir == IP_CT_DIR_REPLY) + statusbit ^= IPS_NAT_MASK; + + if (!(ct->status & statusbit)) + return 1; + + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, inside->ip6.nexthdr); + if (!nf_nat_ipv6_manip_pkt(skb, hdrlen + sizeof(inside->icmp6), + l4proto, &ct->tuplehash[!dir].tuple, !manip)) + return 0; + + if (skb->ip_summed != CHECKSUM_PARTIAL) { + struct ipv6hdr *ipv6h = ipv6_hdr(skb); + inside = (void *)skb->data + hdrlen; + inside->icmp6.icmp6_cksum = 0; + inside->icmp6.icmp6_cksum = + csum_ipv6_magic(&ipv6h->saddr, &ipv6h->daddr, + skb->len - hdrlen, IPPROTO_ICMPV6, + csum_partial(&inside->icmp6, + skb->len - hdrlen, 0)); + } + + nf_ct_invert_tuplepr(&target, &ct->tuplehash[!dir].tuple); + l4proto = __nf_nat_l4proto_find(NFPROTO_IPV6, IPPROTO_ICMPV6); + if (!nf_nat_ipv6_manip_pkt(skb, 0, l4proto, &target, manip)) + return 0; + + return 1; +} +EXPORT_SYMBOL_GPL(nf_nat_icmpv6_reply_translation); + +static int __init nf_nat_l3proto_ipv6_init(void) +{ + int err; + + err = nf_nat_l4proto_register(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); + if (err < 0) + goto err1; + err = nf_nat_l3proto_register(&nf_nat_l3proto_ipv6); + if (err < 0) + goto err2; + return err; + +err2: + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +err1: + return err; +} + +static void __exit nf_nat_l3proto_ipv6_exit(void) +{ + nf_nat_l3proto_unregister(&nf_nat_l3proto_ipv6); + nf_nat_l4proto_unregister(NFPROTO_IPV6, &nf_nat_l4proto_icmpv6); +} + +MODULE_LICENSE("GPL"); +MODULE_ALIAS("nf-nat-" __stringify(AF_INET6)); + +module_init(nf_nat_l3proto_ipv6_init); +module_exit(nf_nat_l3proto_ipv6_exit); diff --git a/net/ipv6/netfilter/nf_nat_proto_icmpv6.c b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c new file mode 100644 index 000000000000..5d6da784305b --- /dev/null +++ b/net/ipv6/netfilter/nf_nat_proto_icmpv6.c @@ -0,0 +1,90 @@ +/* + * Copyright (c) 2011 Patrick Mchardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 ICMP NAT code. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include + +#include +#include +#include +#include +#include + +static bool +icmpv6_in_range(const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype, + const union nf_conntrack_man_proto *min, + const union nf_conntrack_man_proto *max) +{ + return ntohs(tuple->src.u.icmp.id) >= ntohs(min->icmp.id) && + ntohs(tuple->src.u.icmp.id) <= ntohs(max->icmp.id); +} + +static void +icmpv6_unique_tuple(const struct nf_nat_l3proto *l3proto, + struct nf_conntrack_tuple *tuple, + const struct nf_nat_range *range, + enum nf_nat_manip_type maniptype, + const struct nf_conn *ct) +{ + static u16 id; + unsigned int range_size; + unsigned int i; + + range_size = ntohs(range->max_proto.icmp.id) - + ntohs(range->min_proto.icmp.id) + 1; + + if (!(range->flags & NF_NAT_RANGE_PROTO_SPECIFIED)) + range_size = 0xffff; + + for (i = 0; ; ++id) { + tuple->src.u.icmp.id = htons(ntohs(range->min_proto.icmp.id) + + (id % range_size)); + if (++i == range_size || !nf_nat_used_tuple(tuple, ct)) + return; + } +} + +static bool +icmpv6_manip_pkt(struct sk_buff *skb, + const struct nf_nat_l3proto *l3proto, + unsigned int iphdroff, unsigned int hdroff, + const struct nf_conntrack_tuple *tuple, + enum nf_nat_manip_type maniptype) +{ + struct icmp6hdr *hdr; + + if (!skb_make_writable(skb, hdroff + sizeof(*hdr))) + return false; + + hdr = (struct icmp6hdr *)(skb->data + hdroff); + l3proto->csum_update(skb, iphdroff, &hdr->icmp6_cksum, + tuple, maniptype); + if (hdr->icmp6_code == ICMPV6_ECHO_REQUEST || + hdr->icmp6_code == ICMPV6_ECHO_REPLY) { + inet_proto_csum_replace2(&hdr->icmp6_cksum, skb, + hdr->icmp6_identifier, + tuple->src.u.icmp.id, 0); + hdr->icmp6_identifier = tuple->src.u.icmp.id; + } + return true; +} + +const struct nf_nat_l4proto nf_nat_l4proto_icmpv6 = { + .l4proto = IPPROTO_ICMPV6, + .manip_pkt = icmpv6_manip_pkt, + .in_range = icmpv6_in_range, + .unique_tuple = icmpv6_unique_tuple, +#if defined(CONFIG_NF_CT_NETLINK) || defined(CONFIG_NF_CT_NETLINK_MODULE) + .nlattr_to_range = nf_nat_l4proto_nlattr_to_range, +#endif +}; diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c index c577b753fb9a..29d445235199 100644 --- a/net/netfilter/nf_nat_core.c +++ b/net/netfilter/nf_nat_core.c @@ -696,6 +696,8 @@ static int nfnetlink_parse_nat_proto(struct nlattr *attr, static const struct nla_policy nat_nla_policy[CTA_NAT_MAX+1] = { [CTA_NAT_V4_MINIP] = { .type = NLA_U32 }, [CTA_NAT_V4_MAXIP] = { .type = NLA_U32 }, + [CTA_NAT_V6_MINIP] = { .len = sizeof(struct in6_addr) }, + [CTA_NAT_V6_MAXIP] = { .len = sizeof(struct in6_addr) }, [CTA_NAT_PROTO] = { .type = NLA_NESTED }, }; diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 7521368a6034..81aafa8e4fef 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -163,5 +163,8 @@ module_init(xt_nat_init); module_exit(xt_nat_exit); MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); MODULE_ALIAS("ipt_SNAT"); MODULE_ALIAS("ipt_DNAT"); +MODULE_ALIAS("ip6t_SNAT"); +MODULE_ALIAS("ip6t_DNAT"); -- cgit v1.2.3 From b3f644fc8232ca761da0b5c5ccb6f30b423c4302 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:14 +0200 Subject: netfilter: ip6tables: add MASQUERADE target Signed-off-by: Patrick McHardy --- include/net/addrconf.h | 2 +- include/net/netfilter/nf_nat.h | 4 +- net/ipv4/netfilter/ipt_MASQUERADE.c | 3 +- net/ipv6/addrconf.c | 2 +- net/ipv6/netfilter/Kconfig | 12 ++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_MASQUERADE.c | 135 +++++++++++++++++++++++++++++++++++ 7 files changed, 155 insertions(+), 4 deletions(-) create mode 100644 net/ipv6/netfilter/ip6t_MASQUERADE.c (limited to 'net/ipv6') diff --git a/include/net/addrconf.h b/include/net/addrconf.h index 089a09d001d1..9e63e76b20e7 100644 --- a/include/net/addrconf.h +++ b/include/net/addrconf.h @@ -78,7 +78,7 @@ extern struct inet6_ifaddr *ipv6_get_ifaddr(struct net *net, int strict); extern int ipv6_dev_get_saddr(struct net *net, - struct net_device *dev, + const struct net_device *dev, const struct in6_addr *daddr, unsigned int srcprefs, struct in6_addr *saddr); diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index 1752f1339054..bd8eea720f2e 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -43,7 +43,9 @@ struct nf_conn_nat { struct nf_conn *ct; union nf_conntrack_nat_help help; #if defined(CONFIG_IP_NF_TARGET_MASQUERADE) || \ - defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) + defined(CONFIG_IP_NF_TARGET_MASQUERADE_MODULE) || \ + defined(CONFIG_IP6_NF_TARGET_MASQUERADE) || \ + defined(CONFIG_IP6_NF_TARGET_MASQUERADE_MODULE) int masq_index; #endif }; diff --git a/net/ipv4/netfilter/ipt_MASQUERADE.c b/net/ipv4/netfilter/ipt_MASQUERADE.c index 1c3aa28b51ae..5d5d4d1be9c2 100644 --- a/net/ipv4/netfilter/ipt_MASQUERADE.c +++ b/net/ipv4/netfilter/ipt_MASQUERADE.c @@ -99,7 +99,8 @@ device_cmp(struct nf_conn *i, void *ifindex) if (!nat) return 0; - + if (nf_ct_l3num(i) != NFPROTO_IPV4) + return 0; return nat->masq_index == (int)(long)ifindex; } diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index e581009cb09e..19d4bffda9d7 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1093,7 +1093,7 @@ out: return ret; } -int ipv6_dev_get_saddr(struct net *net, struct net_device *dst_dev, +int ipv6_dev_get_saddr(struct net *net, const struct net_device *dst_dev, const struct in6_addr *daddr, unsigned int prefs, struct in6_addr *saddr) { diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index b27e0ad4b738..54a5032ea3df 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -144,6 +144,18 @@ config IP6_NF_TARGET_HL (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. +config IP6_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + depends on NF_NAT_IPV6 + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 76779376da4c..068bad199587 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -34,4 +34,5 @@ obj-$(CONFIG_IP6_NF_MATCH_RPFILTER) += ip6t_rpfilter.o obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets +obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_MASQUERADE.c b/net/ipv6/netfilter/ip6t_MASQUERADE.c new file mode 100644 index 000000000000..60e9053bab05 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_MASQUERADE.c @@ -0,0 +1,135 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv6 MASQUERADE target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +masquerade_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + enum ip_conntrack_info ctinfo; + struct in6_addr src; + struct nf_conn *ct; + struct nf_nat_range newrange; + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED || + ctinfo == IP_CT_RELATED_REPLY)); + + if (ipv6_dev_get_saddr(dev_net(par->out), par->out, + &ipv6_hdr(skb)->daddr, 0, &src) < 0) + return NF_DROP; + + nfct_nat(ct)->masq_index = par->out->ifindex; + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = src; + newrange.max_addr.in6 = src; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_SRC); +} + +static int masquerade_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +static int device_cmp(struct nf_conn *ct, void *ifindex) +{ + const struct nf_conn_nat *nat = nfct_nat(ct); + + if (!nat) + return 0; + if (nf_ct_l3num(ct) != NFPROTO_IPV6) + return 0; + return nat->masq_index == (int)(long)ifindex; +} + +static int masq_device_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + const struct net_device *dev = ptr; + struct net *net = dev_net(dev); + + if (event == NETDEV_DOWN) + nf_ct_iterate_cleanup(net, device_cmp, + (void *)(long)dev->ifindex); + + return NOTIFY_DONE; +} + +static struct notifier_block masq_dev_notifier = { + .notifier_call = masq_device_event, +}; + +static int masq_inet_event(struct notifier_block *this, + unsigned long event, void *ptr) +{ + struct inet6_ifaddr *ifa = ptr; + + return masq_device_event(this, event, ifa->idev->dev); +} + +static struct notifier_block masq_inet_notifier = { + .notifier_call = masq_inet_event, +}; + +static struct xt_target masquerade_tg6_reg __read_mostly = { + .name = "MASQUERADE", + .family = NFPROTO_IPV6, + .checkentry = masquerade_tg6_checkentry, + .target = masquerade_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = 1 << NF_INET_POST_ROUTING, + .me = THIS_MODULE, +}; + +static int __init masquerade_tg6_init(void) +{ + int err; + + err = xt_register_target(&masquerade_tg6_reg); + if (err == 0) { + register_netdevice_notifier(&masq_dev_notifier); + register_inet6addr_notifier(&masq_inet_notifier); + } + + return err; +} +static void __exit masquerade_tg6_exit(void) +{ + unregister_inet6addr_notifier(&masq_inet_notifier); + unregister_netdevice_notifier(&masq_dev_notifier); + xt_unregister_target(&masquerade_tg6_reg); +} + +module_init(masquerade_tg6_init); +module_exit(masquerade_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: automatic address SNAT"); -- cgit v1.2.3 From 115e23ac78f87b632b5406e9d504fd56d17ffef1 Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:16 +0200 Subject: netfilter: ip6tables: add REDIRECT target Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/Kconfig | 11 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_REDIRECT.c | 98 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 110 insertions(+) create mode 100644 net/ipv6/netfilter/ip6t_REDIRECT.c (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 54a5032ea3df..585590f16f8c 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -156,6 +156,17 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NF_NAT_IPV6 + help + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 068bad199587..e30a531d40ce 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -35,4 +35,5 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o +obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_REDIRECT.c b/net/ipv6/netfilter/ip6t_REDIRECT.c new file mode 100644 index 000000000000..60497a3c6004 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_REDIRECT.c @@ -0,0 +1,98 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +static unsigned int +redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = loopback_addr; + else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +static struct xt_target redirect_tg6_reg __read_mostly = { + .name = "REDIRECT", + .family = NFPROTO_IPV6, + .checkentry = redirect_tg6_checkentry, + .target = redirect_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, +}; + +static int __init redirect_tg6_init(void) +{ + return xt_register_target(&redirect_tg6_reg); +} + +static void __exit redirect_tg6_exit(void) +{ + xt_unregister_target(&redirect_tg6_reg); +} + +module_init(redirect_tg6_init); +module_exit(redirect_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); -- cgit v1.2.3 From ed72d9e294a66fce8f4b4a2f6c8c011b22f1a87c Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:18 +0200 Subject: netfilter: ip6tables: add NETMAP target Signed-off-by: Patrick McHardy --- net/ipv6/netfilter/Kconfig | 10 +++++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NETMAP.c | 94 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) create mode 100644 net/ipv6/netfilter/ip6t_NETMAP.c (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 585590f16f8c..7bdf73be9a99 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -156,6 +156,16 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_NETMAP + tristate "NETMAP target support" + depends on NF_NAT_IPV6 + help + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_TARGET_REDIRECT tristate "REDIRECT target support" depends on NF_NAT_IPV6 diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index e30a531d40ce..0864ce601651 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -35,5 +35,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o +obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_NETMAP.c b/net/ipv6/netfilter/ip6t_NETMAP.c new file mode 100644 index 000000000000..4f3bf360e50f --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NETMAP.c @@ -0,0 +1,94 @@ +/* + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Svenning Soerensen's IPv4 NETMAP target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + union nf_inet_addr new_addr, netmask; + unsigned int i; + + ct = nf_ct_get(skb, &ctinfo); + for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) + netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ + range->max_addr.ip6[i]); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_addr.in6 = ipv6_hdr(skb)->daddr; + else + new_addr.in6 = ipv6_hdr(skb)->saddr; + + for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { + new_addr.ip6[i] &= ~netmask.ip6[i]; + new_addr.ip6[i] |= range->min_addr.ip6[i] & + netmask.ip6[i]; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = new_addr; + newrange.max_addr = new_addr; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return -EINVAL; + return 0; +} + +static struct xt_target netmap_tg6_reg __read_mostly = { + .name = "NETMAP", + .family = NFPROTO_IPV6, + .target = netmap_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg6_checkentry, + .me = THIS_MODULE, +}; + +static int __init netmap_tg6_init(void) +{ + return xt_register_target(&netmap_tg6_reg); +} + +static void netmap_tg6_exit(void) +{ + xt_unregister_target(&netmap_tg6_reg); +} + +module_init(netmap_tg6_init); +module_exit(netmap_tg6_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv6 subnets"); +MODULE_AUTHOR("Patrick McHardy "); -- cgit v1.2.3 From 8a91bb0c304b0853f8c59b1b48c7822c52362cba Mon Sep 17 00:00:00 2001 From: Patrick McHardy Date: Sun, 26 Aug 2012 19:14:31 +0200 Subject: netfilter: ip6tables: add stateless IPv6-to-IPv6 Network Prefix Translation target Signed-off-by: Patrick McHardy --- include/linux/netfilter_ipv6/Kbuild | 1 + include/linux/netfilter_ipv6/ip6t_NPT.h | 16 ++++ net/ipv6/netfilter/Kconfig | 9 ++ net/ipv6/netfilter/Makefile | 1 + net/ipv6/netfilter/ip6t_NPT.c | 165 ++++++++++++++++++++++++++++++++ 5 files changed, 192 insertions(+) create mode 100644 include/linux/netfilter_ipv6/ip6t_NPT.h create mode 100644 net/ipv6/netfilter/ip6t_NPT.c (limited to 'net/ipv6') diff --git a/include/linux/netfilter_ipv6/Kbuild b/include/linux/netfilter_ipv6/Kbuild index bd095bc075e9..b88c0058bf73 100644 --- a/include/linux/netfilter_ipv6/Kbuild +++ b/include/linux/netfilter_ipv6/Kbuild @@ -1,6 +1,7 @@ header-y += ip6_tables.h header-y += ip6t_HL.h header-y += ip6t_LOG.h +header-y += ip6t_NPT.h header-y += ip6t_REJECT.h header-y += ip6t_ah.h header-y += ip6t_frag.h diff --git a/include/linux/netfilter_ipv6/ip6t_NPT.h b/include/linux/netfilter_ipv6/ip6t_NPT.h new file mode 100644 index 000000000000..f763355481b5 --- /dev/null +++ b/include/linux/netfilter_ipv6/ip6t_NPT.h @@ -0,0 +1,16 @@ +#ifndef __NETFILTER_IP6T_NPT +#define __NETFILTER_IP6T_NPT + +#include +#include + +struct ip6t_npt_tginfo { + union nf_inet_addr src_pfx; + union nf_inet_addr dst_pfx; + __u8 src_pfx_len; + __u8 dst_pfx_len; + /* Used internally by the kernel */ + __sum16 adjustment; +}; + +#endif /* __NETFILTER_IP6T_NPT */ diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 7bdf73be9a99..3b73254d7bf1 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -177,6 +177,15 @@ config IP6_NF_TARGET_REDIRECT To compile it as a module, choose M here. If unsure, say N. +config IP6_NF_TARGET_NPT + tristate "NPT (Network Prefix translation) target support" + depends on NETFILTER_ADVANCED + help + This option adds the `SNPT' and `DNPT' target, which perform + stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. + + To compile it as a module, choose M here. If unsure, say N. + config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 0864ce601651..5752132ca159 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,5 +36,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o +obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_NPT.c b/net/ipv6/netfilter/ip6t_NPT.c new file mode 100644 index 000000000000..e9486915eff6 --- /dev/null +++ b/net/ipv6/netfilter/ip6t_NPT.c @@ -0,0 +1,165 @@ +/* + * Copyright (c) 2011, 2012 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include + +static __sum16 csum16_complement(__sum16 a) +{ + return (__force __sum16)(0xffff - (__force u16)a); +} + +static __sum16 csum16_add(__sum16 a, __sum16 b) +{ + u16 sum; + + sum = (__force u16)a + (__force u16)b; + sum += (__force u16)a < (__force u16)b; + return (__force __sum16)sum; +} + +static __sum16 csum16_sub(__sum16 a, __sum16 b) +{ + return csum16_add(a, csum16_complement(b)); +} + +static int ip6t_npt_checkentry(const struct xt_tgchk_param *par) +{ + struct ip6t_npt_tginfo *npt = par->targinfo; + __sum16 src_sum = 0, dst_sum = 0; + unsigned int i; + + if (npt->src_pfx_len > 64 || npt->dst_pfx_len > 64) + return -EINVAL; + + for (i = 0; i < ARRAY_SIZE(npt->src_pfx.in6.s6_addr16); i++) { + src_sum = csum16_add(src_sum, + (__force __sum16)npt->src_pfx.in6.s6_addr16[i]); + dst_sum = csum16_add(dst_sum, + (__force __sum16)npt->dst_pfx.in6.s6_addr16[i]); + } + + npt->adjustment = csum16_sub(src_sum, dst_sum); + return 0; +} + +static bool ip6t_npt_map_pfx(const struct ip6t_npt_tginfo *npt, + struct in6_addr *addr) +{ + unsigned int pfx_len; + unsigned int i, idx; + __be32 mask; + __sum16 sum; + + pfx_len = max(npt->src_pfx_len, npt->dst_pfx_len); + for (i = 0; i < pfx_len; i += 32) { + if (pfx_len - i >= 32) + mask = 0; + else + mask = htonl(~((1 << (pfx_len - i)) - 1)); + + idx = i / 32; + addr->s6_addr32[idx] &= mask; + addr->s6_addr32[idx] |= npt->dst_pfx.in6.s6_addr32[idx]; + } + + if (pfx_len <= 48) + idx = 3; + else { + for (idx = 4; idx < ARRAY_SIZE(addr->s6_addr16); idx++) { + if ((__force __sum16)addr->s6_addr16[idx] != + CSUM_MANGLED_0) + break; + } + if (idx == ARRAY_SIZE(addr->s6_addr16)) + return false; + } + + sum = csum16_add((__force __sum16)addr->s6_addr16[idx], + npt->adjustment); + if (sum == CSUM_MANGLED_0) + sum = 0; + *(__force __sum16 *)&addr->s6_addr16[idx] = sum; + + return true; +} + +static unsigned int +ip6t_snpt_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ip6t_npt_tginfo *npt = par->targinfo; + + if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->saddr)) { + icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, + offsetof(struct ipv6hdr, saddr)); + return NF_DROP; + } + return XT_CONTINUE; +} + +static unsigned int +ip6t_dnpt_tg(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct ip6t_npt_tginfo *npt = par->targinfo; + + if (!ip6t_npt_map_pfx(npt, &ipv6_hdr(skb)->daddr)) { + icmpv6_send(skb, ICMPV6_PARAMPROB, ICMPV6_HDR_FIELD, + offsetof(struct ipv6hdr, daddr)); + return NF_DROP; + } + return XT_CONTINUE; +} + +static struct xt_target ip6t_npt_target_reg[] __read_mostly = { + { + .name = "SNPT", + .target = ip6t_snpt_tg, + .targetsize = sizeof(struct ip6t_npt_tginfo), + .checkentry = ip6t_npt_checkentry, + .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_LOCAL_IN) | + (1 << NF_INET_POST_ROUTING), + .me = THIS_MODULE, + }, + { + .name = "DNPT", + .target = ip6t_dnpt_tg, + .targetsize = sizeof(struct ip6t_npt_tginfo), + .checkentry = ip6t_npt_checkentry, + .family = NFPROTO_IPV6, + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, +}; + +static int __init ip6t_npt_init(void) +{ + return xt_register_targets(ip6t_npt_target_reg, + ARRAY_SIZE(ip6t_npt_target_reg)); +} + +static void __exit ip6t_npt_exit(void) +{ + xt_unregister_targets(ip6t_npt_target_reg, + ARRAY_SIZE(ip6t_npt_target_reg)); +} + +module_init(ip6t_npt_init); +module_exit(ip6t_npt_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("IPv6-to-IPv6 Network Prefix Translation (RFC 6296)"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS("ip6t_SNPT"); +MODULE_ALIAS("ip6t_DNPT"); -- cgit v1.2.3 From 48f125ce1cc3ff275f9587b5bf56bf0f90766c7d Mon Sep 17 00:00:00 2001 From: Julia Lawall Date: Wed, 29 Aug 2012 06:49:12 +0000 Subject: net: ipv6: fix error return code Initialize return variable before exiting on an error path. The initial initialization of the return variable is also dropped, because that value is never used. A simplified version of the semantic match that finds this problem is as follows: (http://coccinelle.lip6.fr/) // ( if@p1 (\(ret < 0\|ret != 0\)) { ... return ret; } | ret@p1 = 0 ) ... when != ret = e1 when != &ret *if(...) { ... when != ret = e2 when forall return ret; } // Signed-off-by: Julia Lawall Signed-off-by: David S. Miller --- net/ipv6/esp6.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c index 6dc7fd353ef5..282f3723ee19 100644 --- a/net/ipv6/esp6.c +++ b/net/ipv6/esp6.c @@ -167,8 +167,6 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) struct esp_data *esp = x->data; /* skb is pure payload to encrypt */ - err = -ENOMEM; - aead = esp->aead; alen = crypto_aead_authsize(aead); @@ -203,8 +201,10 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb) } tmp = esp_alloc_tmp(aead, nfrags + sglists, seqhilen); - if (!tmp) + if (!tmp) { + err = -ENOMEM; goto error; + } seqhi = esp_tmp_seqhi(tmp); iv = esp_tmp_iv(aead, tmp, seqhilen); -- cgit v1.2.3 From eb7e0575966f9c84434e92c8a3f69719cc2e7571 Mon Sep 17 00:00:00 2001 From: Sorin Dumitru Date: Thu, 30 Aug 2012 02:01:45 +0000 Subject: ipv6: remove some deadcode __ipv6_regen_rndid no longer returns anything other than 0 so there's no point in verifying what it returns Signed-off-by: Sorin Dumitru Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 29 +++++++---------------------- 1 file changed, 7 insertions(+), 22 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 6bc85f7c31e3..055627f6df22 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -127,8 +127,8 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev) #endif #ifdef CONFIG_IPV6_PRIVACY -static int __ipv6_regen_rndid(struct inet6_dev *idev); -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); +static void __ipv6_regen_rndid(struct inet6_dev *idev); +static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr); static void ipv6_regen_rndid(unsigned long data); #endif @@ -852,16 +852,7 @@ retry: } in6_ifa_hold(ifp); memcpy(addr.s6_addr, ifp->addr.s6_addr, 8); - if (__ipv6_try_regen_rndid(idev, tmpaddr) < 0) { - spin_unlock_bh(&ifp->lock); - write_unlock(&idev->lock); - pr_warn("%s: regeneration of randomized interface id failed\n", - __func__); - in6_ifa_put(ifp); - in6_dev_put(idev); - ret = -1; - goto out; - } + __ipv6_try_regen_rndid(idev, tmpaddr); memcpy(&addr.s6_addr[8], idev->rndid, 8); age = (now - ifp->tstamp) / HZ; tmp_valid_lft = min_t(__u32, @@ -1600,7 +1591,7 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev) #ifdef CONFIG_IPV6_PRIVACY /* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */ -static int __ipv6_regen_rndid(struct inet6_dev *idev) +static void __ipv6_regen_rndid(struct inet6_dev *idev) { regen: get_random_bytes(idev->rndid, sizeof(idev->rndid)); @@ -1627,8 +1618,6 @@ regen: if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00) goto regen; } - - return 0; } static void ipv6_regen_rndid(unsigned long data) @@ -1642,8 +1631,7 @@ static void ipv6_regen_rndid(unsigned long data) if (idev->dead) goto out; - if (__ipv6_regen_rndid(idev) < 0) - goto out; + __ipv6_regen_rndid(idev); expires = jiffies + idev->cnf.temp_prefered_lft * HZ - @@ -1664,13 +1652,10 @@ out: in6_dev_put(idev); } -static int __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) +static void __ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr) { - int ret = 0; - if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0) - ret = __ipv6_regen_rndid(idev); - return ret; + __ipv6_regen_rndid(idev); } #endif -- cgit v1.2.3 From 8336886f786fdacbc19b719c1f7ea91eb70706d4 Mon Sep 17 00:00:00 2001 From: Jerry Chu Date: Fri, 31 Aug 2012 12:29:12 +0000 Subject: tcp: TCP Fast Open Server - support TFO listeners This patch builds on top of the previous patch to add the support for TFO listeners. This includes - 1. allocating, properly initializing, and managing the per listener fastopen_queue structure when TFO is enabled 2. changes to the inet_csk_accept code to support TFO. E.g., the request_sock can no longer be freed upon accept(), not until 3WHS finishes 3. allowing a TCP_SYN_RECV socket to properly poll() and sendmsg() if it's a TFO socket 4. properly closing a TFO listener, and a TFO socket before 3WHS finishes 5. supporting TCP_FASTOPEN socket option 6. modifying tcp_check_req() to use to check a TFO socket as well as request_sock 7. supporting TCP's TFO cookie option 8. adding a new SYN-ACK retransmit handler to use the timer directly off the TFO socket rather than the listener socket. Note that TFO server side will not retransmit anything other than SYN-ACK until the 3WHS is completed. The patch also contains an important function "reqsk_fastopen_remove()" to manage the somewhat complex relation between a listener, its request_sock, and the corresponding child socket. See the comment above the function for the detail. Signed-off-by: H.K. Jerry Chu Cc: Yuchung Cheng Cc: Neal Cardwell Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: David S. Miller --- include/net/request_sock.h | 13 ------ include/net/tcp.h | 6 ++- net/core/request_sock.c | 95 +++++++++++++++++++++++++++++++++++++++++ net/ipv4/af_inet.c | 28 +++++++++++- net/ipv4/inet_connection_sock.c | 57 ++++++++++++++++++++++--- net/ipv4/syncookies.c | 1 + net/ipv4/tcp.c | 49 ++++++++++++++++++--- net/ipv4/tcp_ipv4.c | 4 +- net/ipv4/tcp_minisocks.c | 61 +++++++++++++++++++++----- net/ipv4/tcp_output.c | 21 ++++++--- net/ipv4/tcp_timer.c | 39 ++++++++++++++++- net/ipv6/syncookies.c | 1 + net/ipv6/tcp_ipv6.c | 5 ++- 13 files changed, 330 insertions(+), 50 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/request_sock.h b/include/net/request_sock.h index c3cdd6c9f448..b01d8dd9ee7c 100644 --- a/include/net/request_sock.h +++ b/include/net/request_sock.h @@ -226,19 +226,6 @@ static inline struct request_sock *reqsk_queue_remove(struct request_sock_queue return req; } -static inline struct sock *reqsk_queue_get_child(struct request_sock_queue *queue, - struct sock *parent) -{ - struct request_sock *req = reqsk_queue_remove(queue); - struct sock *child = req->sk; - - WARN_ON(child == NULL); - - sk_acceptq_removed(parent); - __reqsk_free(req); - return child; -} - static inline int reqsk_queue_removed(struct request_sock_queue *queue, struct request_sock *req) { diff --git a/include/net/tcp.h b/include/net/tcp.h index 9f8821e3293a..1421b02a7905 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -424,7 +424,8 @@ extern enum tcp_tw_status tcp_timewait_state_process(struct inet_timewait_sock * const struct tcphdr *th); extern struct sock * tcp_check_req(struct sock *sk,struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev); + struct request_sock **prev, + bool fastopen); extern int tcp_child_process(struct sock *parent, struct sock *child, struct sk_buff *skb); extern bool tcp_use_frto(struct sock *sk); @@ -478,7 +479,8 @@ extern int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, extern int tcp_connect(struct sock *sk); extern struct sk_buff * tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp); + struct request_values *rvp, + struct tcp_fastopen_cookie *foc); extern int tcp_disconnect(struct sock *sk, int flags); void tcp_connect_init(struct sock *sk); diff --git a/net/core/request_sock.c b/net/core/request_sock.c index 9b570a6a33c5..c31d9e8668c3 100644 --- a/net/core/request_sock.c +++ b/net/core/request_sock.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -130,3 +131,97 @@ void reqsk_queue_destroy(struct request_sock_queue *queue) kfree(lopt); } +/* + * This function is called to set a Fast Open socket's "fastopen_rsk" field + * to NULL when a TFO socket no longer needs to access the request_sock. + * This happens only after 3WHS has been either completed or aborted (e.g., + * RST is received). + * + * Before TFO, a child socket is created only after 3WHS is completed, + * hence it never needs to access the request_sock. things get a lot more + * complex with TFO. A child socket, accepted or not, has to access its + * request_sock for 3WHS processing, e.g., to retransmit SYN-ACK pkts, + * until 3WHS is either completed or aborted. Afterwards the req will stay + * until either the child socket is accepted, or in the rare case when the + * listener is closed before the child is accepted. + * + * In short, a request socket is only freed after BOTH 3WHS has completed + * (or aborted) and the child socket has been accepted (or listener closed). + * When a child socket is accepted, its corresponding req->sk is set to + * NULL since it's no longer needed. More importantly, "req->sk == NULL" + * will be used by the code below to determine if a child socket has been + * accepted or not, and the check is protected by the fastopenq->lock + * described below. + * + * Note that fastopen_rsk is only accessed from the child socket's context + * with its socket lock held. But a request_sock (req) can be accessed by + * both its child socket through fastopen_rsk, and a listener socket through + * icsk_accept_queue.rskq_accept_head. To protect the access a simple spin + * lock per listener "icsk->icsk_accept_queue.fastopenq->lock" is created. + * only in the rare case when both the listener and the child locks are held, + * e.g., in inet_csk_listen_stop() do we not need to acquire the lock. + * The lock also protects other fields such as fastopenq->qlen, which is + * decremented by this function when fastopen_rsk is no longer needed. + * + * Note that another solution was to simply use the existing socket lock + * from the listener. But first socket lock is difficult to use. It is not + * a simple spin lock - one must consider sock_owned_by_user() and arrange + * to use sk_add_backlog() stuff. But what really makes it infeasible is the + * locking hierarchy violation. E.g., inet_csk_listen_stop() may try to + * acquire a child's lock while holding listener's socket lock. A corner + * case might also exist in tcp_v4_hnd_req() that will trigger this locking + * order. + * + * When a TFO req is created, it needs to sock_hold its listener to prevent + * the latter data structure from going away. + * + * This function also sets "treq->listener" to NULL and unreference listener + * socket. treq->listener is used by the listener so it is protected by the + * fastopenq->lock in this function. + */ +void reqsk_fastopen_remove(struct sock *sk, struct request_sock *req, + bool reset) +{ + struct sock *lsk = tcp_rsk(req)->listener; + struct fastopen_queue *fastopenq = + inet_csk(lsk)->icsk_accept_queue.fastopenq; + + BUG_ON(!spin_is_locked(&sk->sk_lock.slock) && !sock_owned_by_user(sk)); + + tcp_sk(sk)->fastopen_rsk = NULL; + spin_lock_bh(&fastopenq->lock); + fastopenq->qlen--; + tcp_rsk(req)->listener = NULL; + if (req->sk) /* the child socket hasn't been accepted yet */ + goto out; + + if (!reset || lsk->sk_state != TCP_LISTEN) { + /* If the listener has been closed don't bother with the + * special RST handling below. + */ + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + reqsk_free(req); + return; + } + /* Wait for 60secs before removing a req that has triggered RST. + * This is a simple defense against TFO spoofing attack - by + * counting the req against fastopen.max_qlen, and disabling + * TFO when the qlen exceeds max_qlen. + * + * For more details see CoNext'11 "TCP Fast Open" paper. + */ + req->expires = jiffies + 60*HZ; + if (fastopenq->rskq_rst_head == NULL) + fastopenq->rskq_rst_head = req; + else + fastopenq->rskq_rst_tail->dl_next = req; + + req->dl_next = NULL; + fastopenq->rskq_rst_tail = req; + fastopenq->qlen++; +out: + spin_unlock_bh(&fastopenq->lock); + sock_put(lsk); + return; +} diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index 6681ccf5c3ee..4f70ef0b946d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -149,6 +149,11 @@ void inet_sock_destruct(struct sock *sk) pr_err("Attempt to release alive inet socket %p\n", sk); return; } + if (sk->sk_type == SOCK_STREAM) { + struct fastopen_queue *fastopenq = + inet_csk(sk)->icsk_accept_queue.fastopenq; + kfree(fastopenq); + } WARN_ON(atomic_read(&sk->sk_rmem_alloc)); WARN_ON(atomic_read(&sk->sk_wmem_alloc)); @@ -212,6 +217,26 @@ int inet_listen(struct socket *sock, int backlog) * we can only allow the backlog to be adjusted. */ if (old_state != TCP_LISTEN) { + /* Check special setups for testing purpose to enable TFO w/o + * requiring TCP_FASTOPEN sockopt. + * Note that only TCP sockets (SOCK_STREAM) will reach here. + * Also fastopenq may already been allocated because this + * socket was in TCP_LISTEN state previously but was + * shutdown() (rather than close()). + */ + if ((sysctl_tcp_fastopen & TFO_SERVER_ENABLE) != 0 && + inet_csk(sk)->icsk_accept_queue.fastopenq == NULL) { + if ((sysctl_tcp_fastopen & TFO_SERVER_WO_SOCKOPT1) != 0) + err = fastopen_init_queue(sk, backlog); + else if ((sysctl_tcp_fastopen & + TFO_SERVER_WO_SOCKOPT2) != 0) + err = fastopen_init_queue(sk, + ((uint)sysctl_tcp_fastopen) >> 16); + else + err = 0; + if (err) + goto out; + } err = inet_csk_listen_start(sk, backlog); if (err) goto out; @@ -701,7 +726,8 @@ int inet_accept(struct socket *sock, struct socket *newsock, int flags) sock_rps_record_flow(sk2); WARN_ON(!((1 << sk2->sk_state) & - (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT | TCPF_CLOSE))); + (TCPF_ESTABLISHED | TCPF_SYN_RECV | + TCPF_CLOSE_WAIT | TCPF_CLOSE))); sock_graft(sk2, newsock); diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index 7f75f21d7b83..8464b79c493f 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -283,7 +283,9 @@ static int inet_csk_wait_for_connect(struct sock *sk, long timeo) struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) { struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct sock *newsk; + struct request_sock *req; int error; lock_sock(sk); @@ -296,7 +298,7 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) goto out_err; /* Find already established connection */ - if (reqsk_queue_empty(&icsk->icsk_accept_queue)) { + if (reqsk_queue_empty(queue)) { long timeo = sock_rcvtimeo(sk, flags & O_NONBLOCK); /* If this is a non blocking socket don't sleep */ @@ -308,14 +310,32 @@ struct sock *inet_csk_accept(struct sock *sk, int flags, int *err) if (error) goto out_err; } - - newsk = reqsk_queue_get_child(&icsk->icsk_accept_queue, sk); - WARN_ON(newsk->sk_state == TCP_SYN_RECV); + req = reqsk_queue_remove(queue); + newsk = req->sk; + + sk_acceptq_removed(sk); + if (sk->sk_type == SOCK_STREAM && queue->fastopenq != NULL) { + spin_lock_bh(&queue->fastopenq->lock); + if (tcp_rsk(req)->listener) { + /* We are still waiting for the final ACK from 3WHS + * so can't free req now. Instead, we set req->sk to + * NULL to signify that the child socket is taken + * so reqsk_fastopen_remove() will free the req + * when 3WHS finishes (or is aborted). + */ + req->sk = NULL; + req = NULL; + } + spin_unlock_bh(&queue->fastopenq->lock); + } out: release_sock(sk); + if (req) + __reqsk_free(req); return newsk; out_err: newsk = NULL; + req = NULL; *err = error; goto out; } @@ -720,13 +740,14 @@ EXPORT_SYMBOL_GPL(inet_csk_listen_start); void inet_csk_listen_stop(struct sock *sk) { struct inet_connection_sock *icsk = inet_csk(sk); + struct request_sock_queue *queue = &icsk->icsk_accept_queue; struct request_sock *acc_req; struct request_sock *req; inet_csk_delete_keepalive_timer(sk); /* make all the listen_opt local to us */ - acc_req = reqsk_queue_yank_acceptq(&icsk->icsk_accept_queue); + acc_req = reqsk_queue_yank_acceptq(queue); /* Following specs, it would be better either to send FIN * (and enter FIN-WAIT-1, it is normal close) @@ -736,7 +757,7 @@ void inet_csk_listen_stop(struct sock *sk) * To be honest, we are not able to make either * of the variants now. --ANK */ - reqsk_queue_destroy(&icsk->icsk_accept_queue); + reqsk_queue_destroy(queue); while ((req = acc_req) != NULL) { struct sock *child = req->sk; @@ -754,6 +775,19 @@ void inet_csk_listen_stop(struct sock *sk) percpu_counter_inc(sk->sk_prot->orphan_count); + if (sk->sk_type == SOCK_STREAM && tcp_rsk(req)->listener) { + BUG_ON(tcp_sk(child)->fastopen_rsk != req); + BUG_ON(sk != tcp_rsk(req)->listener); + + /* Paranoid, to prevent race condition if + * an inbound pkt destined for child is + * blocked by sock lock in tcp_v4_rcv(). + * Also to satisfy an assertion in + * tcp_v4_destroy_sock(). + */ + tcp_sk(child)->fastopen_rsk = NULL; + sock_put(sk); + } inet_csk_destroy_sock(child); bh_unlock_sock(child); @@ -763,6 +797,17 @@ void inet_csk_listen_stop(struct sock *sk) sk_acceptq_removed(sk); __reqsk_free(req); } + if (queue->fastopenq != NULL) { + /* Free all the reqs queued in rskq_rst_head. */ + spin_lock_bh(&queue->fastopenq->lock); + acc_req = queue->fastopenq->rskq_rst_head; + queue->fastopenq->rskq_rst_head = NULL; + spin_unlock_bh(&queue->fastopenq->lock); + while ((req = acc_req) != NULL) { + acc_req = req->dl_next; + __reqsk_free(req); + } + } WARN_ON(sk->sk_ack_backlog); } EXPORT_SYMBOL_GPL(inet_csk_listen_stop); diff --git a/net/ipv4/syncookies.c b/net/ipv4/syncookies.c index 650e1528e1e6..ba48e799b031 100644 --- a/net/ipv4/syncookies.c +++ b/net/ipv4/syncookies.c @@ -319,6 +319,7 @@ struct sock *cookie_v4_check(struct sock *sk, struct sk_buff *skb, ireq->tstamp_ok = tcp_opt.saw_tstamp; req->ts_recent = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsval : 0; treq->snt_synack = tcp_opt.saw_tstamp ? tcp_opt.rcv_tsecr : 0; + treq->listener = NULL; /* We throwed the options of the initial SYN away, so we hope * the ACK carries the same options again (see RFC1122 4.2.3.8) diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 2109ff4a1daf..df83d744e380 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -486,8 +486,9 @@ unsigned int tcp_poll(struct file *file, struct socket *sock, poll_table *wait) if (sk->sk_shutdown & RCV_SHUTDOWN) mask |= POLLIN | POLLRDNORM | POLLRDHUP; - /* Connected? */ - if ((1 << sk->sk_state) & ~(TCPF_SYN_SENT | TCPF_SYN_RECV)) { + /* Connected or passive Fast Open socket? */ + if (sk->sk_state != TCP_SYN_SENT && + (sk->sk_state != TCP_SYN_RECV || tp->fastopen_rsk != NULL)) { int target = sock_rcvlowat(sk, 0, INT_MAX); if (tp->urg_seq == tp->copied_seq && @@ -840,10 +841,15 @@ static ssize_t do_tcp_sendpages(struct sock *sk, struct page **pages, int poffse ssize_t copied; long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto out_err; + } clear_bit(SOCK_ASYNC_NOSPACE, &sk->sk_socket->flags); @@ -1042,10 +1048,15 @@ int tcp_sendmsg(struct kiocb *iocb, struct sock *sk, struct msghdr *msg, timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - /* Wait for a connection to finish. */ - if ((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) + /* Wait for a connection to finish. One exception is TCP Fast Open + * (passive side) where data is allowed to be sent before a connection + * is fully established. + */ + if (((1 << sk->sk_state) & ~(TCPF_ESTABLISHED | TCPF_CLOSE_WAIT)) && + !tcp_passive_fastopen(sk)) { if ((err = sk_stream_wait_connect(sk, &timeo)) != 0) goto do_error; + } if (unlikely(tp->repair)) { if (tp->repair_queue == TCP_RECV_QUEUE) { @@ -2144,6 +2155,10 @@ void tcp_close(struct sock *sk, long timeout) * they look as CLOSING or LAST_ACK for Linux) * Probably, I missed some more holelets. * --ANK + * XXX (TFO) - To start off we don't support SYN+ACK+FIN + * in a single packet! (May consider it later but will + * probably need API support or TCP_CORK SYN-ACK until + * data is written and socket is closed.) */ tcp_send_fin(sk); } @@ -2215,8 +2230,16 @@ adjudge_to_death: } } - if (sk->sk_state == TCP_CLOSE) + if (sk->sk_state == TCP_CLOSE) { + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + /* We could get here with a non-NULL req if the socket is + * aborted (e.g., closed with unread data) before 3WHS + * finishes. + */ + if (req != NULL) + reqsk_fastopen_remove(sk, req, false); inet_csk_destroy_sock(sk); + } /* Otherwise, socket is reprieved until protocol close. */ out: @@ -2688,6 +2711,14 @@ static int do_tcp_setsockopt(struct sock *sk, int level, else icsk->icsk_user_timeout = msecs_to_jiffies(val); break; + + case TCP_FASTOPEN: + if (val >= 0 && ((1 << sk->sk_state) & (TCPF_CLOSE | + TCPF_LISTEN))) + err = fastopen_init_queue(sk, val); + else + err = -EINVAL; + break; default: err = -ENOPROTOOPT; break; @@ -3501,11 +3532,15 @@ EXPORT_SYMBOL(tcp_cookie_generator); void tcp_done(struct sock *sk) { + struct request_sock *req = tcp_sk(sk)->fastopen_rsk; + if (sk->sk_state == TCP_SYN_SENT || sk->sk_state == TCP_SYN_RECV) TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_ATTEMPTFAILS); tcp_set_state(sk, TCP_CLOSE); tcp_clear_xmit_timers(sk); + if (req != NULL) + reqsk_fastopen_remove(sk, req, false); sk->sk_shutdown = SHUTDOWN_MASK; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 36f02f954ac1..bb148dee1edd 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -839,7 +839,7 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet_csk_route_req(sk, &fl4, req)) == NULL) return -1; - skb = tcp_make_synack(sk, dst, req, rvp); + skb = tcp_make_synack(sk, dst, req, rvp, NULL); if (skb) { __tcp_v4_send_check(skb, ireq->loc_addr, ireq->rmt_addr); @@ -1554,7 +1554,7 @@ static struct sock *tcp_v4_hnd_req(struct sock *sk, struct sk_buff *skb) struct request_sock *req = inet_csk_search_req(sk, &prev, th->source, iph->saddr, iph->daddr); if (req) - return tcp_check_req(sk, skb, req, prev); + return tcp_check_req(sk, skb, req, prev, false); nsk = inet_lookup_established(sock_net(sk), &tcp_hashinfo, iph->saddr, th->source, iph->daddr, th->dest, inet_iif(skb)); diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c index 6ff7f10dce9d..e965319d610b 100644 --- a/net/ipv4/tcp_minisocks.c +++ b/net/ipv4/tcp_minisocks.c @@ -507,6 +507,7 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, newicsk->icsk_ack.last_seg_size = skb->len - newtp->tcp_header_len; newtp->rx_opt.mss_clamp = req->mss; TCP_ECN_openreq_child(newtp, req); + newtp->fastopen_rsk = NULL; TCP_INC_STATS_BH(sock_net(sk), TCP_MIB_PASSIVEOPENS); } @@ -515,13 +516,18 @@ struct sock *tcp_create_openreq_child(struct sock *sk, struct request_sock *req, EXPORT_SYMBOL(tcp_create_openreq_child); /* - * Process an incoming packet for SYN_RECV sockets represented - * as a request_sock. + * Process an incoming packet for SYN_RECV sockets represented as a + * request_sock. Normally sk is the listener socket but for TFO it + * points to the child socket. + * + * XXX (TFO) - The current impl contains a special check for ack + * validation and inside tcp_v4_reqsk_send_ack(). Can we do better? */ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, struct request_sock *req, - struct request_sock **prev) + struct request_sock **prev, + bool fastopen) { struct tcp_options_received tmp_opt; const u8 *hash_location; @@ -530,6 +536,8 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, __be32 flg = tcp_flag_word(th) & (TCP_FLAG_RST|TCP_FLAG_SYN|TCP_FLAG_ACK); bool paws_reject = false; + BUG_ON(fastopen == (sk->sk_state == TCP_LISTEN)); + tmp_opt.saw_tstamp = 0; if (th->doff > (sizeof(struct tcphdr)>>2)) { tcp_parse_options(skb, &tmp_opt, &hash_location, 0, NULL); @@ -565,6 +573,9 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * * Enforce "SYN-ACK" according to figure 8, figure 6 * of RFC793, fixed by RFC1122. + * + * Note that even if there is new data in the SYN packet + * they will be thrown away too. */ req->rsk_ops->rtx_syn_ack(sk, req, NULL); return NULL; @@ -622,9 +633,12 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, * sent (the segment carries an unacceptable ACK) ... * a reset is sent." * - * Invalid ACK: reset will be sent by listening socket + * Invalid ACK: reset will be sent by listening socket. + * Note that the ACK validity check for a Fast Open socket is done + * elsewhere and is checked directly against the child socket rather + * than req because user data may have been sent out. */ - if ((flg & TCP_FLAG_ACK) && + if ((flg & TCP_FLAG_ACK) && !fastopen && (TCP_SKB_CB(skb)->ack_seq != tcp_rsk(req)->snt_isn + 1 + tcp_s_data_size(tcp_sk(sk)))) return sk; @@ -637,7 +651,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* RFC793: "first check sequence number". */ if (paws_reject || !tcp_in_window(TCP_SKB_CB(skb)->seq, TCP_SKB_CB(skb)->end_seq, - tcp_rsk(req)->rcv_isn + 1, tcp_rsk(req)->rcv_isn + 1 + req->rcv_wnd)) { + tcp_rsk(req)->rcv_nxt, tcp_rsk(req)->rcv_nxt + req->rcv_wnd)) { /* Out of window: send ACK and drop. */ if (!(flg & TCP_FLAG_RST)) req->rsk_ops->send_ack(sk, skb, req); @@ -648,7 +662,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* In sequence, PAWS is OK. */ - if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_isn + 1)) + if (tmp_opt.saw_tstamp && !after(TCP_SKB_CB(skb)->seq, tcp_rsk(req)->rcv_nxt)) req->ts_recent = tmp_opt.rcv_tsval; if (TCP_SKB_CB(skb)->seq == tcp_rsk(req)->rcv_isn) { @@ -667,10 +681,19 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb, /* ACK sequence verified above, just make sure ACK is * set. If ACK not set, just silently drop the packet. + * + * XXX (TFO) - if we ever allow "data after SYN", the + * following check needs to be removed. */ if (!(flg & TCP_FLAG_ACK)) return NULL; + /* For Fast Open no more processing is needed (sk is the + * child socket). + */ + if (fastopen) + return sk; + /* While TCP_DEFER_ACCEPT is active, drop bare ACK. */ if (req->retrans < inet_csk(sk)->icsk_accept_queue.rskq_defer_accept && TCP_SKB_CB(skb)->end_seq == tcp_rsk(req)->rcv_isn + 1) { @@ -706,11 +729,21 @@ listen_overflow: } embryonic_reset: - NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); - if (!(flg & TCP_FLAG_RST)) + if (!(flg & TCP_FLAG_RST)) { + /* Received a bad SYN pkt - for TFO We try not to reset + * the local connection unless it's really necessary to + * avoid becoming vulnerable to outside attack aiming at + * resetting legit local connections. + */ req->rsk_ops->send_reset(sk, skb); - - inet_csk_reqsk_queue_drop(sk, req, prev); + } else if (fastopen) { /* received a valid RST pkt */ + reqsk_fastopen_remove(sk, req, true); + tcp_reset(sk); + } + if (!fastopen) { + inet_csk_reqsk_queue_drop(sk, req, prev); + NET_INC_STATS_BH(sock_net(sk), LINUX_MIB_EMBRYONICRSTS); + } return NULL; } EXPORT_SYMBOL(tcp_check_req); @@ -719,6 +752,12 @@ EXPORT_SYMBOL(tcp_check_req); * Queue segment on the new socket if the new socket is active, * otherwise we just shortcircuit this and continue with * the new socket. + * + * For the vast majority of cases child->sk_state will be TCP_SYN_RECV + * when entering. But other states are possible due to a race condition + * where after __inet_lookup_established() fails but before the listener + * locked is obtained, other packets cause the same connection to + * be created. */ int tcp_child_process(struct sock *parent, struct sock *child, diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index d04632673a9e..9383b51f3efc 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -702,7 +702,8 @@ static unsigned int tcp_synack_options(struct sock *sk, unsigned int mss, struct sk_buff *skb, struct tcp_out_options *opts, struct tcp_md5sig_key **md5, - struct tcp_extend_values *xvp) + struct tcp_extend_values *xvp, + struct tcp_fastopen_cookie *foc) { struct inet_request_sock *ireq = inet_rsk(req); unsigned int remaining = MAX_TCP_OPTION_SPACE; @@ -747,7 +748,15 @@ static unsigned int tcp_synack_options(struct sock *sk, if (unlikely(!ireq->tstamp_ok)) remaining -= TCPOLEN_SACKPERM_ALIGNED; } - + if (foc != NULL) { + u32 need = TCPOLEN_EXP_FASTOPEN_BASE + foc->len; + need = (need + 3) & ~3U; /* Align to 32 bits */ + if (remaining >= need) { + opts->options |= OPTION_FAST_OPEN_COOKIE; + opts->fastopen_cookie = foc; + remaining -= need; + } + } /* Similar rationale to tcp_syn_options() applies here, too. * If the options fit, the same options should fit now! */ @@ -2658,7 +2667,8 @@ int tcp_send_synack(struct sock *sk) */ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, struct request_sock *req, - struct request_values *rvp) + struct request_values *rvp, + struct tcp_fastopen_cookie *foc) { struct tcp_out_options opts; struct tcp_extend_values *xvp = tcp_xv(rvp); @@ -2718,7 +2728,7 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, #endif TCP_SKB_CB(skb)->when = tcp_time_stamp; tcp_header_size = tcp_synack_options(sk, req, mss, - skb, &opts, &md5, xvp) + skb, &opts, &md5, xvp, foc) + sizeof(*th); skb_push(skb, tcp_header_size); @@ -2772,7 +2782,8 @@ struct sk_buff *tcp_make_synack(struct sock *sk, struct dst_entry *dst, } th->seq = htonl(TCP_SKB_CB(skb)->seq); - th->ack_seq = htonl(tcp_rsk(req)->rcv_isn + 1); + /* XXX data is queued and acked as is. No buffer/window check */ + th->ack_seq = htonl(tcp_rsk(req)->rcv_nxt); /* RFC1323: The window in SYN & SYN/ACK segments is never scaled. */ th->window = htons(min(req->rcv_wnd, 65535U)); diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c index b774a03bd1dc..fc04711e80c8 100644 --- a/net/ipv4/tcp_timer.c +++ b/net/ipv4/tcp_timer.c @@ -304,6 +304,35 @@ static void tcp_probe_timer(struct sock *sk) } } +/* + * Timer for Fast Open socket to retransmit SYNACK. Note that the + * sk here is the child socket, not the parent (listener) socket. + */ +static void tcp_fastopen_synack_timer(struct sock *sk) +{ + struct inet_connection_sock *icsk = inet_csk(sk); + int max_retries = icsk->icsk_syn_retries ? : + sysctl_tcp_synack_retries + 1; /* add one more retry for fastopen */ + struct request_sock *req; + + req = tcp_sk(sk)->fastopen_rsk; + req->rsk_ops->syn_ack_timeout(sk, req); + + if (req->retrans >= max_retries) { + tcp_write_err(sk); + return; + } + /* XXX (TFO) - Unlike regular SYN-ACK retransmit, we ignore error + * returned from rtx_syn_ack() to make it more persistent like + * regular retransmit because if the child socket has been accepted + * it's not good to give up too easily. + */ + req->rsk_ops->rtx_syn_ack(sk, req, NULL); + req->retrans++; + inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS, + TCP_TIMEOUT_INIT << req->retrans, TCP_RTO_MAX); +} + /* * The TCP retransmit timer. */ @@ -317,7 +346,15 @@ void tcp_retransmit_timer(struct sock *sk) tcp_resume_early_retransmit(sk); return; } - + if (tp->fastopen_rsk) { + BUG_ON(sk->sk_state != TCP_SYN_RECV && + sk->sk_state != TCP_FIN_WAIT1); + tcp_fastopen_synack_timer(sk); + /* Before we receive ACK to our SYN-ACK don't retransmit + * anything else (e.g., data or FIN segments). + */ + return; + } if (!tp->packets_out) goto out; diff --git a/net/ipv6/syncookies.c b/net/ipv6/syncookies.c index bb46061c813a..182ab9a85d6c 100644 --- a/net/ipv6/syncookies.c +++ b/net/ipv6/syncookies.c @@ -190,6 +190,7 @@ struct sock *cookie_v6_check(struct sock *sk, struct sk_buff *skb) ireq = inet_rsk(req); ireq6 = inet6_rsk(req); treq = tcp_rsk(req); + treq->listener = NULL; if (security_inet_conn_request(sk, skb, req)) goto out_free; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f99b81d53cca..09078b9bc6f6 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -475,7 +475,7 @@ static int tcp_v6_send_synack(struct sock *sk, struct dst_entry *dst, if (!dst && (dst = inet6_csk_route_req(sk, fl6, req)) == NULL) goto done; - skb = tcp_make_synack(sk, dst, req, rvp); + skb = tcp_make_synack(sk, dst, req, rvp, NULL); if (skb) { __tcp_v6_send_check(skb, &treq->loc_addr, &treq->rmt_addr); @@ -987,7 +987,7 @@ static struct sock *tcp_v6_hnd_req(struct sock *sk,struct sk_buff *skb) &ipv6_hdr(skb)->saddr, &ipv6_hdr(skb)->daddr, inet6_iif(skb)); if (req) - return tcp_check_req(sk, skb, req, prev); + return tcp_check_req(sk, skb, req, prev, false); nsk = __inet6_lookup_established(sock_net(sk), &tcp_hashinfo, &ipv6_hdr(skb)->saddr, th->source, @@ -1179,6 +1179,7 @@ have_isn: want_cookie) goto drop_and_free; + tcp_rsk(req)->listener = NULL; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; -- cgit v1.2.3 From d013ef2aba8fe765ca683598e404203215632373 Mon Sep 17 00:00:00 2001 From: Julian Anastasov Date: Wed, 5 Sep 2012 10:53:18 +0000 Subject: tcp: fix possible socket refcount problem for ipv6 commit 144d56e91044181ec0ef67aeca91e9a8b5718348 ("tcp: fix possible socket refcount problem") is missing the IPv6 part. As tcp_release_cb is shared by both protocols we should hold sock reference for the TCP_MTU_REDUCED_DEFERRED bit. Signed-off-by: Julian Anastasov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a3e60cc04a8a..acd32e3f1b68 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -403,8 +403,9 @@ static void tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt, tp->mtu_info = ntohl(info); if (!sock_owned_by_user(sk)) tcp_v6_mtu_reduced(sk); - else - set_bit(TCP_MTU_REDUCED_DEFERRED, &tp->tsq_flags); + else if (!test_and_set_bit(TCP_MTU_REDUCED_DEFERRED, + &tp->tsq_flags)) + sock_hold(sk); goto out; } -- cgit v1.2.3 From 2c9693222952b08c224093a1263cb5aee02d10ff Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 5 Sep 2012 15:34:58 +1000 Subject: netfilter: ipv6: using csum_ipv6_magic requires net/ip6_checksum.h Fixes this build error: net/ipv6/netfilter/nf_nat_l3proto_ipv6.c: In function 'nf_nat_ipv6_csum_recalc': net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:144:4: error: implicit declaration of function 'csum_ipv6_magic' [-Werror=implicit-function-declaration] Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_nat_l3proto_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c index 81a2d1c3da8e..abfe75a2e316 100644 --- a/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c +++ b/net/ipv6/netfilter/nf_nat_l3proto_ipv6.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From ef2c7d7b59708d54213c7556a82d14de9a7e4475 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 5 Sep 2012 02:12:42 +0000 Subject: ipv6: fix handling of blackhole and prohibit routes When adding a blackhole or a prohibit route, they were handling like classic routes. Moreover, it was only possible to add this kind of routes by specifying an interface. Bug already reported here: http://bugs.debian.org/cgi-bin/bugreport.cgi?bug=498498 Before the patch: $ ip route add blackhole 2001::1/128 RTNETLINK answers: No such device $ ip route add blackhole 2001::1/128 dev eth0 $ ip -6 route | grep 2001 2001::1 dev eth0 metric 1024 After: $ ip route add blackhole 2001::1/128 $ ip -6 route | grep 2001 blackhole 2001::1 dev lo metric 1024 error -22 v2: wrong patch v3: add a field fc_type in struct fib6_config to store RTN_* type Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 1 + net/ipv6/route.c | 32 ++++++++++++++++++++++++++++---- 2 files changed, 29 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0fedbd8d747a..cd64cf35a49f 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -37,6 +37,7 @@ struct fib6_config { int fc_ifindex; u32 fc_flags; u32 fc_protocol; + u32 fc_type; /* only 8 bits are used */ struct in6_addr fc_dst; struct in6_addr fc_src; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 0ddf2d132e7f..fa264447a751 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1463,8 +1463,18 @@ int ip6_route_add(struct fib6_config *cfg) } rt->dst.output = ip6_pkt_discard_out; rt->dst.input = ip6_pkt_discard; - rt->dst.error = -ENETUNREACH; rt->rt6i_flags = RTF_REJECT|RTF_NONEXTHOP; + switch (cfg->fc_type) { + case RTN_BLACKHOLE: + rt->dst.error = -EINVAL; + break; + case RTN_PROHIBIT: + rt->dst.error = -EACCES; + break; + default: + rt->dst.error = -ENETUNREACH; + break; + } goto install_route; } @@ -2261,8 +2271,11 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, cfg->fc_src_len = rtm->rtm_src_len; cfg->fc_flags = RTF_UP; cfg->fc_protocol = rtm->rtm_protocol; + cfg->fc_type = rtm->rtm_type; - if (rtm->rtm_type == RTN_UNREACHABLE) + if (rtm->rtm_type == RTN_UNREACHABLE || + rtm->rtm_type == RTN_BLACKHOLE || + rtm->rtm_type == RTN_PROHIBIT) cfg->fc_flags |= RTF_REJECT; if (rtm->rtm_type == RTN_LOCAL) @@ -2391,8 +2404,19 @@ static int rt6_fill_node(struct net *net, rtm->rtm_table = table; if (nla_put_u32(skb, RTA_TABLE, table)) goto nla_put_failure; - if (rt->rt6i_flags & RTF_REJECT) - rtm->rtm_type = RTN_UNREACHABLE; + if (rt->rt6i_flags & RTF_REJECT) { + switch (rt->dst.error) { + case -EINVAL: + rtm->rtm_type = RTN_BLACKHOLE; + break; + case -EACCES: + rtm->rtm_type = RTN_PROHIBIT; + break; + default: + rtm->rtm_type = RTN_UNREACHABLE; + break; + } + } else if (rt->rt6i_flags & RTF_LOCAL) rtm->rtm_type = RTN_LOCAL; else if (rt->dst.dev && (rt->dst.dev->flags & IFF_LOOPBACK)) -- cgit v1.2.3 From 979402b16cde048ced4839e21cc49e0779352b80 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 5 Sep 2012 23:34:44 +0000 Subject: udp: increment UDP_MIB_INERRORS if copy failed In UDP recvmsg(), we miss an increase of UDP_MIB_INERRORS if the copy of skb to userspace failed for whatever reason. Reported-by: Shawn Bohrer Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/udp.c | 5 +++++ net/ipv6/udp.c | 11 +++++++++++ 2 files changed, 16 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 6f6d1aca3c3d..2814f66dac64 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1226,6 +1226,11 @@ try_again: if (unlikely(err)) { trace_kfree_skb(skb, udp_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, is_udplite); + } goto out_free; } diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 99d0077b56b8..07e2bfef6845 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -394,6 +394,17 @@ try_again: } if (unlikely(err)) { trace_kfree_skb(skb, udpv6_recvmsg); + if (!peeked) { + atomic_inc(&sk->sk_drops); + if (is_udp4) + UDP_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + else + UDP6_INC_STATS_USER(sock_net(sk), + UDP_MIB_INERRORS, + is_udplite); + } goto out_free; } if (!peeked) { -- cgit v1.2.3 From b4949ab269a20e9af9a0c40729bac56e8f8a43a2 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Thu, 6 Sep 2012 05:53:35 +0000 Subject: ipv6: fix handling of throw routes It's the same problem that previous fix about blackhole and prohibit routes. When adding a throw route, it was handled like a classic route. Moreover, it was only possible to add this kind of routes by specifying an interface. Before the patch: $ ip route add throw 2001::2/128 RTNETLINK answers: No such device $ ip route add throw 2001::2/128 dev eth0 $ ip -6 route | grep 2001::2 2001::2 dev eth0 metric 1024 After: $ ip route add throw 2001::2/128 $ ip -6 route | grep 2001::2 throw 2001::2 dev lo metric 1024 error -11 Reported-by: Markus Stenberg Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fa264447a751..339d921cf3b6 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1471,6 +1471,9 @@ int ip6_route_add(struct fib6_config *cfg) case RTN_PROHIBIT: rt->dst.error = -EACCES; break; + case RTN_THROW: + rt->dst.error = -EAGAIN; + break; default: rt->dst.error = -ENETUNREACH; break; @@ -2275,7 +2278,8 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_UNREACHABLE || rtm->rtm_type == RTN_BLACKHOLE || - rtm->rtm_type == RTN_PROHIBIT) + rtm->rtm_type == RTN_PROHIBIT || + rtm->rtm_type == RTN_THROW) cfg->fc_flags |= RTF_REJECT; if (rtm->rtm_type == RTN_LOCAL) @@ -2412,6 +2416,9 @@ static int rt6_fill_node(struct net *net, case -EACCES: rtm->rtm_type = RTN_PROHIBIT; break; + case -EAGAIN: + rtm->rtm_type = RTN_THROW; + break; default: rtm->rtm_type = RTN_UNREACHABLE; break; -- cgit v1.2.3 From 15e473046cb6e5d18a4d0057e61d76315230382b Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 7 Sep 2012 20:12:54 +0000 Subject: netlink: Rename pid to portid to avoid confusion It is a frequent mistake to confuse the netlink port identifier with a process identifier. Try to reduce this confusion by renaming fields that hold port identifiers portid instead of pid. I have carefully avoided changing the structures exported to userspace to avoid changing the userspace API. I have successfully built an allyesconfig kernel with this change. Signed-off-by: "Eric W. Biederman" Acked-by: Stephen Hemminger Signed-off-by: David S. Miller --- crypto/crypto_user.c | 4 +- drivers/net/team/team.c | 38 +++--- drivers/net/wireless/mac80211_hwsim.c | 46 ++++---- drivers/scsi/scsi_transport_iscsi.c | 4 +- drivers/staging/gdm72xx/netlink_k.c | 2 +- fs/dlm/netlink.c | 8 +- include/linux/netlink.h | 14 +-- include/net/cfg80211.h | 2 +- include/net/genetlink.h | 34 +++--- include/net/netfilter/nf_conntrack_ecache.h | 32 +++--- include/net/netlink.h | 26 ++--- include/net/nfc/nfc.h | 2 +- include/net/xfrm.h | 6 +- kernel/audit.c | 20 ++-- kernel/taskstats.c | 4 +- net/bridge/br_fdb.c | 6 +- net/bridge/br_netlink.c | 2 +- net/can/gw.c | 2 +- net/core/fib_rules.c | 6 +- net/core/neighbour.c | 8 +- net/core/rtnetlink.c | 12 +- net/dcb/dcbnl.c | 18 +-- net/decnet/dn_dev.c | 6 +- net/decnet/dn_route.c | 10 +- net/decnet/dn_table.c | 12 +- net/ieee802154/nl-mac.c | 6 +- net/ieee802154/nl-phy.c | 6 +- net/ipv4/devinet.c | 28 ++--- net/ipv4/fib_frontend.c | 10 +- net/ipv4/fib_semantics.c | 8 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/inet_diag.c | 32 +++--- net/ipv4/ipmr.c | 10 +- net/ipv4/route.c | 8 +- net/ipv4/tcp_metrics.c | 2 +- net/ipv4/udp_diag.c | 6 +- net/ipv6/addrconf.c | 32 +++--- net/ipv6/addrlabel.c | 10 +- net/ipv6/ip6mr.c | 10 +- net/ipv6/route.c | 20 ++-- net/irda/irnetlink.c | 2 +- net/key/af_key.c | 32 +++--- net/l2tp/l2tp_netlink.c | 24 ++-- net/netfilter/ipset/ip_set_core.c | 24 ++-- net/netfilter/ipvs/ip_vs_ctl.c | 6 +- net/netfilter/nf_conntrack_ecache.c | 2 +- net/netfilter/nf_conntrack_netlink.c | 78 ++++++------- net/netfilter/nfnetlink_acct.c | 12 +- net/netfilter/nfnetlink_cthelper.c | 12 +- net/netfilter/nfnetlink_cttimeout.c | 12 +- net/netfilter/nfnetlink_log.c | 18 +-- net/netfilter/nfnetlink_queue_core.c | 28 ++--- net/netlabel/netlabel_cipso_v4.c | 2 +- net/netlabel/netlabel_mgmt.c | 4 +- net/netlabel/netlabel_unlabeled.c | 2 +- net/netlink/af_netlink.c | 172 ++++++++++++++-------------- net/netlink/genetlink.c | 42 +++---- net/nfc/netlink.c | 26 ++--- net/openvswitch/actions.c | 4 +- net/openvswitch/datapath.c | 84 +++++++------- net/openvswitch/datapath.h | 2 +- net/openvswitch/vport.c | 2 +- net/openvswitch/vport.h | 6 +- net/packet/diag.c | 6 +- net/phonet/pn_netlink.c | 14 +-- net/sched/act_api.c | 52 ++++----- net/sched/cls_api.c | 14 +-- net/sched/sch_api.c | 44 +++---- net/tipc/netlink.c | 2 +- net/unix/diag.c | 14 +-- net/wireless/core.h | 2 +- net/wireless/mlme.c | 16 +-- net/wireless/nl80211.c | 110 +++++++++--------- net/xfrm/xfrm_state.c | 10 +- net/xfrm/xfrm_user.c | 62 +++++----- 75 files changed, 728 insertions(+), 728 deletions(-) (limited to 'net/ipv6') diff --git a/crypto/crypto_user.c b/crypto/crypto_user.c index 165914e63ef2..6bba414d0c61 100644 --- a/crypto/crypto_user.c +++ b/crypto/crypto_user.c @@ -166,7 +166,7 @@ static int crypto_report_alg(struct crypto_alg *alg, struct crypto_user_alg *ualg; int err = 0; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, info->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, info->nlmsg_seq, CRYPTO_MSG_GETALG, sizeof(*ualg), info->nlmsg_flags); if (!nlh) { err = -EMSGSIZE; @@ -216,7 +216,7 @@ static int crypto_report(struct sk_buff *in_skb, struct nlmsghdr *in_nlh, if (err) return err; - return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).pid); + return nlmsg_unicast(crypto_nlsk, skb, NETLINK_CB(in_skb).portid); } static int crypto_dump_report(struct sk_buff *skb, struct netlink_callback *cb) diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c index b4f67b55ef79..266af7b38ebc 100644 --- a/drivers/net/team/team.c +++ b/drivers/net/team/team.c @@ -1886,7 +1886,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &team_nl_family, 0, TEAM_CMD_NOOP); if (IS_ERR(hdr)) { err = PTR_ERR(hdr); @@ -1895,7 +1895,7 @@ static int team_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_msg_put: nlmsg_free(msg); @@ -1952,7 +1952,7 @@ static int team_nl_send_generic(struct genl_info *info, struct team *team, if (err < 0) goto err_fill; - err = genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); + err = genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); return err; err_fill: @@ -1961,11 +1961,11 @@ err_fill: } typedef int team_nl_send_func_t(struct sk_buff *skb, - struct team *team, u32 pid); + struct team *team, u32 portid); -static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 pid) +static int team_nl_send_unicast(struct sk_buff *skb, struct team *team, u32 portid) { - return genlmsg_unicast(dev_net(team->dev), skb, pid); + return genlmsg_unicast(dev_net(team->dev), skb, portid); } static int team_nl_fill_one_option_get(struct sk_buff *skb, struct team *team, @@ -2050,13 +2050,13 @@ nest_cancel: } static int __send_and_alloc_skb(struct sk_buff **pskb, - struct team *team, u32 pid, + struct team *team, u32 portid, team_nl_send_func_t *send_func) { int err; if (*pskb) { - err = send_func(*pskb, team, pid); + err = send_func(*pskb, team, portid); if (err) return err; } @@ -2066,7 +2066,7 @@ static int __send_and_alloc_skb(struct sk_buff **pskb, return 0; } -static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq, +static int team_nl_send_options_get(struct team *team, u32 portid, u32 seq, int flags, team_nl_send_func_t *send_func, struct list_head *sel_opt_inst_list) { @@ -2083,11 +2083,11 @@ static int team_nl_send_options_get(struct team *team, u32 pid, u32 seq, struct team_option_inst, tmp_list); start_again: - err = __send_and_alloc_skb(&skb, team, pid, send_func); + err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) return err; - hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags | NLM_F_MULTI, + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags | NLM_F_MULTI, TEAM_CMD_OPTIONS_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2120,15 +2120,15 @@ start_again: goto start_again; send_done: - nlh = nlmsg_put(skb, pid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, NLMSG_DONE, 0, flags | NLM_F_MULTI); if (!nlh) { - err = __send_and_alloc_skb(&skb, team, pid, send_func); + err = __send_and_alloc_skb(&skb, team, portid, send_func); if (err) goto errout; goto send_done; } - return send_func(skb, team, pid); + return send_func(skb, team, portid); nla_put_failure: err = -EMSGSIZE; @@ -2151,7 +2151,7 @@ static int team_nl_cmd_options_get(struct sk_buff *skb, struct genl_info *info) list_for_each_entry(opt_inst, &team->option_inst_list, list) list_add_tail(&opt_inst->tmp_list, &sel_opt_inst_list); - err = team_nl_send_options_get(team, info->snd_pid, info->snd_seq, + err = team_nl_send_options_get(team, info->snd_portid, info->snd_seq, NLM_F_ACK, team_nl_send_unicast, &sel_opt_inst_list); @@ -2305,7 +2305,7 @@ team_put: } static int team_nl_fill_port_list_get(struct sk_buff *skb, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, struct team *team, bool fillall) { @@ -2313,7 +2313,7 @@ static int team_nl_fill_port_list_get(struct sk_buff *skb, void *hdr; struct team_port *port; - hdr = genlmsg_put(skb, pid, seq, &team_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &team_nl_family, flags, TEAM_CMD_PORT_LIST_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2362,7 +2362,7 @@ static int team_nl_fill_port_list_get_all(struct sk_buff *skb, struct genl_info *info, int flags, struct team *team) { - return team_nl_fill_port_list_get(skb, info->snd_pid, + return team_nl_fill_port_list_get(skb, info->snd_portid, info->snd_seq, NLM_F_ACK, team, true); } @@ -2415,7 +2415,7 @@ static struct genl_multicast_group team_change_event_mcgrp = { }; static int team_nl_send_multicast(struct sk_buff *skb, - struct team *team, u32 pid) + struct team *team, u32 portid) { return genlmsg_multicast_netns(dev_net(team->dev), skb, 0, team_change_event_mcgrp.id, GFP_KERNEL); diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index 72b0456e41bf..9d45b3bb974c 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -38,7 +38,7 @@ MODULE_AUTHOR("Jouni Malinen"); MODULE_DESCRIPTION("Software simulator of 802.11 radio(s) for mac80211"); MODULE_LICENSE("GPL"); -static u32 wmediumd_pid; +static u32 wmediumd_portid; static int radios = 2; module_param(radios, int, 0444); @@ -545,7 +545,7 @@ static bool mac80211_hwsim_addr_match(struct mac80211_hwsim_data *data, static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, struct sk_buff *my_skb, - int dst_pid) + int dst_portid) { struct sk_buff *skb; struct mac80211_hwsim_data *data = hw->priv; @@ -619,7 +619,7 @@ static void mac80211_hwsim_tx_frame_nl(struct ieee80211_hw *hw, goto nla_put_failure; genlmsg_end(skb, msg_head); - genlmsg_unicast(&init_net, skb, dst_pid); + genlmsg_unicast(&init_net, skb, dst_portid); /* Enqueue the packet */ skb_queue_tail(&data->pending, my_skb); @@ -715,7 +715,7 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, { bool ack; struct ieee80211_tx_info *txi; - u32 _pid; + u32 _portid; mac80211_hwsim_monitor_rx(hw, skb); @@ -726,10 +726,10 @@ static void mac80211_hwsim_tx(struct ieee80211_hw *hw, } /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); /* NO wmediumd detected, perfect medium simulation */ ack = mac80211_hwsim_tx_frame_no_nl(hw, skb); @@ -814,7 +814,7 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, struct ieee80211_hw *hw = arg; struct sk_buff *skb; struct ieee80211_tx_info *info; - u32 _pid; + u32 _portid; hwsim_check_magic(vif); @@ -831,10 +831,10 @@ static void mac80211_hwsim_beacon_tx(void *arg, u8 *mac, mac80211_hwsim_monitor_rx(hw, skb); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(hw, skb, _portid); mac80211_hwsim_tx_frame_no_nl(hw, skb); dev_kfree_skb(skb); @@ -1315,7 +1315,7 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_pspoll *pspoll; - u32 _pid; + u32 _portid; if (!vp->assoc) return; @@ -1336,10 +1336,10 @@ static void hwsim_send_ps_poll(void *dat, u8 *mac, struct ieee80211_vif *vif) memcpy(pspoll->ta, mac, ETH_ALEN); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid); if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb)) printk(KERN_DEBUG "%s: PS-poll frame not ack'ed\n", __func__); @@ -1353,7 +1353,7 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, struct hwsim_vif_priv *vp = (void *)vif->drv_priv; struct sk_buff *skb; struct ieee80211_hdr *hdr; - u32 _pid; + u32 _portid; if (!vp->assoc) return; @@ -1375,10 +1375,10 @@ static void hwsim_send_nullfunc(struct mac80211_hwsim_data *data, u8 *mac, memcpy(hdr->addr3, vp->bssid, ETH_ALEN); /* wmediumd mode check */ - _pid = ACCESS_ONCE(wmediumd_pid); + _portid = ACCESS_ONCE(wmediumd_portid); - if (_pid) - return mac80211_hwsim_tx_frame_nl(data->hw, skb, _pid); + if (_portid) + return mac80211_hwsim_tx_frame_nl(data->hw, skb, _portid); if (!mac80211_hwsim_tx_frame_no_nl(data->hw, skb)) printk(KERN_DEBUG "%s: nullfunc frame not ack'ed\n", __func__); @@ -1632,10 +1632,10 @@ static int hwsim_register_received_nl(struct sk_buff *skb_2, if (info == NULL) goto out; - wmediumd_pid = info->snd_pid; + wmediumd_portid = info->snd_portid; printk(KERN_DEBUG "mac80211_hwsim: received a REGISTER, " - "switching to wmediumd mode with pid %d\n", info->snd_pid); + "switching to wmediumd mode with pid %d\n", info->snd_portid); return 0; out: @@ -1672,10 +1672,10 @@ static int mac80211_hwsim_netlink_notify(struct notifier_block *nb, if (state != NETLINK_URELEASE) return NOTIFY_DONE; - if (notify->pid == wmediumd_pid) { + if (notify->portid == wmediumd_portid) { printk(KERN_INFO "mac80211_hwsim: wmediumd released netlink" " socket, switching to perfect channel medium\n"); - wmediumd_pid = 0; + wmediumd_portid = 0; } return NOTIFY_DONE; diff --git a/drivers/scsi/scsi_transport_iscsi.c b/drivers/scsi/scsi_transport_iscsi.c index 519bd5303f3f..31969f2e13ce 100644 --- a/drivers/scsi/scsi_transport_iscsi.c +++ b/drivers/scsi/scsi_transport_iscsi.c @@ -2119,7 +2119,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) switch (nlh->nlmsg_type) { case ISCSI_UEVENT_CREATE_SESSION: err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, ev->u.c_session.initial_cmdsn, ev->u.c_session.cmds_max, ev->u.c_session.queue_depth); @@ -2132,7 +2132,7 @@ iscsi_if_recv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, uint32_t *group) } err = iscsi_if_create_session(priv, ep, ev, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, ev->u.c_bound_session.initial_cmdsn, ev->u.c_bound_session.cmds_max, ev->u.c_bound_session.queue_depth); diff --git a/drivers/staging/gdm72xx/netlink_k.c b/drivers/staging/gdm72xx/netlink_k.c index 2109cab0a14c..20d0aec52e72 100644 --- a/drivers/staging/gdm72xx/netlink_k.c +++ b/drivers/staging/gdm72xx/netlink_k.c @@ -135,7 +135,7 @@ int netlink_send(struct sock *sock, int group, u16 type, void *msg, int len) } memcpy(nlmsg_data(nlh), msg, len); - NETLINK_CB(skb).pid = 0; + NETLINK_CB(skb).portid = 0; NETLINK_CB(skb).dst_group = 0; ret = netlink_broadcast(sock, skb, 0, group+1, GFP_ATOMIC); diff --git a/fs/dlm/netlink.c b/fs/dlm/netlink.c index ef17e0169da1..60a327863b11 100644 --- a/fs/dlm/netlink.c +++ b/fs/dlm/netlink.c @@ -14,7 +14,7 @@ #include "dlm_internal.h" static uint32_t dlm_nl_seqnum; -static uint32_t listener_nlpid; +static uint32_t listener_nlportid; static struct genl_family family = { .id = GENL_ID_GENERATE, @@ -64,13 +64,13 @@ static int send_data(struct sk_buff *skb) return rv; } - return genlmsg_unicast(&init_net, skb, listener_nlpid); + return genlmsg_unicast(&init_net, skb, listener_nlportid); } static int user_cmd(struct sk_buff *skb, struct genl_info *info) { - listener_nlpid = info->snd_pid; - printk("user_cmd nlpid %u\n", listener_nlpid); + listener_nlportid = info->snd_portid; + printk("user_cmd nlpid %u\n", listener_nlportid); return 0; } diff --git a/include/linux/netlink.h b/include/linux/netlink.h index cd17dda5a987..73ade5fbc856 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -165,7 +165,7 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) struct netlink_skb_parms { struct scm_creds creds; /* Skb credentials */ - __u32 pid; + __u32 portid; __u32 dst_group; struct sock *ssk; }; @@ -205,14 +205,14 @@ extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group) extern void netlink_clear_multicast_users(struct sock *sk, unsigned int group); extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); extern int netlink_has_listeners(struct sock *sk, unsigned int group); -extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 pid, int nonblock); -extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 pid, +extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); +extern int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, __u32 portid, __u32 group, gfp_t allocation); extern int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, - __u32 pid, __u32 group, gfp_t allocation, + __u32 portid, __u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data); -extern int netlink_set_err(struct sock *ssk, __u32 pid, __u32 group, int code); +extern int netlink_set_err(struct sock *ssk, __u32 portid, __u32 group, int code); extern int netlink_register_notifier(struct notifier_block *nb); extern int netlink_unregister_notifier(struct notifier_block *nb); @@ -253,12 +253,12 @@ struct netlink_callback { struct netlink_notify { struct net *net; - int pid; + int portid; int protocol; }; struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags); +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags); struct netlink_dump_control { int (*dump)(struct sk_buff *skb, struct netlink_callback *); diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index ba2e6160fad1..7b3b0568d289 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -2458,7 +2458,7 @@ struct wireless_dev { int beacon_interval; - u32 ap_unexpected_nlpid; + u32 ap_unexpected_nlportid; #ifdef CONFIG_CFG80211_WEXT /* wext data */ diff --git a/include/net/genetlink.h b/include/net/genetlink.h index 48905cd3884c..bdfbe68c1c3b 100644 --- a/include/net/genetlink.h +++ b/include/net/genetlink.h @@ -65,7 +65,7 @@ struct genl_family { /** * struct genl_info - receiving information * @snd_seq: sending sequence number - * @snd_pid: netlink pid of sender + * @snd_portid: netlink portid of sender * @nlhdr: netlink message header * @genlhdr: generic netlink message header * @userhdr: user specific header @@ -75,7 +75,7 @@ struct genl_family { */ struct genl_info { u32 snd_seq; - u32 snd_pid; + u32 snd_portid; struct nlmsghdr * nlhdr; struct genlmsghdr * genlhdr; void * userhdr; @@ -130,10 +130,10 @@ extern int genl_register_mc_group(struct genl_family *family, struct genl_multicast_group *grp); extern void genl_unregister_mc_group(struct genl_family *family, struct genl_multicast_group *grp); -extern void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, +extern void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags); -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd); /** @@ -183,7 +183,7 @@ static inline void *genlmsg_put_reply(struct sk_buff *skb, struct genl_family *family, int flags, u8 cmd) { - return genlmsg_put(skb, info->snd_pid, info->snd_seq, family, + return genlmsg_put(skb, info->snd_portid, info->snd_seq, family, flags, cmd); } @@ -212,49 +212,49 @@ static inline void genlmsg_cancel(struct sk_buff *skb, void *hdr) * genlmsg_multicast_netns - multicast a netlink message to a specific netns * @net: the net namespace * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int genlmsg_multicast_netns(struct net *net, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 portid, unsigned int group, gfp_t flags) { - return nlmsg_multicast(net->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(net->genl_sock, skb, portid, group, flags); } /** * genlmsg_multicast - multicast a netlink message to the default netns * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ -static inline int genlmsg_multicast(struct sk_buff *skb, u32 pid, +static inline int genlmsg_multicast(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_multicast_netns(&init_net, skb, pid, group, flags); + return genlmsg_multicast_netns(&init_net, skb, portid, group, flags); } /** * genlmsg_multicast_allns - multicast a netlink message to all net namespaces * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags * * This function must hold the RTNL or rcu_read_lock(). */ -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags); /** * genlmsg_unicast - unicast a netlink message * @skb: netlink message as socket buffer - * @pid: netlink pid of the destination socket + * @portid: netlink portid of the destination socket */ -static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) +static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 portid) { - return nlmsg_unicast(net->genl_sock, skb, pid); + return nlmsg_unicast(net->genl_sock, skb, portid); } /** @@ -264,7 +264,7 @@ static inline int genlmsg_unicast(struct net *net, struct sk_buff *skb, u32 pid) */ static inline int genlmsg_reply(struct sk_buff *skb, struct genl_info *info) { - return genlmsg_unicast(genl_info_net(info), skb, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), skb, info->snd_portid); } /** diff --git a/include/net/netfilter/nf_conntrack_ecache.h b/include/net/netfilter/nf_conntrack_ecache.h index 4a045cda9c60..5654d292efd4 100644 --- a/include/net/netfilter/nf_conntrack_ecache.h +++ b/include/net/netfilter/nf_conntrack_ecache.h @@ -17,7 +17,7 @@ struct nf_conntrack_ecache { unsigned long missed; /* missed events */ u16 ctmask; /* bitmask of ct events to be delivered */ u16 expmask; /* bitmask of expect events to be delivered */ - u32 pid; /* netlink pid of destroyer */ + u32 portid; /* netlink portid of destroyer */ struct timer_list timeout; }; @@ -60,7 +60,7 @@ nf_ct_ecache_ext_add(struct nf_conn *ct, u16 ctmask, u16 expmask, gfp_t gfp) /* This structure is passed to event handler */ struct nf_ct_event { struct nf_conn *ct; - u32 pid; + u32 portid; int report; }; @@ -92,7 +92,7 @@ nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { int ret = 0; @@ -112,11 +112,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask, if (nf_ct_is_confirmed(ct) && !nf_ct_is_dying(ct)) { struct nf_ct_event item = { .ct = ct, - .pid = e->pid ? e->pid : pid, + .portid = e->portid ? e->portid : portid, .report = report }; /* This is a resent of a destroy event? If so, skip missed */ - unsigned long missed = e->pid ? 0 : e->missed; + unsigned long missed = e->portid ? 0 : e->missed; if (!((eventmask | missed) & e->ctmask)) goto out_unlock; @@ -126,11 +126,11 @@ nf_conntrack_eventmask_report(unsigned int eventmask, spin_lock_bh(&ct->lock); if (ret < 0) { /* This is a destroy event that has been - * triggered by a process, we store the PID + * triggered by a process, we store the PORTID * to include it in the retransmission. */ if (eventmask & (1 << IPCT_DESTROY) && - e->pid == 0 && pid != 0) - e->pid = pid; + e->portid == 0 && portid != 0) + e->portid = portid; else e->missed |= eventmask; } else @@ -145,9 +145,9 @@ out_unlock: static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, - u32 pid, int report) + u32 portid, int report) { - return nf_conntrack_eventmask_report(1 << event, ct, pid, report); + return nf_conntrack_eventmask_report(1 << event, ct, portid, report); } static inline int @@ -158,7 +158,7 @@ nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) struct nf_exp_event { struct nf_conntrack_expect *exp; - u32 pid; + u32 portid; int report; }; @@ -172,7 +172,7 @@ extern void nf_ct_expect_unregister_notifier(struct net *net, struct nf_exp_even static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp, - u32 pid, + u32 portid, int report) { struct net *net = nf_ct_exp_net(exp); @@ -191,7 +191,7 @@ nf_ct_expect_event_report(enum ip_conntrack_expect_events event, if (e->expmask & (1 << event)) { struct nf_exp_event item = { .exp = exp, - .pid = pid, + .portid = portid, .report = report }; notify->fcn(1 << event, &item); @@ -216,20 +216,20 @@ static inline void nf_conntrack_event_cache(enum ip_conntrack_events event, struct nf_conn *ct) {} static inline int nf_conntrack_eventmask_report(unsigned int eventmask, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { return 0; } static inline int nf_conntrack_event(enum ip_conntrack_events event, struct nf_conn *ct) { return 0; } static inline int nf_conntrack_event_report(enum ip_conntrack_events event, struct nf_conn *ct, - u32 pid, + u32 portid, int report) { return 0; } static inline void nf_ct_deliver_cached_events(const struct nf_conn *ct) {} static inline void nf_ct_expect_event(enum ip_conntrack_expect_events event, struct nf_conntrack_expect *exp) {} static inline void nf_ct_expect_event_report(enum ip_conntrack_expect_events e, struct nf_conntrack_expect *exp, - u32 pid, + u32 portid, int report) {} static inline int nf_conntrack_ecache_init(struct net *net) diff --git a/include/net/netlink.h b/include/net/netlink.h index 09175d5d1fbf..9690b0f6698a 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -217,19 +217,19 @@ struct nla_policy { /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request - * @pid: Netlink PID of requesting application + * @portid: Netlink PORTID of requesting application */ struct nl_info { struct nlmsghdr *nlh; struct net *nl_net; - u32 pid; + u32 portid; }; extern int netlink_rcv_skb(struct sk_buff *skb, int (*cb)(struct sk_buff *, struct nlmsghdr *)); extern int nlmsg_notify(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, int report, + u32 portid, unsigned int group, int report, gfp_t flags); extern int nla_validate(const struct nlattr *head, @@ -444,7 +444,7 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) /** * nlmsg_put - Add a new netlink message to an skb * @skb: socket buffer to store message in - * @pid: netlink process id + * @portid: netlink process id * @seq: sequence number of message * @type: message type * @payload: length of message payload @@ -453,13 +453,13 @@ static inline int nlmsg_report(const struct nlmsghdr *nlh) * Returns NULL if the tailroom of the skb is insufficient to store * the message header and payload. */ -static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline struct nlmsghdr *nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int payload, int flags) { if (unlikely(skb_tailroom(skb) < nlmsg_total_size(payload))) return NULL; - return __nlmsg_put(skb, pid, seq, type, payload, flags); + return __nlmsg_put(skb, portid, seq, type, payload, flags); } /** @@ -478,7 +478,7 @@ static inline struct nlmsghdr *nlmsg_put_answer(struct sk_buff *skb, int type, int payload, int flags) { - return nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + return nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, type, payload, flags); } @@ -563,18 +563,18 @@ static inline void nlmsg_free(struct sk_buff *skb) * nlmsg_multicast - multicast a netlink message * @sk: netlink socket to spread messages to * @skb: netlink message as socket buffer - * @pid: own netlink pid to avoid sending to yourself + * @portid: own netlink portid to avoid sending to yourself * @group: multicast group id * @flags: allocation flags */ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, - u32 pid, unsigned int group, gfp_t flags) + u32 portid, unsigned int group, gfp_t flags) { int err; NETLINK_CB(skb).dst_group = group; - err = netlink_broadcast(sk, skb, pid, group, flags); + err = netlink_broadcast(sk, skb, portid, group, flags); if (err > 0) err = 0; @@ -585,13 +585,13 @@ static inline int nlmsg_multicast(struct sock *sk, struct sk_buff *skb, * nlmsg_unicast - unicast a netlink message * @sk: netlink socket to spread message to * @skb: netlink message as socket buffer - * @pid: netlink pid of the destination socket + * @portid: netlink portid of the destination socket */ -static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 pid) +static inline int nlmsg_unicast(struct sock *sk, struct sk_buff *skb, u32 portid) { int err; - err = netlink_unicast(sk, skb, pid, MSG_DONTWAIT); + err = netlink_unicast(sk, skb, portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/include/net/nfc/nfc.h b/include/net/nfc/nfc.h index 6431f5e39022..6735909f826d 100644 --- a/include/net/nfc/nfc.h +++ b/include/net/nfc/nfc.h @@ -89,7 +89,7 @@ struct nfc_target { }; struct nfc_genl_data { - u32 poll_req_pid; + u32 poll_req_portid; struct mutex genl_data_mutex; }; diff --git a/include/net/xfrm.h b/include/net/xfrm.h index 36ad56ba648b..ee8613f01860 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -263,7 +263,7 @@ struct km_event { } data; u32 seq; - u32 pid; + u32 portid; u32 event; struct net *net; }; @@ -310,7 +310,7 @@ extern void km_state_notify(struct xfrm_state *x, const struct km_event *c); struct xfrm_tmpl; extern int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -extern void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +extern void km_state_expired(struct xfrm_state *x, int hard, u32 portid); extern int __xfrm_state_delete(struct xfrm_state *x); struct xfrm_state_afinfo { @@ -1554,7 +1554,7 @@ extern int xfrm_migrate(const struct xfrm_selector *sel, u8 dir, u8 type, #endif extern int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport); -extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid); +extern void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid); extern int km_report(struct net *net, u8 proto, struct xfrm_selector *sel, xfrm_address_t *addr); extern void xfrm_input_init(void); diff --git a/kernel/audit.c b/kernel/audit.c index a24aafa850ae..e0cf64a0ae2d 100644 --- a/kernel/audit.c +++ b/kernel/audit.c @@ -87,11 +87,11 @@ static int audit_failure = AUDIT_FAIL_PRINTK; /* * If audit records are to be written to the netlink socket, audit_pid - * contains the pid of the auditd process and audit_nlk_pid contains - * the pid to use to send netlink messages to that process. + * contains the pid of the auditd process and audit_nlk_portid contains + * the portid to use to send netlink messages to that process. */ int audit_pid; -static int audit_nlk_pid; +static int audit_nlk_portid; /* If audit_rate_limit is non-zero, limit the rate of sending audit records * to that number per second. This prevents DoS attacks, but results in @@ -401,7 +401,7 @@ static void kauditd_send_skb(struct sk_buff *skb) int err; /* take a reference in case we can't send it and we want to hold it */ skb_get(skb); - err = netlink_unicast(audit_sock, skb, audit_nlk_pid, 0); + err = netlink_unicast(audit_sock, skb, audit_nlk_portid, 0); if (err < 0) { BUG_ON(err != -ECONNREFUSED); /* Shouldn't happen */ printk(KERN_ERR "audit: *NO* daemon at audit_pid=%d\n", audit_pid); @@ -692,7 +692,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) status_set.backlog_limit = audit_backlog_limit; status_set.lost = atomic_read(&audit_lost); status_set.backlog = skb_queue_len(&audit_skb_queue); - audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_GET, 0, 0, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_GET, 0, 0, &status_set, sizeof(status_set)); break; case AUDIT_SET: @@ -720,7 +720,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) sessionid, sid, 1); audit_pid = new_pid; - audit_nlk_pid = NETLINK_CB(skb).pid; + audit_nlk_portid = NETLINK_CB(skb).portid; } if (status_get->mask & AUDIT_STATUS_RATE_LIMIT) { err = audit_set_rate_limit(status_get->rate_limit, @@ -782,7 +782,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST: - err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -801,7 +801,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } /* fallthrough */ case AUDIT_LIST_RULES: - err = audit_receive_filter(msg_type, NETLINK_CB(skb).pid, + err = audit_receive_filter(msg_type, NETLINK_CB(skb).portid, uid, seq, data, nlmsg_len(nlh), loginuid, sessionid, sid); break; @@ -872,7 +872,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) memcpy(sig_data->ctx, ctx, len); security_release_secctx(ctx, len); } - audit_send_reply(NETLINK_CB(skb).pid, seq, AUDIT_SIGNAL_INFO, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_SIGNAL_INFO, 0, 0, sig_data, sizeof(*sig_data) + len); kfree(sig_data); break; @@ -891,7 +891,7 @@ static int audit_receive_msg(struct sk_buff *skb, struct nlmsghdr *nlh) rcu_read_unlock(); if (!err) - audit_send_reply(NETLINK_CB(skb).pid, seq, + audit_send_reply(NETLINK_CB(skb).portid, seq, AUDIT_TTY_GET, 0, 0, &s, sizeof(s)); break; } diff --git a/kernel/taskstats.c b/kernel/taskstats.c index d0a32796550f..123793cd06f9 100644 --- a/kernel/taskstats.c +++ b/kernel/taskstats.c @@ -467,7 +467,7 @@ static int cmd_attr_register_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_REGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, REGISTER); + rc = add_del_listener(info->snd_portid, mask, REGISTER); out: free_cpumask_var(mask); return rc; @@ -483,7 +483,7 @@ static int cmd_attr_deregister_cpumask(struct genl_info *info) rc = parse(info->attrs[TASKSTATS_CMD_ATTR_DEREGISTER_CPUMASK], mask); if (rc < 0) goto out; - rc = add_del_listener(info->snd_pid, mask, DEREGISTER); + rc = add_del_listener(info->snd_portid, mask, DEREGISTER); out: free_cpumask_var(mask); return rc; diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index 9ce430b4657c..ddf93efc133c 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -467,14 +467,14 @@ static int fdb_to_nud(const struct net_bridge_fdb_entry *fdb) static int fdb_fill_info(struct sk_buff *skb, const struct net_bridge *br, const struct net_bridge_fdb_entry *fdb, - u32 pid, u32 seq, int type, unsigned int flags) + u32 portid, u32 seq, int type, unsigned int flags) { unsigned long now = jiffies; struct nda_cacheinfo ci; struct nlmsghdr *nlh; struct ndmsg *ndm; - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*ndm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -555,7 +555,7 @@ int br_fdb_dump(struct sk_buff *skb, goto skip; if (fdb_fill_info(skb, br, f, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) < 0) diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c index fe41260fbf38..093f527276a3 100644 --- a/net/bridge/br_netlink.c +++ b/net/bridge/br_netlink.c @@ -127,7 +127,7 @@ static int br_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) goto skip; if (br_fill_ifinfo(skb, port, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) < 0) break; diff --git a/net/can/gw.c b/net/can/gw.c index b54d5e695b03..127879c55fb6 100644 --- a/net/can/gw.c +++ b/net/can/gw.c @@ -549,7 +549,7 @@ static int cgw_dump_jobs(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; - if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).pid, + if (cgw_put_job(skb, gwj, RTM_NEWROUTE, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) break; cont: diff --git a/net/core/fib_rules.c b/net/core/fib_rules.c index ab7db83236c9..58a4ba27dfe3 100644 --- a/net/core/fib_rules.c +++ b/net/core/fib_rules.c @@ -402,7 +402,7 @@ static int fib_nl_newrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (unresolved) ops->unresolved_rules++; - notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).pid); + notify_rule_change(RTM_NEWRULE, rule, ops, nlh, NETLINK_CB(skb).portid); flush_route_cache(ops); rules_ops_put(ops); return 0; @@ -500,7 +500,7 @@ static int fib_nl_delrule(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) } notify_rule_change(RTM_DELRULE, rule, ops, nlh, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); if (ops->delete) ops->delete(rule); fib_rule_put(rule); @@ -601,7 +601,7 @@ static int dump_rules(struct sk_buff *skb, struct netlink_callback *cb, if (idx < cb->args[1]) goto skip; - if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).pid, + if (fib_nl_fill_rule(skb, rule, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWRULE, NLM_F_MULTI, ops) < 0) break; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 117afaf51268..c160adb38e5a 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -2102,7 +2102,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) if (tidx < tbl_skip || (family && tbl->family != family)) continue; - if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).pid, + if (neightbl_fill_info(skb, tbl, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) break; @@ -2115,7 +2115,7 @@ static int neightbl_dump_info(struct sk_buff *skb, struct netlink_callback *cb) goto next; if (neightbl_fill_param_info(skb, tbl, p, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGHTBL, NLM_F_MULTI) <= 0) @@ -2244,7 +2244,7 @@ static int neigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (neigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI) <= 0) { @@ -2281,7 +2281,7 @@ static int pneigh_dump_table(struct neigh_table *tbl, struct sk_buff *skb, continue; if (idx < s_idx) goto next; - if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).pid, + if (pneigh_fill_info(skb, n, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWNEIGH, NLM_F_MULTI, tbl) <= 0) { diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 508c5df4a09c..92575370d9f0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1081,7 +1081,7 @@ static int rtnl_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) goto cont; if (rtnl_fill_ifinfo(skb, dev, RTM_NEWLINK, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, 0, NLM_F_MULTI, ext_filter_mask) <= 0) @@ -1899,14 +1899,14 @@ static int rtnl_getlink(struct sk_buff *skb, struct nlmsghdr* nlh, void *arg) if (nskb == NULL) return -ENOBUFS; - err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).pid, + err = rtnl_fill_ifinfo(nskb, dev, RTM_NEWLINK, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, 0, ext_filter_mask); if (err < 0) { /* -EMSGSIZE implies BUG in if_nlmsg_size */ WARN_ON(err == -EMSGSIZE); kfree_skb(nskb); } else - err = rtnl_unicast(nskb, net, NETLINK_CB(skb).pid); + err = rtnl_unicast(nskb, net, NETLINK_CB(skb).portid); return err; } @@ -2180,9 +2180,9 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, { struct netdev_hw_addr *ha; int err; - u32 pid, seq; + u32 portid, seq; - pid = NETLINK_CB(cb->skb).pid; + portid = NETLINK_CB(cb->skb).portid; seq = cb->nlh->nlmsg_seq; list_for_each_entry(ha, &list->list, list) { @@ -2190,7 +2190,7 @@ static int nlmsg_populate_fdb(struct sk_buff *skb, goto skip; err = nlmsg_populate_fdb_fill(skb, dev, ha->addr, - pid, seq, 0, NTF_SELF); + portid, seq, 0, NTF_SELF); if (err < 0) return err; skip: diff --git a/net/dcb/dcbnl.c b/net/dcb/dcbnl.c index 81f2bb62dea3..70989e672304 100644 --- a/net/dcb/dcbnl.c +++ b/net/dcb/dcbnl.c @@ -1319,7 +1319,7 @@ nla_put_failure: } static int dcbnl_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid, int dcbx_ver) + u32 seq, u32 portid, int dcbx_ver) { struct net *net = dev_net(dev); struct sk_buff *skb; @@ -1330,7 +1330,7 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, if (!ops) return -EOPNOTSUPP; - skb = dcbnl_newmsg(event, cmd, pid, seq, 0, &nlh); + skb = dcbnl_newmsg(event, cmd, portid, seq, 0, &nlh); if (!skb) return -ENOBUFS; @@ -1353,16 +1353,16 @@ static int dcbnl_notify(struct net_device *dev, int event, int cmd, } int dcbnl_ieee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_IEEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_IEEE); } EXPORT_SYMBOL(dcbnl_ieee_notify); int dcbnl_cee_notify(struct net_device *dev, int event, int cmd, - u32 seq, u32 pid) + u32 seq, u32 portid) { - return dcbnl_notify(dev, event, cmd, seq, pid, DCB_CAP_DCBX_VER_CEE); + return dcbnl_notify(dev, event, cmd, seq, portid, DCB_CAP_DCBX_VER_CEE); } EXPORT_SYMBOL(dcbnl_cee_notify); @@ -1656,7 +1656,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) struct net_device *netdev; struct dcbmsg *dcb = nlmsg_data(nlh); struct nlattr *tb[DCB_ATTR_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = -EINVAL; struct sk_buff *reply_skb; struct nlmsghdr *reply_nlh = NULL; @@ -1690,7 +1690,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) goto out; } - reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, pid, nlh->nlmsg_seq, + reply_skb = dcbnl_newmsg(fn->type, dcb->cmd, portid, nlh->nlmsg_seq, nlh->nlmsg_flags, &reply_nlh); if (!reply_skb) { ret = -ENOBUFS; @@ -1705,7 +1705,7 @@ static int dcb_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg) nlmsg_end(reply_skb, reply_nlh); - ret = rtnl_unicast(reply_skb, &init_net, pid); + ret = rtnl_unicast(reply_skb, &init_net, portid); out: dev_put(netdev); return ret; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index f3924ab1e019..7b7e561412d3 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -667,12 +667,12 @@ static inline size_t dn_ifaddr_nlmsg_size(void) } static int dn_nl_fill_ifaddr(struct sk_buff *skb, struct dn_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -753,7 +753,7 @@ static int dn_nl_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (dn_idx < skip_naddr) continue; - if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).pid, + if (dn_nl_fill_ifaddr(skb, ifa, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) < 0) goto done; diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index c855e8d0738f..b57419cc41a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -1543,7 +1543,7 @@ static int dn_route_input(struct sk_buff *skb) return dn_route_input_slow(skb); } -static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +static int dn_rt_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct dn_route *rt = (struct dn_route *)skb_dst(skb); @@ -1551,7 +1551,7 @@ static int dn_rt_fill_info(struct sk_buff *skb, u32 pid, u32 seq, struct nlmsghdr *nlh; long expires; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -1685,7 +1685,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void if (rtm->rtm_flags & RTM_F_NOTIFY) rt->rt_flags |= RTCF_NOTIFY; - err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); + err = dn_rt_fill_info(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err == 0) goto out_free; @@ -1694,7 +1694,7 @@ static int dn_cache_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void goto out_free; } - return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).pid); + return rtnl_unicast(skb, &init_net, NETLINK_CB(in_skb).portid); out_free: kfree_skb(skb); @@ -1737,7 +1737,7 @@ int dn_cache_dump(struct sk_buff *skb, struct netlink_callback *cb) if (idx < s_idx) continue; skb_dst_set(skb, dst_clone(&rt->dst)); - if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_rt_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, 1, NLM_F_MULTI) <= 0) { skb_dst_drop(skb); diff --git a/net/decnet/dn_table.c b/net/decnet/dn_table.c index 16c986ab1228..f968c1b58f47 100644 --- a/net/decnet/dn_table.c +++ b/net/decnet/dn_table.c @@ -291,14 +291,14 @@ static inline size_t dn_fib_nlmsg_size(struct dn_fib_info *fi) return payload; } -static int dn_fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +static int dn_fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, u8 scope, void *dst, int dst_len, struct dn_fib_info *fi, unsigned int flags) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -374,14 +374,14 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, struct nlmsghdr *nlh, struct netlink_skb_parms *req) { struct sk_buff *skb; - u32 pid = req ? req->pid : 0; + u32 portid = req ? req->portid : 0; int err = -ENOBUFS; skb = nlmsg_new(dn_fib_nlmsg_size(DN_FIB_INFO(f)), GFP_KERNEL); if (skb == NULL) goto errout; - err = dn_fib_dump_info(skb, pid, nlh->nlmsg_seq, event, tb_id, + err = dn_fib_dump_info(skb, portid, nlh->nlmsg_seq, event, tb_id, f->fn_type, f->fn_scope, &f->fn_key, z, DN_FIB_INFO(f), 0); if (err < 0) { @@ -390,7 +390,7 @@ static void dn_rtmsg_fib(int event, struct dn_fib_node *f, int z, u32 tb_id, kfree_skb(skb); goto errout; } - rtnl_notify(skb, &init_net, pid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); + rtnl_notify(skb, &init_net, portid, RTNLGRP_DECnet_ROUTE, nlh, GFP_KERNEL); return; errout: if (err < 0) @@ -411,7 +411,7 @@ static __inline__ int dn_hash_dump_bucket(struct sk_buff *skb, continue; if (f->fn_state & DN_S_ZOMBIE) continue; - if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (dn_fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->n, diff --git a/net/ieee802154/nl-mac.c b/net/ieee802154/nl-mac.c index 1e9917124e75..96bb08abece2 100644 --- a/net/ieee802154/nl-mac.c +++ b/net/ieee802154/nl-mac.c @@ -246,7 +246,7 @@ nla_put_failure: } EXPORT_SYMBOL(ieee802154_nl_start_confirm); -static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev) { void *hdr; @@ -534,7 +534,7 @@ static int ieee802154_list_iface(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_iface(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_iface(msg, info->snd_portid, info->snd_seq, 0, dev); if (rc < 0) goto out_free; @@ -565,7 +565,7 @@ static int ieee802154_dump_iface(struct sk_buff *skb, if (idx < s_idx || (dev->type != ARPHRD_IEEE802154)) goto cont; - if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).pid, + if (ieee802154_nl_fill_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) break; cont: diff --git a/net/ieee802154/nl-phy.c b/net/ieee802154/nl-phy.c index d54be34cca94..22b1a7058fd3 100644 --- a/net/ieee802154/nl-phy.c +++ b/net/ieee802154/nl-phy.c @@ -35,7 +35,7 @@ #include "ieee802154.h" -static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 pid, +static int ieee802154_nl_fill_phy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct wpan_phy *phy) { void *hdr; @@ -105,7 +105,7 @@ static int ieee802154_list_phy(struct sk_buff *skb, if (!msg) goto out_dev; - rc = ieee802154_nl_fill_phy(msg, info->snd_pid, info->snd_seq, + rc = ieee802154_nl_fill_phy(msg, info->snd_portid, info->snd_seq, 0, phy); if (rc < 0) goto out_free; @@ -138,7 +138,7 @@ static int ieee802154_dump_phy_iter(struct wpan_phy *phy, void *_data) return 0; rc = ieee802154_nl_fill_phy(data->skb, - NETLINK_CB(data->cb->skb).pid, + NETLINK_CB(data->cb->skb).portid, data->cb->nlh->nlmsg_seq, NLM_F_MULTI, phy); diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index f14eff506743..7b00556e184b 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -311,7 +311,7 @@ int inet_addr_onlink(struct in_device *in_dev, __be32 a, __be32 b) } static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, - int destroy, struct nlmsghdr *nlh, u32 pid) + int destroy, struct nlmsghdr *nlh, u32 portid) { struct in_ifaddr *promote = NULL; struct in_ifaddr *ifa, *ifa1 = *ifap; @@ -345,7 +345,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, inet_hash_remove(ifa); *ifap1 = ifa->ifa_next; - rtmsg_ifa(RTM_DELADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa); inet_free_ifa(ifa); @@ -382,7 +382,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, is valid, it will try to restore deleted routes... Grr. So that, this order is correct. */ - rtmsg_ifa(RTM_DELADDR, ifa1, nlh, pid); + rtmsg_ifa(RTM_DELADDR, ifa1, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_DOWN, ifa1); if (promote) { @@ -395,7 +395,7 @@ static void __inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } promote->ifa_flags &= ~IFA_F_SECONDARY; - rtmsg_ifa(RTM_NEWADDR, promote, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, promote, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, promote); for (ifa = next_sec; ifa; ifa = ifa->ifa_next) { @@ -417,7 +417,7 @@ static void inet_del_ifa(struct in_device *in_dev, struct in_ifaddr **ifap, } static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct in_device *in_dev = ifa->ifa_dev; struct in_ifaddr *ifa1, **ifap, **last_primary; @@ -464,7 +464,7 @@ static int __inet_insert_ifa(struct in_ifaddr *ifa, struct nlmsghdr *nlh, /* Send message first, then call notifier. Notifier will trigger FIB update, so that listeners of netlink will know about new ifaddr */ - rtmsg_ifa(RTM_NEWADDR, ifa, nlh, pid); + rtmsg_ifa(RTM_NEWADDR, ifa, nlh, portid); blocking_notifier_call_chain(&inetaddr_chain, NETDEV_UP, ifa); return 0; @@ -563,7 +563,7 @@ static int inet_rtm_deladdr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg !inet_ifa_match(nla_get_be32(tb[IFA_ADDRESS]), ifa))) continue; - __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).pid); + __inet_del_ifa(in_dev, ifap, 1, nlh, NETLINK_CB(skb).portid); return 0; } @@ -649,7 +649,7 @@ static int inet_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, void *arg if (IS_ERR(ifa)) return PTR_ERR(ifa); - return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).pid); + return __inet_insert_ifa(ifa, nlh, NETLINK_CB(skb).portid); } /* @@ -1246,12 +1246,12 @@ static size_t inet_nlmsg_size(void) } static int inet_fill_ifaddr(struct sk_buff *skb, struct in_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), flags); if (nlh == NULL) return -EMSGSIZE; @@ -1313,7 +1313,7 @@ static int inet_dump_ifaddr(struct sk_buff *skb, struct netlink_callback *cb) if (ip_idx < s_ip_idx) continue; if (inet_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI) <= 0) { rcu_read_unlock(); @@ -1335,7 +1335,7 @@ done: } static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, - u32 pid) + u32 portid) { struct sk_buff *skb; u32 seq = nlh ? nlh->nlmsg_seq : 0; @@ -1347,14 +1347,14 @@ static void rtmsg_ifa(int event, struct in_ifaddr *ifa, struct nlmsghdr *nlh, if (skb == NULL) goto errout; - err = inet_fill_ifaddr(skb, ifa, pid, seq, event, 0); + err = inet_fill_ifaddr(skb, ifa, portid, seq, event, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, pid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); + rtnl_notify(skb, net, portid, RTNLGRP_IPV4_IFADDR, nlh, GFP_KERNEL); return; errout: if (err < 0) diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c index dc1f10ed1872..68c93d1bb03a 100644 --- a/net/ipv4/fib_frontend.c +++ b/net/ipv4/fib_frontend.c @@ -557,7 +557,7 @@ static int rtm_to_fib_config(struct net *net, struct sk_buff *skb, cfg->fc_flags = rtm->rtm_flags; cfg->fc_nlflags = nlh->nlmsg_flags; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = net; @@ -955,7 +955,7 @@ static void nl_fib_input(struct sk_buff *skb) struct fib_result_nl *frn; struct nlmsghdr *nlh; struct fib_table *tb; - u32 pid; + u32 portid; net = sock_net(skb->sk); nlh = nlmsg_hdr(skb); @@ -973,10 +973,10 @@ static void nl_fib_input(struct sk_buff *skb) nl_fib_lookup(frn, tb); - pid = NETLINK_CB(skb).pid; /* pid of sending process */ - NETLINK_CB(skb).pid = 0; /* from kernel */ + portid = NETLINK_CB(skb).portid; /* pid of sending process */ + NETLINK_CB(skb).portid = 0; /* from kernel */ NETLINK_CB(skb).dst_group = 0; /* unicast */ - netlink_unicast(net->ipv4.fibnl, skb, pid, MSG_DONTWAIT); + netlink_unicast(net->ipv4.fibnl, skb, portid, MSG_DONTWAIT); } static int __net_init nl_fib_lookup_init(struct net *net) diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index da80dc14cc76..3509065e409a 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -391,7 +391,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, if (skb == NULL) goto errout; - err = fib_dump_info(skb, info->pid, seq, event, tb_id, + err = fib_dump_info(skb, info->portid, seq, event, tb_id, fa->fa_type, key, dst_len, fa->fa_tos, fa->fa_info, nlm_flags); if (err < 0) { @@ -400,7 +400,7 @@ void rtmsg_fib(int event, __be32 key, struct fib_alias *fa, kfree_skb(skb); goto errout; } - rtnl_notify(skb, info->nl_net, info->pid, RTNLGRP_IPV4_ROUTE, + rtnl_notify(skb, info->nl_net, info->portid, RTNLGRP_IPV4_ROUTE, info->nlh, GFP_KERNEL); return; errout: @@ -989,14 +989,14 @@ failure: return ERR_PTR(err); } -int fib_dump_info(struct sk_buff *skb, u32 pid, u32 seq, int event, +int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event, u32 tb_id, u8 type, __be32 dst, int dst_len, u8 tos, struct fib_info *fi, unsigned int flags) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 8d766106b540..31d771ca9a70 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -1873,7 +1873,7 @@ static int fn_trie_dump_fa(t_key key, int plen, struct list_head *fah, continue; } - if (fib_dump_info(skb, NETLINK_CB(cb->skb).pid, + if (fib_dump_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE, tb->tb_id, diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c index 8bc005b1435f..535584c00f91 100644 --- a/net/ipv4/inet_diag.c +++ b/net/ipv4/inet_diag.c @@ -70,7 +70,7 @@ static inline void inet_diag_unlock_handler( int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { const struct inet_sock *inet = inet_sk(sk); @@ -84,7 +84,7 @@ int inet_sk_diag_fill(struct sock *sk, struct inet_connection_sock *icsk, handler = inet_diag_table[req->sdiag_protocol]; BUG_ON(handler == NULL); - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -201,23 +201,23 @@ EXPORT_SYMBOL_GPL(inet_sk_diag_fill); static int inet_csk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *req, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { return inet_sk_diag_fill(sk, inet_csk(sk), - skb, req, user_ns, pid, seq, nlmsg_flags, unlh); + skb, req, user_ns, portid, seq, nlmsg_flags, unlh); } static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, struct sk_buff *skb, struct inet_diag_req_v2 *req, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { long tmo; struct inet_diag_msg *r; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), nlmsg_flags); if (!nlh) return -EMSGSIZE; @@ -260,14 +260,14 @@ static int inet_twsk_diag_fill(struct inet_timewait_sock *tw, static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct inet_diag_req_v2 *r, struct user_namespace *user_ns, - u32 pid, u32 seq, u16 nlmsg_flags, + u32 portid, u32 seq, u16 nlmsg_flags, const struct nlmsghdr *unlh) { if (sk->sk_state == TCP_TIME_WAIT) return inet_twsk_diag_fill((struct inet_timewait_sock *)sk, - skb, r, pid, seq, nlmsg_flags, + skb, r, portid, seq, nlmsg_flags, unlh); - return inet_csk_diag_fill(sk, skb, r, user_ns, pid, seq, nlmsg_flags, unlh); + return inet_csk_diag_fill(sk, skb, r, user_ns, portid, seq, nlmsg_flags, unlh); } int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_skb, @@ -316,14 +316,14 @@ int inet_diag_dump_one_icsk(struct inet_hashinfo *hashinfo, struct sk_buff *in_s err = sk_diag_fill(sk, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); nlmsg_free(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; @@ -557,7 +557,7 @@ static int inet_csk_diag_dump(struct sock *sk, return inet_csk_diag_fill(sk, skb, r, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -592,14 +592,14 @@ static int inet_twsk_diag_dump(struct inet_timewait_sock *tw, } return inet_twsk_diag_fill(tw, skb, r, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct request_sock *req, struct user_namespace *user_ns, - u32 pid, u32 seq, + u32 portid, u32 seq, const struct nlmsghdr *unlh) { const struct inet_request_sock *ireq = inet_rsk(req); @@ -608,7 +608,7 @@ static int inet_diag_fill_req(struct sk_buff *skb, struct sock *sk, struct nlmsghdr *nlh; long tmo; - nlh = nlmsg_put(skb, pid, seq, unlh->nlmsg_type, sizeof(*r), + nlh = nlmsg_put(skb, portid, seq, unlh->nlmsg_type, sizeof(*r), NLM_F_MULTI); if (!nlh) return -EMSGSIZE; @@ -711,7 +711,7 @@ static int inet_diag_dump_reqs(struct sk_buff *skb, struct sock *sk, err = inet_diag_fill_req(skb, sk, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh); if (err < 0) { cb->args[3] = j + 1; diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c index 8aa7a4cf9139..1daa95c2a0ba 100644 --- a/net/ipv4/ipmr.c +++ b/net/ipv4/ipmr.c @@ -626,7 +626,7 @@ static void ipmr_destroy_unres(struct mr_table *mrt, struct mfc_cache *c) e->error = -ETIMEDOUT; memset(&e->msg, 0, sizeof(e->msg)); - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { kfree_skb(skb); } @@ -870,7 +870,7 @@ static void ipmr_cache_resolve(struct net *net, struct mr_table *mrt, memset(&e->msg, 0, sizeof(e->msg)); } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else { ip_mr_forward(net, mrt, skb, c, 0); } @@ -2117,12 +2117,12 @@ int ipmr_get_route(struct net *net, struct sk_buff *skb, } static int ipmr_fill_mroute(struct mr_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc_cache *c) + u32 portid, u32 seq, struct mfc_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2176,7 +2176,7 @@ static int ipmr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ipmr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index d39edf16d607..940f4f4cb201 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2136,7 +2136,7 @@ struct rtable *ip_route_output_flow(struct net *net, struct flowi4 *flp4, EXPORT_SYMBOL_GPL(ip_route_output_flow); static int rt_fill_info(struct net *net, __be32 dst, __be32 src, - struct flowi4 *fl4, struct sk_buff *skb, u32 pid, + struct flowi4 *fl4, struct sk_buff *skb, u32 portid, u32 seq, int event, int nowait, unsigned int flags) { struct rtable *rt = skb_rtable(skb); @@ -2146,7 +2146,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*r), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*r), flags); if (nlh == NULL) return -EMSGSIZE; @@ -2306,12 +2306,12 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, void rt->rt_flags |= RTCF_NOTIFY; err = rt_fill_info(net, dst, src, &fl4, skb, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWROUTE, 0, 0); if (err <= 0) goto errout_free; - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; diff --git a/net/ipv4/tcp_metrics.c b/net/ipv4/tcp_metrics.c index 988edb63ee73..4c752a6e0bcd 100644 --- a/net/ipv4/tcp_metrics.c +++ b/net/ipv4/tcp_metrics.c @@ -803,7 +803,7 @@ static int tcp_metrics_dump_info(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &tcp_metrics_nl_family, NLM_F_MULTI, TCP_METRICS_CMD_GET); if (!hdr) diff --git a/net/ipv4/udp_diag.c b/net/ipv4/udp_diag.c index d2f336ea82ca..505b30ad9182 100644 --- a/net/ipv4/udp_diag.c +++ b/net/ipv4/udp_diag.c @@ -26,7 +26,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, return inet_sk_diag_fill(sk, NULL, skb, req, sk_user_ns(NETLINK_CB(cb->skb).ssk), - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, cb->nlh); } @@ -72,14 +72,14 @@ static int udp_dump_one(struct udp_table *tbl, struct sk_buff *in_skb, err = inet_sk_diag_fill(sk, NULL, rep, req, sk_user_ns(NETLINK_CB(in_skb).ssk), - NETLINK_CB(in_skb).pid, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, nlh); if (err < 0) { WARN_ON(err == -EMSGSIZE); kfree_skb(rep); goto out; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 572cb660837b..1237d5d037d8 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -3534,12 +3534,12 @@ static inline int inet6_ifaddr_msgsize(void) } static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u32 preferred, valid; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3577,7 +3577,7 @@ static int inet6_fill_ifaddr(struct sk_buff *skb, struct inet6_ifaddr *ifa, } static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3586,7 +3586,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, if (ipv6_addr_scope(&ifmca->mca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3602,7 +3602,7 @@ static int inet6_fill_ifmcaddr(struct sk_buff *skb, struct ifmcaddr6 *ifmca, } static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct nlmsghdr *nlh; u8 scope = RT_SCOPE_UNIVERSE; @@ -3611,7 +3611,7 @@ static int inet6_fill_ifacaddr(struct sk_buff *skb, struct ifacaddr6 *ifaca, if (ipv6_addr_scope(&ifaca->aca_addr) & IFA_SITE) scope = RT_SCOPE_SITE; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(struct ifaddrmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrmsg), flags); if (nlh == NULL) return -EMSGSIZE; @@ -3652,7 +3652,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (++ip_idx < s_ip_idx) continue; err = inet6_fill_ifaddr(skb, ifa, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR, NLM_F_MULTI); @@ -3668,7 +3668,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifmcaddr(skb, ifmca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETMULTICAST, NLM_F_MULTI); @@ -3683,7 +3683,7 @@ static int in6_dump_addrs(struct inet6_dev *idev, struct sk_buff *skb, if (ip_idx < s_ip_idx) continue; err = inet6_fill_ifacaddr(skb, ifaca, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_GETANYCAST, NLM_F_MULTI); @@ -3805,7 +3805,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, goto errout_ifa; } - err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).pid, + err = inet6_fill_ifaddr(skb, ifa, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDR, 0); if (err < 0) { /* -EMSGSIZE implies BUG in inet6_ifaddr_msgsize() */ @@ -3813,7 +3813,7 @@ static int inet6_rtm_getaddr(struct sk_buff *in_skb, struct nlmsghdr *nlh, kfree_skb(skb); goto errout_ifa; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout_ifa: in6_ifa_put(ifa); errout: @@ -4015,14 +4015,14 @@ static int inet6_fill_link_af(struct sk_buff *skb, const struct net_device *dev) } static int inet6_fill_ifinfo(struct sk_buff *skb, struct inet6_dev *idev, - u32 pid, u32 seq, int event, unsigned int flags) + u32 portid, u32 seq, int event, unsigned int flags) { struct net_device *dev = idev->dev; struct ifinfomsg *hdr; struct nlmsghdr *nlh; void *protoinfo; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*hdr), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*hdr), flags); if (nlh == NULL) return -EMSGSIZE; @@ -4080,7 +4080,7 @@ static int inet6_dump_ifinfo(struct sk_buff *skb, struct netlink_callback *cb) if (!idev) goto cont; if (inet6_fill_ifinfo(skb, idev, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWLINK, NLM_F_MULTI) <= 0) goto out; @@ -4128,14 +4128,14 @@ static inline size_t inet6_prefix_nlmsg_size(void) } static int inet6_fill_prefix(struct sk_buff *skb, struct inet6_dev *idev, - struct prefix_info *pinfo, u32 pid, u32 seq, + struct prefix_info *pinfo, u32 portid, u32 seq, int event, unsigned int flags) { struct prefixmsg *pmsg; struct nlmsghdr *nlh; struct prefix_cacheinfo ci; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*pmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*pmsg), flags); if (nlh == NULL) return -EMSGSIZE; diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index eb6a63632d3c..0b0171570d17 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -470,10 +470,10 @@ static void ip6addrlbl_putmsg(struct nlmsghdr *nlh, static int ip6addrlbl_fill(struct sk_buff *skb, struct ip6addrlbl_entry *p, u32 lseq, - u32 pid, u32 seq, int event, + u32 portid, u32 seq, int event, unsigned int flags) { - struct nlmsghdr *nlh = nlmsg_put(skb, pid, seq, event, + struct nlmsghdr *nlh = nlmsg_put(skb, portid, seq, event, sizeof(struct ifaddrlblmsg), flags); if (!nlh) return -EMSGSIZE; @@ -503,7 +503,7 @@ static int ip6addrlbl_dump(struct sk_buff *skb, struct netlink_callback *cb) net_eq(ip6addrlbl_net(p), net)) { if ((err = ip6addrlbl_fill(skb, p, ip6addrlbl_table.seq, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDRLABEL, NLM_F_MULTI)) <= 0) @@ -574,7 +574,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, } err = ip6addrlbl_fill(skb, p, lseq, - NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, RTM_NEWADDRLABEL, 0); ip6addrlbl_put(p); @@ -585,7 +585,7 @@ static int ip6addrlbl_get(struct sk_buff *in_skb, struct nlmsghdr* nlh, goto out; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); out: return err; } diff --git a/net/ipv6/ip6mr.c b/net/ipv6/ip6mr.c index 4532973f0dd4..08ea3f0b6e55 100644 --- a/net/ipv6/ip6mr.c +++ b/net/ipv6/ip6mr.c @@ -838,7 +838,7 @@ static void ip6mr_destroy_unres(struct mr6_table *mrt, struct mfc6_cache *c) nlh->nlmsg_len = NLMSG_LENGTH(sizeof(struct nlmsgerr)); skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -ETIMEDOUT; - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else kfree_skb(skb); } @@ -1052,7 +1052,7 @@ static void ip6mr_cache_resolve(struct net *net, struct mr6_table *mrt, skb_trim(skb, nlh->nlmsg_len); ((struct nlmsgerr *)NLMSG_DATA(nlh))->error = -EMSGSIZE; } - rtnl_unicast(skb, net, NETLINK_CB(skb).pid); + rtnl_unicast(skb, net, NETLINK_CB(skb).portid); } else ip6_mr_forward(net, mrt, skb, c); } @@ -2202,12 +2202,12 @@ int ip6mr_get_route(struct net *net, } static int ip6mr_fill_mroute(struct mr6_table *mrt, struct sk_buff *skb, - u32 pid, u32 seq, struct mfc6_cache *c) + u32 portid, u32 seq, struct mfc6_cache *c) { struct nlmsghdr *nlh; struct rtmsg *rtm; - nlh = nlmsg_put(skb, pid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*rtm), NLM_F_MULTI); if (nlh == NULL) return -EMSGSIZE; @@ -2260,7 +2260,7 @@ static int ip6mr_rtm_dumproute(struct sk_buff *skb, struct netlink_callback *cb) if (e < s_e) goto next_entry; if (ip6mr_fill_mroute(mrt, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, mfc) < 0) goto done; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 339d921cf3b6..a81c6790a648 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1874,7 +1874,7 @@ static struct rt6_info *rt6_add_route_info(struct net *net, .fc_dst_len = prefixlen, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_ROUTEINFO | RTF_UP | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = net, }; @@ -1924,7 +1924,7 @@ struct rt6_info *rt6_add_dflt_router(const struct in6_addr *gwaddr, .fc_ifindex = dev->ifindex, .fc_flags = RTF_GATEWAY | RTF_ADDRCONF | RTF_DEFAULT | RTF_UP | RTF_EXPIRES | RTF_PREF(pref), - .fc_nlinfo.pid = 0, + .fc_nlinfo.portid = 0, .fc_nlinfo.nlh = NULL, .fc_nlinfo.nl_net = dev_net(dev), }; @@ -2285,7 +2285,7 @@ static int rtm_to_fib6_config(struct sk_buff *skb, struct nlmsghdr *nlh, if (rtm->rtm_type == RTN_LOCAL) cfg->fc_flags |= RTF_LOCAL; - cfg->fc_nlinfo.pid = NETLINK_CB(skb).pid; + cfg->fc_nlinfo.portid = NETLINK_CB(skb).portid; cfg->fc_nlinfo.nlh = nlh; cfg->fc_nlinfo.nl_net = sock_net(skb->sk); @@ -2376,7 +2376,7 @@ static inline size_t rt6_nlmsg_size(void) static int rt6_fill_node(struct net *net, struct sk_buff *skb, struct rt6_info *rt, struct in6_addr *dst, struct in6_addr *src, - int iif, int type, u32 pid, u32 seq, + int iif, int type, u32 portid, u32 seq, int prefix, int nowait, unsigned int flags) { struct rtmsg *rtm; @@ -2392,7 +2392,7 @@ static int rt6_fill_node(struct net *net, } } - nlh = nlmsg_put(skb, pid, seq, type, sizeof(*rtm), flags); + nlh = nlmsg_put(skb, portid, seq, type, sizeof(*rtm), flags); if (!nlh) return -EMSGSIZE; @@ -2537,7 +2537,7 @@ int rt6_dump_route(struct rt6_info *rt, void *p_arg) return rt6_fill_node(arg->net, arg->skb, rt, NULL, NULL, 0, RTM_NEWROUTE, - NETLINK_CB(arg->cb->skb).pid, arg->cb->nlh->nlmsg_seq, + NETLINK_CB(arg->cb->skb).portid, arg->cb->nlh->nlmsg_seq, prefix, 0, NLM_F_MULTI); } @@ -2617,14 +2617,14 @@ static int inet6_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr* nlh, void skb_dst_set(skb, &rt->dst); err = rt6_fill_node(net, skb, rt, &fl6.daddr, &fl6.saddr, iif, - RTM_NEWROUTE, NETLINK_CB(in_skb).pid, + RTM_NEWROUTE, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, 0, 0); if (err < 0) { kfree_skb(skb); goto errout; } - err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).pid); + err = rtnl_unicast(skb, net, NETLINK_CB(in_skb).portid); errout: return err; } @@ -2644,14 +2644,14 @@ void inet6_rt_notify(int event, struct rt6_info *rt, struct nl_info *info) goto errout; err = rt6_fill_node(net, skb, rt, NULL, NULL, 0, - event, info->pid, seq, 0, 0, 0); + event, info->portid, seq, 0, 0, 0); if (err < 0) { /* -EMSGSIZE implies BUG in rt6_nlmsg_size() */ WARN_ON(err == -EMSGSIZE); kfree_skb(skb); goto errout; } - rtnl_notify(skb, net, info->pid, RTNLGRP_IPV6_ROUTE, + rtnl_notify(skb, net, info->portid, RTNLGRP_IPV6_ROUTE, info->nlh, gfp_any()); return; errout: diff --git a/net/irda/irnetlink.c b/net/irda/irnetlink.c index 6c7c4b92e4f8..c32971269280 100644 --- a/net/irda/irnetlink.c +++ b/net/irda/irnetlink.c @@ -100,7 +100,7 @@ static int irda_nl_get_mode(struct sk_buff *skb, struct genl_info *info) goto err_out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &irda_nl_family, 0, IRDA_NL_CMD_GET_MODE); if (hdr == NULL) { ret = -EMSGSIZE; diff --git a/net/key/af_key.c b/net/key/af_key.c index 334f93b8cfcb..2ca7d7f6861c 100644 --- a/net/key/af_key.c +++ b/net/key/af_key.c @@ -54,7 +54,7 @@ struct pfkey_sock { struct { uint8_t msg_version; - uint32_t msg_pid; + uint32_t msg_portid; int (*dump)(struct pfkey_sock *sk); void (*done)(struct pfkey_sock *sk); union { @@ -1447,7 +1447,7 @@ static int key_notify_sa(struct xfrm_state *x, const struct km_event *c) hdr->sadb_msg_errno = 0; hdr->sadb_msg_reserved = 0; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; pfkey_broadcast(skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xs_net(x)); @@ -1486,7 +1486,7 @@ static int pfkey_add(struct sock *sk, struct sk_buff *skb, const struct sadb_msg else c.event = XFRM_MSG_UPDSA; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_state_notify(x, &c); out: xfrm_state_put(x); @@ -1523,7 +1523,7 @@ static int pfkey_delete(struct sock *sk, struct sk_buff *skb, const struct sadb_ goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_DELSA; km_state_notify(x, &c); out: @@ -1701,7 +1701,7 @@ static int key_notify_sa_flush(const struct km_event *c) hdr->sadb_msg_satype = pfkey_proto2satype(c->data.proto); hdr->sadb_msg_type = SADB_FLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -1736,7 +1736,7 @@ static int pfkey_flush(struct sock *sk, struct sk_buff *skb, const struct sadb_m c.data.proto = proto; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.event = XFRM_MSG_FLUSHSA; c.net = net; km_state_notify(NULL, &c); @@ -1764,7 +1764,7 @@ static int dump_sa(struct xfrm_state *x, int count, void *ptr) out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_reserved = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -1798,7 +1798,7 @@ static int pfkey_dump(struct sock *sk, struct sk_buff *skb, const struct sadb_ms return -EINVAL; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sa; pfk->dump.done = pfkey_dump_sa_done; xfrm_state_walk_init(&pfk->dump.u.state, proto); @@ -2157,7 +2157,7 @@ static int key_notify_policy(struct xfrm_policy *xp, int dir, const struct km_ev out_hdr->sadb_msg_type = event2poltype(c->event); out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = c->seq; - out_hdr->sadb_msg_pid = c->pid; + out_hdr->sadb_msg_pid = c->portid; pfkey_broadcast(out_skb, GFP_ATOMIC, BROADCAST_ALL, NULL, xp_net(xp)); return 0; @@ -2272,7 +2272,7 @@ static int pfkey_spdadd(struct sock *sk, struct sk_buff *skb, const struct sadb_ c.event = XFRM_MSG_NEWPOLICY; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); xfrm_pol_put(xp); @@ -2351,7 +2351,7 @@ static int pfkey_spddelete(struct sock *sk, struct sk_buff *skb, const struct sa goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 0; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, pol->sadb_x_policy_dir-1, &c); @@ -2597,7 +2597,7 @@ static int pfkey_spdget(struct sock *sk, struct sk_buff *skb, const struct sadb_ if (err) goto out; c.seq = hdr->sadb_msg_seq; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.data.byid = 1; c.event = XFRM_MSG_DELPOLICY; km_policy_notify(xp, dir, &c); @@ -2634,7 +2634,7 @@ static int dump_sp(struct xfrm_policy *xp, int dir, int count, void *ptr) out_hdr->sadb_msg_satype = SADB_SATYPE_UNSPEC; out_hdr->sadb_msg_errno = 0; out_hdr->sadb_msg_seq = count + 1; - out_hdr->sadb_msg_pid = pfk->dump.msg_pid; + out_hdr->sadb_msg_pid = pfk->dump.msg_portid; if (pfk->dump.skb) pfkey_broadcast(pfk->dump.skb, GFP_ATOMIC, BROADCAST_ONE, @@ -2663,7 +2663,7 @@ static int pfkey_spddump(struct sock *sk, struct sk_buff *skb, const struct sadb return -EBUSY; pfk->dump.msg_version = hdr->sadb_msg_version; - pfk->dump.msg_pid = hdr->sadb_msg_pid; + pfk->dump.msg_portid = hdr->sadb_msg_pid; pfk->dump.dump = pfkey_dump_sp; pfk->dump.done = pfkey_dump_sp_done; xfrm_policy_walk_init(&pfk->dump.u.policy, XFRM_POLICY_TYPE_MAIN); @@ -2682,7 +2682,7 @@ static int key_notify_policy_flush(const struct km_event *c) hdr = (struct sadb_msg *) skb_put(skb_out, sizeof(struct sadb_msg)); hdr->sadb_msg_type = SADB_X_SPDFLUSH; hdr->sadb_msg_seq = c->seq; - hdr->sadb_msg_pid = c->pid; + hdr->sadb_msg_pid = c->portid; hdr->sadb_msg_version = PF_KEY_V2; hdr->sadb_msg_errno = (uint8_t) 0; hdr->sadb_msg_len = (sizeof(struct sadb_msg) / sizeof(uint64_t)); @@ -2711,7 +2711,7 @@ static int pfkey_spdflush(struct sock *sk, struct sk_buff *skb, const struct sad c.data.type = XFRM_POLICY_TYPE_MAIN; c.event = XFRM_MSG_FLUSHPOLICY; - c.pid = hdr->sadb_msg_pid; + c.portid = hdr->sadb_msg_pid; c.seq = hdr->sadb_msg_seq; c.net = net; km_policy_notify(NULL, 0, &c); diff --git a/net/l2tp/l2tp_netlink.c b/net/l2tp/l2tp_netlink.c index d71cd9229a47..6ec3f67ad3f1 100644 --- a/net/l2tp/l2tp_netlink.c +++ b/net/l2tp/l2tp_netlink.c @@ -78,7 +78,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = genlmsg_put(msg, info->snd_pid, info->snd_seq, + hdr = genlmsg_put(msg, info->snd_portid, info->snd_seq, &l2tp_nl_family, 0, L2TP_CMD_NOOP); if (IS_ERR(hdr)) { ret = PTR_ERR(hdr); @@ -87,7 +87,7 @@ static int l2tp_nl_cmd_noop(struct sk_buff *skb, struct genl_info *info) genlmsg_end(msg, hdr); - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -235,7 +235,7 @@ out: return ret; } -static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_tunnel *tunnel) { void *hdr; @@ -248,7 +248,7 @@ static int l2tp_nl_tunnel_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, struct l2tp_stats stats; unsigned int start; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_TUNNEL_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -359,12 +359,12 @@ static int l2tp_nl_cmd_tunnel_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_tunnel_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_tunnel_send(msg, info->snd_portid, info->snd_seq, NLM_F_ACK, tunnel); if (ret < 0) goto err_out; - return genlmsg_unicast(net, msg, info->snd_pid); + return genlmsg_unicast(net, msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -384,7 +384,7 @@ static int l2tp_nl_cmd_tunnel_dump(struct sk_buff *skb, struct netlink_callback if (tunnel == NULL) goto out; - if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_tunnel_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, tunnel) <= 0) goto out; @@ -604,7 +604,7 @@ out: return ret; } -static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags, +static int l2tp_nl_session_send(struct sk_buff *skb, u32 portid, u32 seq, int flags, struct l2tp_session *session) { void *hdr; @@ -616,7 +616,7 @@ static int l2tp_nl_session_send(struct sk_buff *skb, u32 pid, u32 seq, int flags sk = tunnel->sock; - hdr = genlmsg_put(skb, pid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); + hdr = genlmsg_put(skb, portid, seq, &l2tp_nl_family, flags, L2TP_CMD_SESSION_GET); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -705,12 +705,12 @@ static int l2tp_nl_cmd_session_get(struct sk_buff *skb, struct genl_info *info) goto out; } - ret = l2tp_nl_session_send(msg, info->snd_pid, info->snd_seq, + ret = l2tp_nl_session_send(msg, info->snd_portid, info->snd_seq, 0, session); if (ret < 0) goto err_out; - return genlmsg_unicast(genl_info_net(info), msg, info->snd_pid); + return genlmsg_unicast(genl_info_net(info), msg, info->snd_portid); err_out: nlmsg_free(msg); @@ -742,7 +742,7 @@ static int l2tp_nl_cmd_session_dump(struct sk_buff *skb, struct netlink_callback continue; } - if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).pid, + if (l2tp_nl_session_send(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, session) <= 0) break; diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c index 9730882697aa..ad39ef406851 100644 --- a/net/netfilter/ipset/ip_set_core.c +++ b/net/netfilter/ipset/ip_set_core.c @@ -563,13 +563,13 @@ flag_exist(const struct nlmsghdr *nlh) } static struct nlmsghdr * -start_msg(struct sk_buff *skb, u32 pid, u32 seq, unsigned int flags, +start_msg(struct sk_buff *skb, u32 portid, u32 seq, unsigned int flags, enum ipset_cmd cmd) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - nlh = nlmsg_put(skb, pid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), + nlh = nlmsg_put(skb, portid, seq, cmd | (NFNL_SUBSYS_IPSET << 8), sizeof(*nfmsg), flags); if (nlh == NULL) return NULL; @@ -1045,7 +1045,7 @@ ip_set_dump_start(struct sk_buff *skb, struct netlink_callback *cb) ip_set_id_t index = IPSET_INVALID_ID, max; struct ip_set *set = NULL; struct nlmsghdr *nlh = NULL; - unsigned int flags = NETLINK_CB(cb->skb).pid ? NLM_F_MULTI : 0; + unsigned int flags = NETLINK_CB(cb->skb).portid ? NLM_F_MULTI : 0; u32 dump_type, dump_flags; int ret = 0; @@ -1093,7 +1093,7 @@ dump_last: pr_debug("reference set\n"); __ip_set_get(index); } - nlh = start_msg(skb, NETLINK_CB(cb->skb).pid, + nlh = start_msg(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, flags, IPSET_CMD_LIST); if (!nlh) { @@ -1226,7 +1226,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, skb2 = nlmsg_new(payload, GFP_KERNEL); if (skb2 == NULL) return -ENOMEM; - rep = __nlmsg_put(skb2, NETLINK_CB(skb).pid, + rep = __nlmsg_put(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = ret; @@ -1241,7 +1241,7 @@ call_ad(struct sock *ctnl, struct sk_buff *skb, struct ip_set *set, *errline = lineno; - netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); /* Signal netlink not to send its ACK/errmsg. */ return -EINTR; } @@ -1416,7 +1416,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_HEADER); if (!nlh2) goto nlmsg_failure; @@ -1428,7 +1428,7 @@ ip_set_header(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1476,7 +1476,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_TYPE); if (!nlh2) goto nlmsg_failure; @@ -1489,7 +1489,7 @@ ip_set_type(struct sock *ctnl, struct sk_buff *skb, nlmsg_end(skb2, nlh2); pr_debug("Send TYPE, nlmsg_len: %u\n", nlh2->nlmsg_len); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; @@ -1525,7 +1525,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - nlh2 = start_msg(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, 0, + nlh2 = start_msg(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, 0, IPSET_CMD_PROTOCOL); if (!nlh2) goto nlmsg_failure; @@ -1533,7 +1533,7 @@ ip_set_protocol(struct sock *ctnl, struct sk_buff *skb, goto nla_put_failure; nlmsg_end(skb2, nlh2); - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret < 0) return ret; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 767cc12da0fe..0f924bf19c2b 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -2939,7 +2939,7 @@ static int ip_vs_genl_dump_service(struct sk_buff *skb, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_SERVICE); if (!hdr) @@ -3128,7 +3128,7 @@ static int ip_vs_genl_dump_dest(struct sk_buff *skb, struct ip_vs_dest *dest, { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DEST); if (!hdr) @@ -3257,7 +3257,7 @@ static int ip_vs_genl_dump_daemon(struct sk_buff *skb, __be32 state, struct netlink_callback *cb) { void *hdr; - hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &ip_vs_genl_family, NLM_F_MULTI, IPVS_CMD_NEW_DAEMON); if (!hdr) diff --git a/net/netfilter/nf_conntrack_ecache.c b/net/netfilter/nf_conntrack_ecache.c index e7be79e640de..de9781b6464f 100644 --- a/net/netfilter/nf_conntrack_ecache.c +++ b/net/netfilter/nf_conntrack_ecache.c @@ -61,7 +61,7 @@ void nf_ct_deliver_cached_events(struct nf_conn *ct) goto out_unlock; item.ct = ct; - item.pid = 0; + item.portid = 0; item.report = 0; ret = notify->fcn(events | missed, &item); diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c index a205bd6ce294..59770039b825 100644 --- a/net/netfilter/nf_conntrack_netlink.c +++ b/net/netfilter/nf_conntrack_netlink.c @@ -418,16 +418,16 @@ nla_put_failure: } static int -ctnetlink_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct nf_conn *ct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; struct nlattr *nest_parms; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_NEW); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -604,7 +604,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -680,7 +680,7 @@ ctnetlink_conntrack_event(unsigned int events, struct nf_ct_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - err = nfnetlink_send(skb, net, item->pid, group, item->report, + err = nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); if (err == -ENOBUFS || err == -EAGAIN) return -ENOBUFS; @@ -757,7 +757,7 @@ restart: #endif rcu_read_lock(); res = - ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).pid, + ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), ct); @@ -961,7 +961,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, else { /* Flush the whole table */ nf_conntrack_flush_report(net, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); return 0; } @@ -985,7 +985,7 @@ ctnetlink_del_conntrack(struct sock *ctnl, struct sk_buff *skb, if (del_timer(&ct->timeout)) { if (nf_conntrack_event_report(IPCT_DESTROY, ct, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)) < 0) { nf_ct_delete_from_lists(ct); /* we failed to report the event, try later */ @@ -1069,14 +1069,14 @@ ctnetlink_get_conntrack(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).pid, nlh->nlmsg_seq, + err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), ct); rcu_read_unlock(); nf_ct_put(ct); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -1616,7 +1616,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK) | events, - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_put(ct); } @@ -1638,7 +1638,7 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, (1 << IPCT_PROTOINFO) | (1 << IPCT_NATSEQADJ) | (1 << IPCT_MARK), - ct, NETLINK_CB(skb).pid, + ct, NETLINK_CB(skb).portid, nlmsg_report(nlh)); } } @@ -1648,15 +1648,15 @@ ctnetlink_new_conntrack(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_ct_stat_cpu_fill_info(struct sk_buff *skb, u32 portid, u32 seq, __u16 cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1708,7 +1708,7 @@ ctnetlink_ct_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) st = per_cpu_ptr(net->ct.stat, cpu); if (ctnetlink_ct_stat_cpu_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; @@ -1734,16 +1734,16 @@ ctnetlink_stat_ct_cpu(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnetlink_stat_ct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, struct net *net) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; unsigned int nr_conntracks = atomic_read(&net->ct.count); event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_CT_GET_STATS); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -1776,14 +1776,14 @@ ctnetlink_stat_ct(struct sock *ctnl, struct sk_buff *skb, if (skb2 == NULL) return -ENOMEM; - err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_stat_ct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), sock_net(skb->sk)); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2073,15 +2073,15 @@ nla_put_failure: } static int -ctnetlink_exp_fill_info(struct sk_buff *skb, u32 pid, u32 seq, +ctnetlink_exp_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int event, const struct nf_conntrack_expect *exp) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; event |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2132,7 +2132,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) goto errout; type |= NFNL_SUBSYS_CTNETLINK_EXP << 8; - nlh = nlmsg_put(skb, item->pid, 0, type, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, item->portid, 0, type, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2147,7 +2147,7 @@ ctnetlink_expect_event(unsigned int events, struct nf_exp_event *item) rcu_read_unlock(); nlmsg_end(skb, nlh); - nfnetlink_send(skb, net, item->pid, group, item->report, GFP_ATOMIC); + nfnetlink_send(skb, net, item->portid, group, item->report, GFP_ATOMIC); return 0; nla_put_failure: @@ -2190,7 +2190,7 @@ restart: cb->args[1] = 0; } if (ctnetlink_exp_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp) < 0) { @@ -2283,14 +2283,14 @@ ctnetlink_get_expect(struct sock *ctnl, struct sk_buff *skb, } rcu_read_lock(); - err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).pid, + err = ctnetlink_exp_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, IPCTNL_MSG_EXP_NEW, exp); rcu_read_unlock(); nf_ct_expect_put(exp); if (err <= 0) goto free; - err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, MSG_DONTWAIT); + err = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (err < 0) goto out; @@ -2344,7 +2344,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, /* after list removal, usage count == 1 */ spin_lock_bh(&nf_conntrack_lock); if (del_timer(&exp->timeout)) { - nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).pid, + nf_ct_unlink_expect_report(exp, NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2366,7 +2366,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, if (!strcmp(m_help->helper->name, name) && del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2382,7 +2382,7 @@ ctnetlink_del_expect(struct sock *ctnl, struct sk_buff *skb, hnode) { if (del_timer(&exp->timeout)) { nf_ct_unlink_expect_report(exp, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); nf_ct_expect_put(exp); } @@ -2447,7 +2447,7 @@ static int ctnetlink_create_expect(struct net *net, u16 zone, const struct nlattr * const cda[], u_int8_t u3, - u32 pid, int report) + u32 portid, int report) { struct nf_conntrack_tuple tuple, mask, master_tuple; struct nf_conntrack_tuple_hash *h = NULL; @@ -2560,7 +2560,7 @@ ctnetlink_create_expect(struct net *net, u16 zone, if (err < 0) goto err_out; } - err = nf_ct_expect_related_report(exp, pid, report); + err = nf_ct_expect_related_report(exp, portid, report); err_out: nf_ct_expect_put(exp); out: @@ -2603,7 +2603,7 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, if (nlh->nlmsg_flags & NLM_F_CREATE) { err = ctnetlink_create_expect(net, zone, cda, u3, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, nlmsg_report(nlh)); } return err; @@ -2618,15 +2618,15 @@ ctnetlink_new_expect(struct sock *ctnl, struct sk_buff *skb, } static int -ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 pid, u32 seq, int cpu, +ctnetlink_exp_stat_fill_info(struct sk_buff *skb, u32 portid, u32 seq, int cpu, const struct ip_conntrack_stat *st) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0, event; + unsigned int flags = portid ? NLM_F_MULTI : 0, event; event = (NFNL_SUBSYS_CTNETLINK << 8 | IPCTNL_MSG_EXP_GET_STATS_CPU); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -2665,7 +2665,7 @@ ctnetlink_exp_stat_cpu_dump(struct sk_buff *skb, struct netlink_callback *cb) continue; st = per_cpu_ptr(net->ct.stat, cpu); - if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnetlink_exp_stat_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cpu, st) < 0) break; diff --git a/net/netfilter/nfnetlink_acct.c b/net/netfilter/nfnetlink_acct.c index d7ec92879071..589d686f0b4c 100644 --- a/net/netfilter/nfnetlink_acct.c +++ b/net/netfilter/nfnetlink_acct.c @@ -91,16 +91,16 @@ nfnl_acct_new(struct sock *nfnl, struct sk_buff *skb, } static int -nfnl_acct_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_acct_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_acct *acct) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; u64 pkts, bytes; event |= NFNL_SUBSYS_ACCT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -150,7 +150,7 @@ nfnl_acct_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (nfnl_acct_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur) < 0) { @@ -195,7 +195,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_acct_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_ACCT_NEW, cur); @@ -203,7 +203,7 @@ nfnl_acct_get(struct sock *nfnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 32a1ba3f3e27..3678073360a3 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -395,16 +395,16 @@ nla_put_failure: } static int -nfnl_cthelper_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +nfnl_cthelper_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct nf_conntrack_helper *helper) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; int status; event |= NFNL_SUBSYS_CTHELPER << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -468,7 +468,7 @@ restart: cb->args[1] = 0; } if (nfnl_cthelper_fill_info(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur) < 0) { @@ -538,7 +538,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).pid, + ret = nfnl_cthelper_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), NFNL_MSG_CTHELPER_NEW, cur); @@ -547,7 +547,7 @@ nfnl_cthelper_get(struct sock *nfnl, struct sk_buff *skb, break; } - ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(nfnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_cttimeout.c b/net/netfilter/nfnetlink_cttimeout.c index cdecbc8fe965..8847b4d8be06 100644 --- a/net/netfilter/nfnetlink_cttimeout.c +++ b/net/netfilter/nfnetlink_cttimeout.c @@ -155,16 +155,16 @@ err_proto_put: } static int -ctnl_timeout_fill_info(struct sk_buff *skb, u32 pid, u32 seq, u32 type, +ctnl_timeout_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type, int event, struct ctnl_timeout *timeout) { struct nlmsghdr *nlh; struct nfgenmsg *nfmsg; - unsigned int flags = pid ? NLM_F_MULTI : 0; + unsigned int flags = portid ? NLM_F_MULTI : 0; struct nf_conntrack_l4proto *l4proto = timeout->l4proto; event |= NFNL_SUBSYS_CTNETLINK_TIMEOUT << 8; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*nfmsg), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags); if (nlh == NULL) goto nlmsg_failure; @@ -222,7 +222,7 @@ ctnl_timeout_dump(struct sk_buff *skb, struct netlink_callback *cb) if (last && cur != last) continue; - if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).pid, + if (ctnl_timeout_fill_info(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NFNL_MSG_TYPE(cb->nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur) < 0) { @@ -268,7 +268,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, break; } - ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).pid, + ret = ctnl_timeout_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq, NFNL_MSG_TYPE(nlh->nlmsg_type), IPCTNL_MSG_TIMEOUT_NEW, cur); @@ -276,7 +276,7 @@ cttimeout_get_timeout(struct sock *ctnl, struct sk_buff *skb, kfree_skb(skb2); break; } - ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).pid, + ret = netlink_unicast(ctnl, skb2, NETLINK_CB(skb).portid, MSG_DONTWAIT); if (ret > 0) ret = 0; diff --git a/net/netfilter/nfnetlink_log.c b/net/netfilter/nfnetlink_log.c index be194b144297..8cb67c4dbd62 100644 --- a/net/netfilter/nfnetlink_log.c +++ b/net/netfilter/nfnetlink_log.c @@ -56,7 +56,7 @@ struct nfulnl_instance { struct sk_buff *skb; /* pre-allocatd skb */ struct timer_list timer; struct user_namespace *peer_user_ns; /* User namespace of the peer process */ - int peer_pid; /* PID of the peer process */ + int peer_portid; /* PORTID of the peer process */ /* configurable parameters */ unsigned int flushtimeout; /* timeout until queue flush */ @@ -133,7 +133,7 @@ instance_put(struct nfulnl_instance *inst) static void nfulnl_timer(unsigned long data); static struct nfulnl_instance * -instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) +instance_create(u_int16_t group_num, int portid, struct user_namespace *user_ns) { struct nfulnl_instance *inst; int err; @@ -164,7 +164,7 @@ instance_create(u_int16_t group_num, int pid, struct user_namespace *user_ns) setup_timer(&inst->timer, nfulnl_timer, (unsigned long)inst); inst->peer_user_ns = user_ns; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->group_num = group_num; inst->qthreshold = NFULNL_QTHRESH_DEFAULT; @@ -336,7 +336,7 @@ __nfulnl_send(struct nfulnl_instance *inst) if (!nlh) goto out; } - status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_pid, + status = nfnetlink_unicast(inst->skb, &init_net, inst->peer_portid, MSG_DONTWAIT); inst->qlen = 0; @@ -703,7 +703,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock_bh(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -712,7 +712,7 @@ nfulnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((net_eq(n->net, &init_net)) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -774,7 +774,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_lookup_get(group_num); - if (inst && inst->peer_pid != NETLINK_CB(skb).pid) { + if (inst && inst->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto out_put; } @@ -788,7 +788,7 @@ nfulnl_recv_config(struct sock *ctnl, struct sk_buff *skb, } inst = instance_create(group_num, - NETLINK_CB(skb).pid, + NETLINK_CB(skb).portid, sk_user_ns(NETLINK_CB(skb).ssk)); if (IS_ERR(inst)) { ret = PTR_ERR(inst); @@ -947,7 +947,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %6d %2d\n", inst->group_num, - inst->peer_pid, inst->qlen, + inst->peer_portid, inst->qlen, inst->copy_mode, inst->copy_range, inst->flushtimeout, atomic_read(&inst->use)); } diff --git a/net/netfilter/nfnetlink_queue_core.c b/net/netfilter/nfnetlink_queue_core.c index c0496a55ad0c..b8d9995b76a8 100644 --- a/net/netfilter/nfnetlink_queue_core.c +++ b/net/netfilter/nfnetlink_queue_core.c @@ -44,7 +44,7 @@ struct nfqnl_instance { struct hlist_node hlist; /* global list of queues */ struct rcu_head rcu; - int peer_pid; + int peer_portid; unsigned int queue_maxlen; unsigned int copy_range; unsigned int queue_dropped; @@ -92,7 +92,7 @@ instance_lookup(u_int16_t queue_num) } static struct nfqnl_instance * -instance_create(u_int16_t queue_num, int pid) +instance_create(u_int16_t queue_num, int portid) { struct nfqnl_instance *inst; unsigned int h; @@ -111,7 +111,7 @@ instance_create(u_int16_t queue_num, int pid) } inst->queue_num = queue_num; - inst->peer_pid = pid; + inst->peer_portid = portid; inst->queue_maxlen = NFQNL_QMAX_DEFAULT; inst->copy_range = 0xfffff; inst->copy_mode = NFQNL_COPY_NONE; @@ -440,7 +440,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) } spin_lock_bh(&queue->lock); - if (!queue->peer_pid) { + if (!queue->peer_portid) { err = -EINVAL; goto err_out_free_nskb; } @@ -459,7 +459,7 @@ nfqnl_enqueue_packet(struct nf_queue_entry *entry, unsigned int queuenum) *packet_id_ptr = htonl(entry->id); /* nfnetlink_unicast will either free the nskb or add it to a socket */ - err = nfnetlink_unicast(nskb, &init_net, queue->peer_pid, MSG_DONTWAIT); + err = nfnetlink_unicast(nskb, &init_net, queue->peer_portid, MSG_DONTWAIT); if (err < 0) { queue->queue_user_dropped++; goto err_out_unlock; @@ -616,7 +616,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, if (event == NETLINK_URELEASE && n->protocol == NETLINK_NETFILTER) { int i; - /* destroy all instances for this pid */ + /* destroy all instances for this portid */ spin_lock(&instances_lock); for (i = 0; i < INSTANCE_BUCKETS; i++) { struct hlist_node *tmp, *t2; @@ -625,7 +625,7 @@ nfqnl_rcv_nl_event(struct notifier_block *this, hlist_for_each_entry_safe(inst, tmp, t2, head, hlist) { if ((n->net == &init_net) && - (n->pid == inst->peer_pid)) + (n->portid == inst->peer_portid)) __instance_destroy(inst); } } @@ -650,7 +650,7 @@ static const struct nla_policy nfqa_verdict_batch_policy[NFQA_MAX+1] = { [NFQA_MARK] = { .type = NLA_U32 }, }; -static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) +static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlportid) { struct nfqnl_instance *queue; @@ -658,7 +658,7 @@ static struct nfqnl_instance *verdict_instance_lookup(u16 queue_num, int nlpid) if (!queue) return ERR_PTR(-ENODEV); - if (queue->peer_pid != nlpid) + if (queue->peer_portid != nlportid) return ERR_PTR(-EPERM); return queue; @@ -698,7 +698,7 @@ nfqnl_recv_verdict_batch(struct sock *ctnl, struct sk_buff *skb, LIST_HEAD(batch_list); u16 queue_num = ntohs(nfmsg->res_id); - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -749,7 +749,7 @@ nfqnl_recv_verdict(struct sock *ctnl, struct sk_buff *skb, queue = instance_lookup(queue_num); if (!queue) - queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).pid); + queue = verdict_instance_lookup(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) return PTR_ERR(queue); @@ -832,7 +832,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, rcu_read_lock(); queue = instance_lookup(queue_num); - if (queue && queue->peer_pid != NETLINK_CB(skb).pid) { + if (queue && queue->peer_portid != NETLINK_CB(skb).portid) { ret = -EPERM; goto err_out_unlock; } @@ -844,7 +844,7 @@ nfqnl_recv_config(struct sock *ctnl, struct sk_buff *skb, ret = -EBUSY; goto err_out_unlock; } - queue = instance_create(queue_num, NETLINK_CB(skb).pid); + queue = instance_create(queue_num, NETLINK_CB(skb).portid); if (IS_ERR(queue)) { ret = PTR_ERR(queue); goto err_out_unlock; @@ -1016,7 +1016,7 @@ static int seq_show(struct seq_file *s, void *v) return seq_printf(s, "%5d %6d %5d %1d %5d %5d %5d %8d %2d\n", inst->queue_num, - inst->peer_pid, inst->queue_total, + inst->peer_portid, inst->queue_total, inst->copy_mode, inst->copy_range, inst->queue_dropped, inst->queue_user_dropped, inst->id_sequence, 1); diff --git a/net/netlabel/netlabel_cipso_v4.c b/net/netlabel/netlabel_cipso_v4.c index 6bf878335d94..c15042f987bd 100644 --- a/net/netlabel/netlabel_cipso_v4.c +++ b/net/netlabel/netlabel_cipso_v4.c @@ -627,7 +627,7 @@ static int netlbl_cipsov4_listall_cb(struct cipso_v4_doi *doi_def, void *arg) struct netlbl_cipsov4_doiwalk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_cipsov4_gnl_family, NLM_F_MULTI, NLBL_CIPSOV4_C_LISTALL); if (data == NULL) diff --git a/net/netlabel/netlabel_mgmt.c b/net/netlabel/netlabel_mgmt.c index 4809e2e48b02..c5384ffc6146 100644 --- a/net/netlabel/netlabel_mgmt.c +++ b/net/netlabel/netlabel_mgmt.c @@ -448,7 +448,7 @@ static int netlbl_mgmt_listall_cb(struct netlbl_dom_map *entry, void *arg) struct netlbl_domhsh_walk_arg *cb_arg = arg; void *data; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_LISTALL); if (data == NULL) @@ -613,7 +613,7 @@ static int netlbl_mgmt_protocols_cb(struct sk_buff *skb, int ret_val = -ENOMEM; void *data; - data = genlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + data = genlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &netlbl_mgmt_gnl_family, NLM_F_MULTI, NLBL_MGMT_C_PROTOCOLS); if (data == NULL) diff --git a/net/netlabel/netlabel_unlabeled.c b/net/netlabel/netlabel_unlabeled.c index e7ff694f1049..b7944413b404 100644 --- a/net/netlabel/netlabel_unlabeled.c +++ b/net/netlabel/netlabel_unlabeled.c @@ -1096,7 +1096,7 @@ static int netlbl_unlabel_staticlist_gen(u32 cmd, char *secctx; u32 secctx_len; - data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).pid, + data = genlmsg_put(cb_arg->skb, NETLINK_CB(cb_arg->nl_cb->skb).portid, cb_arg->seq, &netlbl_unlabel_gnl_family, NLM_F_MULTI, cmd); if (data == NULL) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 4d348e97e131..0f2e3ad69c47 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -67,8 +67,8 @@ struct netlink_sock { /* struct sock has to be the first member of netlink_sock */ struct sock sk; - u32 pid; - u32 dst_pid; + u32 portid; + u32 dst_portid; u32 dst_group; u32 flags; u32 subscriptions; @@ -104,7 +104,7 @@ static inline int netlink_is_kernel(struct sock *sk) return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET; } -struct nl_pid_hash { +struct nl_portid_hash { struct hlist_head *table; unsigned long rehash_time; @@ -118,7 +118,7 @@ struct nl_pid_hash { }; struct netlink_table { - struct nl_pid_hash hash; + struct nl_portid_hash hash; struct hlist_head mc_list; struct listeners __rcu *listeners; unsigned int flags; @@ -145,9 +145,9 @@ static inline u32 netlink_group_mask(u32 group) return group ? 1 << (group - 1) : 0; } -static inline struct hlist_head *nl_pid_hashfn(struct nl_pid_hash *hash, u32 pid) +static inline struct hlist_head *nl_portid_hashfn(struct nl_portid_hash *hash, u32 portid) { - return &hash->table[jhash_1word(pid, hash->rnd) & hash->mask]; + return &hash->table[jhash_1word(portid, hash->rnd) & hash->mask]; } static void netlink_destroy_callback(struct netlink_callback *cb) @@ -239,17 +239,17 @@ netlink_unlock_table(void) wake_up(&nl_table_wait); } -static struct sock *netlink_lookup(struct net *net, int protocol, u32 pid) +static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid) { - struct nl_pid_hash *hash = &nl_table[protocol].hash; + struct nl_portid_hash *hash = &nl_table[protocol].hash; struct hlist_head *head; struct sock *sk; struct hlist_node *node; read_lock(&nl_table_lock); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(sk, node, head) { - if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->pid == pid)) { + if (net_eq(sock_net(sk), net) && (nlk_sk(sk)->portid == portid)) { sock_hold(sk); goto found; } @@ -260,7 +260,7 @@ found: return sk; } -static struct hlist_head *nl_pid_hash_zalloc(size_t size) +static struct hlist_head *nl_portid_hash_zalloc(size_t size) { if (size <= PAGE_SIZE) return kzalloc(size, GFP_ATOMIC); @@ -270,7 +270,7 @@ static struct hlist_head *nl_pid_hash_zalloc(size_t size) get_order(size)); } -static void nl_pid_hash_free(struct hlist_head *table, size_t size) +static void nl_portid_hash_free(struct hlist_head *table, size_t size) { if (size <= PAGE_SIZE) kfree(table); @@ -278,7 +278,7 @@ static void nl_pid_hash_free(struct hlist_head *table, size_t size) free_pages((unsigned long)table, get_order(size)); } -static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) +static int nl_portid_hash_rehash(struct nl_portid_hash *hash, int grow) { unsigned int omask, mask, shift; size_t osize, size; @@ -296,7 +296,7 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) size *= 2; } - table = nl_pid_hash_zalloc(size); + table = nl_portid_hash_zalloc(size); if (!table) return 0; @@ -311,23 +311,23 @@ static int nl_pid_hash_rehash(struct nl_pid_hash *hash, int grow) struct hlist_node *node, *tmp; sk_for_each_safe(sk, node, tmp, &otable[i]) - __sk_add_node(sk, nl_pid_hashfn(hash, nlk_sk(sk)->pid)); + __sk_add_node(sk, nl_portid_hashfn(hash, nlk_sk(sk)->portid)); } - nl_pid_hash_free(otable, osize); + nl_portid_hash_free(otable, osize); hash->rehash_time = jiffies + 10 * 60 * HZ; return 1; } -static inline int nl_pid_hash_dilute(struct nl_pid_hash *hash, int len) +static inline int nl_portid_hash_dilute(struct nl_portid_hash *hash, int len) { int avg = hash->entries >> hash->shift; - if (unlikely(avg > 1) && nl_pid_hash_rehash(hash, 1)) + if (unlikely(avg > 1) && nl_portid_hash_rehash(hash, 1)) return 1; if (unlikely(len > avg) && time_after(jiffies, hash->rehash_time)) { - nl_pid_hash_rehash(hash, 0); + nl_portid_hash_rehash(hash, 0); return 1; } @@ -356,9 +356,9 @@ netlink_update_listeners(struct sock *sk) * makes sure updates are visible before bind or setsockopt return. */ } -static int netlink_insert(struct sock *sk, struct net *net, u32 pid) +static int netlink_insert(struct sock *sk, struct net *net, u32 portid) { - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; int err = -EADDRINUSE; struct sock *osk; @@ -366,10 +366,10 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) int len; netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); len = 0; sk_for_each(osk, node, head) { - if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->pid == pid)) + if (net_eq(sock_net(osk), net) && (nlk_sk(osk)->portid == portid)) break; len++; } @@ -377,17 +377,17 @@ static int netlink_insert(struct sock *sk, struct net *net, u32 pid) goto err; err = -EBUSY; - if (nlk_sk(sk)->pid) + if (nlk_sk(sk)->portid) goto err; err = -ENOMEM; if (BITS_PER_LONG > 32 && unlikely(hash->entries >= UINT_MAX)) goto err; - if (len && nl_pid_hash_dilute(hash, len)) - head = nl_pid_hashfn(hash, pid); + if (len && nl_portid_hash_dilute(hash, len)) + head = nl_portid_hashfn(hash, portid); hash->entries++; - nlk_sk(sk)->pid = pid; + nlk_sk(sk)->portid = portid; sk_add_node(sk, head); err = 0; @@ -518,11 +518,11 @@ static int netlink_release(struct socket *sock) skb_queue_purge(&sk->sk_write_queue); - if (nlk->pid) { + if (nlk->portid) { struct netlink_notify n = { .net = sock_net(sk), .protocol = sk->sk_protocol, - .pid = nlk->pid, + .portid = nlk->portid, }; atomic_notifier_call_chain(&netlink_chain, NETLINK_URELEASE, &n); @@ -559,24 +559,24 @@ static int netlink_autobind(struct socket *sock) { struct sock *sk = sock->sk; struct net *net = sock_net(sk); - struct nl_pid_hash *hash = &nl_table[sk->sk_protocol].hash; + struct nl_portid_hash *hash = &nl_table[sk->sk_protocol].hash; struct hlist_head *head; struct sock *osk; struct hlist_node *node; - s32 pid = task_tgid_vnr(current); + s32 portid = task_tgid_vnr(current); int err; static s32 rover = -4097; retry: cond_resched(); netlink_table_grab(); - head = nl_pid_hashfn(hash, pid); + head = nl_portid_hashfn(hash, portid); sk_for_each(osk, node, head) { if (!net_eq(sock_net(osk), net)) continue; - if (nlk_sk(osk)->pid == pid) { - /* Bind collision, search negative pid values. */ - pid = rover--; + if (nlk_sk(osk)->portid == portid) { + /* Bind collision, search negative portid values. */ + portid = rover--; if (rover > -4097) rover = -4097; netlink_table_ungrab(); @@ -585,7 +585,7 @@ retry: } netlink_table_ungrab(); - err = netlink_insert(sk, net, pid); + err = netlink_insert(sk, net, portid); if (err == -EADDRINUSE) goto retry; @@ -668,8 +668,8 @@ static int netlink_bind(struct socket *sock, struct sockaddr *addr, return err; } - if (nlk->pid) { - if (nladdr->nl_pid != nlk->pid) + if (nlk->portid) { + if (nladdr->nl_pid != nlk->portid) return -EINVAL; } else { err = nladdr->nl_pid ? @@ -715,7 +715,7 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (addr->sa_family == AF_UNSPEC) { sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_pid = 0; + nlk->dst_portid = 0; nlk->dst_group = 0; return 0; } @@ -726,12 +726,12 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, if (nladdr->nl_groups && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) return -EPERM; - if (!nlk->pid) + if (!nlk->portid) err = netlink_autobind(sock); if (err == 0) { sk->sk_state = NETLINK_CONNECTED; - nlk->dst_pid = nladdr->nl_pid; + nlk->dst_portid = nladdr->nl_pid; nlk->dst_group = ffs(nladdr->nl_groups); } @@ -750,10 +750,10 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, *addr_len = sizeof(*nladdr); if (peer) { - nladdr->nl_pid = nlk->dst_pid; + nladdr->nl_pid = nlk->dst_portid; nladdr->nl_groups = netlink_group_mask(nlk->dst_group); } else { - nladdr->nl_pid = nlk->pid; + nladdr->nl_pid = nlk->portid; nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; } return 0; @@ -772,19 +772,19 @@ static void netlink_overrun(struct sock *sk) atomic_inc(&sk->sk_drops); } -static struct sock *netlink_getsockbypid(struct sock *ssk, u32 pid) +static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) { struct sock *sock; struct netlink_sock *nlk; - sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, pid); + sock = netlink_lookup(sock_net(ssk), ssk->sk_protocol, portid); if (!sock) return ERR_PTR(-ECONNREFUSED); /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_pid != nlk_sk(ssk)->pid) { + nlk->dst_portid != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -935,7 +935,7 @@ static int netlink_unicast_kernel(struct sock *sk, struct sk_buff *skb, } int netlink_unicast(struct sock *ssk, struct sk_buff *skb, - u32 pid, int nonblock) + u32 portid, int nonblock) { struct sock *sk; int err; @@ -945,7 +945,7 @@ int netlink_unicast(struct sock *ssk, struct sk_buff *skb, timeo = sock_sndtimeo(ssk, nonblock); retry: - sk = netlink_getsockbypid(ssk, pid); + sk = netlink_getsockbyportid(ssk, portid); if (IS_ERR(sk)) { kfree_skb(skb); return PTR_ERR(sk); @@ -1005,7 +1005,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb) struct netlink_broadcast_data { struct sock *exclude_sk; struct net *net; - u32 pid; + u32 portid; u32 group; int failure; int delivery_failure; @@ -1026,7 +1026,7 @@ static int do_one_broadcast(struct sock *sk, if (p->exclude_sk == sk) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1078,7 +1078,7 @@ out: return 0; } -int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation, int (*filter)(struct sock *dsk, struct sk_buff *skb, void *data), void *filter_data) @@ -1092,7 +1092,7 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, info.exclude_sk = ssk; info.net = net; - info.pid = pid; + info.portid = portid; info.group = group; info.failure = 0; info.delivery_failure = 0; @@ -1130,17 +1130,17 @@ int netlink_broadcast_filtered(struct sock *ssk, struct sk_buff *skb, u32 pid, } EXPORT_SYMBOL(netlink_broadcast_filtered); -int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 pid, +int netlink_broadcast(struct sock *ssk, struct sk_buff *skb, u32 portid, u32 group, gfp_t allocation) { - return netlink_broadcast_filtered(ssk, skb, pid, group, allocation, + return netlink_broadcast_filtered(ssk, skb, portid, group, allocation, NULL, NULL); } EXPORT_SYMBOL(netlink_broadcast); struct netlink_set_err_data { struct sock *exclude_sk; - u32 pid; + u32 portid; u32 group; int code; }; @@ -1156,7 +1156,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p) if (!net_eq(sock_net(sk), sock_net(p->exclude_sk))) goto out; - if (nlk->pid == p->pid || p->group - 1 >= nlk->ngroups || + if (nlk->portid == p->portid || p->group - 1 >= nlk->ngroups || !test_bit(p->group - 1, nlk->groups)) goto out; @@ -1174,14 +1174,14 @@ out: /** * netlink_set_err - report error to broadcast listeners * @ssk: the kernel netlink socket, as returned by netlink_kernel_create() - * @pid: the PID of a process that we want to skip (if any) + * @portid: the PORTID of a process that we want to skip (if any) * @groups: the broadcast group that will notice the error * @code: error code, must be negative (as usual in kernelspace) * * This function returns the number of broadcast listeners that have set the * NETLINK_RECV_NO_ENOBUFS socket option. */ -int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) +int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code) { struct netlink_set_err_data info; struct hlist_node *node; @@ -1189,7 +1189,7 @@ int netlink_set_err(struct sock *ssk, u32 pid, u32 group, int code) int ret = 0; info.exclude_sk = ssk; - info.pid = pid; + info.portid = portid; info.group = group; /* sk->sk_err wants a positive error value */ info.code = -code; @@ -1354,7 +1354,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, struct sock *sk = sock->sk; struct netlink_sock *nlk = nlk_sk(sk); struct sockaddr_nl *addr = msg->msg_name; - u32 dst_pid; + u32 dst_portid; u32 dst_group; struct sk_buff *skb; int err; @@ -1374,18 +1374,18 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, err = -EINVAL; if (addr->nl_family != AF_NETLINK) goto out; - dst_pid = addr->nl_pid; + dst_portid = addr->nl_pid; dst_group = ffs(addr->nl_groups); err = -EPERM; - if ((dst_group || dst_pid) && + if ((dst_group || dst_portid) && !netlink_capable(sock, NL_CFG_F_NONROOT_SEND)) goto out; } else { - dst_pid = nlk->dst_pid; + dst_portid = nlk->dst_portid; dst_group = nlk->dst_group; } - if (!nlk->pid) { + if (!nlk->portid) { err = netlink_autobind(sock); if (err) goto out; @@ -1399,7 +1399,7 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (skb == NULL) goto out; - NETLINK_CB(skb).pid = nlk->pid; + NETLINK_CB(skb).portid = nlk->portid; NETLINK_CB(skb).dst_group = dst_group; NETLINK_CB(skb).creds = siocb->scm->creds; @@ -1417,9 +1417,9 @@ static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock, if (dst_group) { atomic_inc(&skb->users); - netlink_broadcast(sk, skb, dst_pid, dst_group, GFP_KERNEL); + netlink_broadcast(sk, skb, dst_portid, dst_group, GFP_KERNEL); } - err = netlink_unicast(sk, skb, dst_pid, msg->msg_flags&MSG_DONTWAIT); + err = netlink_unicast(sk, skb, dst_portid, msg->msg_flags&MSG_DONTWAIT); out: scm_destroy(siocb->scm); @@ -1482,7 +1482,7 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock, struct sockaddr_nl *addr = (struct sockaddr_nl *)msg->msg_name; addr->nl_family = AF_NETLINK; addr->nl_pad = 0; - addr->nl_pid = NETLINK_CB(skb).pid; + addr->nl_pid = NETLINK_CB(skb).portid; addr->nl_groups = netlink_group_mask(NETLINK_CB(skb).dst_group); msg->msg_namelen = sizeof(*addr); } @@ -1683,7 +1683,7 @@ void netlink_clear_multicast_users(struct sock *ksk, unsigned int group) } struct nlmsghdr * -__nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) +__nlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, int type, int len, int flags) { struct nlmsghdr *nlh; int size = NLMSG_LENGTH(len); @@ -1692,7 +1692,7 @@ __nlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, int type, int len, int flags) nlh->nlmsg_type = type; nlh->nlmsg_len = size; nlh->nlmsg_flags = flags; - nlh->nlmsg_pid = pid; + nlh->nlmsg_pid = portid; nlh->nlmsg_seq = seq; if (!__builtin_constant_p(size) || NLMSG_ALIGN(size) - size != 0) memset(NLMSG_DATA(nlh) + len, 0, NLMSG_ALIGN(size) - size); @@ -1788,7 +1788,7 @@ int netlink_dump_start(struct sock *ssk, struct sk_buff *skb, atomic_inc(&skb->users); cb->skb = skb; - sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).pid); + sk = netlink_lookup(sock_net(ssk), ssk->sk_protocol, NETLINK_CB(skb).portid); if (sk == NULL) { netlink_destroy_callback(cb); return -ECONNREFUSED; @@ -1836,7 +1836,7 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) sk = netlink_lookup(sock_net(in_skb->sk), in_skb->sk->sk_protocol, - NETLINK_CB(in_skb).pid); + NETLINK_CB(in_skb).portid); if (sk) { sk->sk_err = ENOBUFS; sk->sk_error_report(sk); @@ -1845,12 +1845,12 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err) return; } - rep = __nlmsg_put(skb, NETLINK_CB(in_skb).pid, nlh->nlmsg_seq, + rep = __nlmsg_put(skb, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, NLMSG_ERROR, payload, 0); errmsg = nlmsg_data(rep); errmsg->error = err; memcpy(&errmsg->msg, nlh, err ? nlh->nlmsg_len : sizeof(*nlh)); - netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).pid, MSG_DONTWAIT); + netlink_unicast(in_skb->sk, skb, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); } EXPORT_SYMBOL(netlink_ack); @@ -1900,33 +1900,33 @@ EXPORT_SYMBOL(netlink_rcv_skb); * nlmsg_notify - send a notification netlink message * @sk: netlink socket to use * @skb: notification message - * @pid: destination netlink pid for reports or 0 + * @portid: destination netlink portid for reports or 0 * @group: destination multicast group or 0 * @report: 1 to report back, 0 to disable * @flags: allocation flags */ -int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 pid, +int nlmsg_notify(struct sock *sk, struct sk_buff *skb, u32 portid, unsigned int group, int report, gfp_t flags) { int err = 0; if (group) { - int exclude_pid = 0; + int exclude_portid = 0; if (report) { atomic_inc(&skb->users); - exclude_pid = pid; + exclude_portid = portid; } /* errors reported via destination sk->sk_err, but propagate * delivery errors if NETLINK_BROADCAST_ERROR flag is set */ - err = nlmsg_multicast(sk, skb, exclude_pid, group, flags); + err = nlmsg_multicast(sk, skb, exclude_portid, group, flags); } if (report) { int err2; - err2 = nlmsg_unicast(sk, skb, pid); + err2 = nlmsg_unicast(sk, skb, portid); if (!err || err == -ESRCH) err = err2; } @@ -1951,7 +1951,7 @@ static struct sock *netlink_seq_socket_idx(struct seq_file *seq, loff_t pos) loff_t off = 0; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (j = 0; j <= hash->mask; j++) { sk_for_each(s, node, &hash->table[j]) { @@ -1999,7 +1999,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos) j = iter->hash_idx + 1; do { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; for (; j <= hash->mask; j++) { s = sk_head(&hash->table[j]); @@ -2038,7 +2038,7 @@ static int netlink_seq_show(struct seq_file *seq, void *v) seq_printf(seq, "%pK %-3d %-6d %08x %-8d %-8d %pK %-8d %-8d %-8lu\n", s, s->sk_protocol, - nlk->pid, + nlk->portid, nlk->groups ? (u32)nlk->groups[0] : 0, sk_rmem_alloc_get(s), sk_wmem_alloc_get(s), @@ -2183,12 +2183,12 @@ static int __init netlink_proto_init(void) order = get_bitmask_order(min(limit, (unsigned long)UINT_MAX)) - 1; for (i = 0; i < MAX_LINKS; i++) { - struct nl_pid_hash *hash = &nl_table[i].hash; + struct nl_portid_hash *hash = &nl_table[i].hash; - hash->table = nl_pid_hash_zalloc(1 * sizeof(*hash->table)); + hash->table = nl_portid_hash_zalloc(1 * sizeof(*hash->table)); if (!hash->table) { while (i-- > 0) - nl_pid_hash_free(nl_table[i].hash.table, + nl_portid_hash_free(nl_table[i].hash.table, 1 * sizeof(*hash->table)); kfree(nl_table); goto panic; diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c index 19288b7d6135..f2aabb6f4105 100644 --- a/net/netlink/genetlink.c +++ b/net/netlink/genetlink.c @@ -501,7 +501,7 @@ EXPORT_SYMBOL(genl_unregister_family); /** * genlmsg_put - Add generic netlink header to netlink message * @skb: socket buffer holding the message - * @pid: netlink pid the message is addressed to + * @portid: netlink portid the message is addressed to * @seq: sequence number (usually the one of the sender) * @family: generic netlink family * @flags: netlink message flags @@ -509,13 +509,13 @@ EXPORT_SYMBOL(genl_unregister_family); * * Returns pointer to user specific header */ -void *genlmsg_put(struct sk_buff *skb, u32 pid, u32 seq, +void *genlmsg_put(struct sk_buff *skb, u32 portid, u32 seq, struct genl_family *family, int flags, u8 cmd) { struct nlmsghdr *nlh; struct genlmsghdr *hdr; - nlh = nlmsg_put(skb, pid, seq, family->id, GENL_HDRLEN + + nlh = nlmsg_put(skb, portid, seq, family->id, GENL_HDRLEN + family->hdrsize, flags); if (nlh == NULL) return NULL; @@ -585,7 +585,7 @@ static int genl_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh) } info.snd_seq = nlh->nlmsg_seq; - info.snd_pid = NETLINK_CB(skb).pid; + info.snd_portid = NETLINK_CB(skb).portid; info.nlhdr = nlh; info.genlhdr = nlmsg_data(nlh); info.userhdr = nlmsg_data(nlh) + GENL_HDRLEN; @@ -626,12 +626,12 @@ static struct genl_family genl_ctrl = { .netnsok = true, }; -static int ctrl_fill_info(struct genl_family *family, u32 pid, u32 seq, +static int ctrl_fill_info(struct genl_family *family, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { void *hdr; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -701,7 +701,7 @@ nla_put_failure: return -EMSGSIZE; } -static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, +static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 portid, u32 seq, u32 flags, struct sk_buff *skb, u8 cmd) { @@ -709,7 +709,7 @@ static int ctrl_fill_mcgrp_info(struct genl_multicast_group *grp, u32 pid, struct nlattr *nla_grps; struct nlattr *nest; - hdr = genlmsg_put(skb, pid, seq, &genl_ctrl, flags, cmd); + hdr = genlmsg_put(skb, portid, seq, &genl_ctrl, flags, cmd); if (hdr == NULL) return -1; @@ -756,7 +756,7 @@ static int ctrl_dumpfamily(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++n < fams_to_skip) continue; - if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).pid, + if (ctrl_fill_info(rt, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, skb, CTRL_CMD_NEWFAMILY) < 0) goto errout; @@ -773,7 +773,7 @@ errout: } static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -782,7 +782,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_info(family, pid, seq, 0, skb, cmd); + err = ctrl_fill_info(family, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -792,7 +792,7 @@ static struct sk_buff *ctrl_build_family_msg(struct genl_family *family, } static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, - u32 pid, int seq, u8 cmd) + u32 portid, int seq, u8 cmd) { struct sk_buff *skb; int err; @@ -801,7 +801,7 @@ static struct sk_buff *ctrl_build_mcgrp_msg(struct genl_multicast_group *grp, if (skb == NULL) return ERR_PTR(-ENOBUFS); - err = ctrl_fill_mcgrp_info(grp, pid, seq, 0, skb, cmd); + err = ctrl_fill_mcgrp_info(grp, portid, seq, 0, skb, cmd); if (err < 0) { nlmsg_free(skb); return ERR_PTR(err); @@ -853,7 +853,7 @@ static int ctrl_getfamily(struct sk_buff *skb, struct genl_info *info) return -ENOENT; } - msg = ctrl_build_family_msg(res, info->snd_pid, info->snd_seq, + msg = ctrl_build_family_msg(res, info->snd_portid, info->snd_seq, CTRL_CMD_NEWFAMILY); if (IS_ERR(msg)) return PTR_ERR(msg); @@ -971,7 +971,7 @@ problem: subsys_initcall(genl_init); -static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, +static int genlmsg_mcast(struct sk_buff *skb, u32 portid, unsigned long group, gfp_t flags) { struct sk_buff *tmp; @@ -986,7 +986,7 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, goto error; } err = nlmsg_multicast(prev->genl_sock, tmp, - pid, group, flags); + portid, group, flags); if (err) goto error; } @@ -994,20 +994,20 @@ static int genlmsg_mcast(struct sk_buff *skb, u32 pid, unsigned long group, prev = net; } - return nlmsg_multicast(prev->genl_sock, skb, pid, group, flags); + return nlmsg_multicast(prev->genl_sock, skb, portid, group, flags); error: kfree_skb(skb); return err; } -int genlmsg_multicast_allns(struct sk_buff *skb, u32 pid, unsigned int group, +int genlmsg_multicast_allns(struct sk_buff *skb, u32 portid, unsigned int group, gfp_t flags) { - return genlmsg_mcast(skb, pid, group, flags); + return genlmsg_mcast(skb, portid, group, flags); } EXPORT_SYMBOL(genlmsg_multicast_allns); -void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, +void genl_notify(struct sk_buff *skb, struct net *net, u32 portid, u32 group, struct nlmsghdr *nlh, gfp_t flags) { struct sock *sk = net->genl_sock; @@ -1016,6 +1016,6 @@ void genl_notify(struct sk_buff *skb, struct net *net, u32 pid, u32 group, if (nlh) report = nlmsg_report(nlh); - nlmsg_notify(sk, skb, pid, group, report, flags); + nlmsg_notify(sk, skb, portid, group, report, flags); } EXPORT_SYMBOL(genl_notify); diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index 4c51714ee741..4bbb70e32d1e 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -58,7 +58,7 @@ static int nfc_genl_send_target(struct sk_buff *msg, struct nfc_target *target, { void *hdr; - hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + hdr = genlmsg_put(msg, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, &nfc_genl_family, flags, NFC_CMD_GET_TARGET); if (!hdr) return -EMSGSIZE; @@ -165,7 +165,7 @@ int nfc_genl_targets_found(struct nfc_dev *dev) struct sk_buff *msg; void *hdr; - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_ATOMIC); if (!msg) @@ -347,13 +347,13 @@ free_msg: } static int nfc_genl_send_device(struct sk_buff *msg, struct nfc_dev *dev, - u32 pid, u32 seq, + u32 portid, u32 seq, struct netlink_callback *cb, int flags) { void *hdr; - hdr = genlmsg_put(msg, pid, seq, &nfc_genl_family, flags, + hdr = genlmsg_put(msg, portid, seq, &nfc_genl_family, flags, NFC_CMD_GET_DEVICE); if (!hdr) return -EMSGSIZE; @@ -401,7 +401,7 @@ static int nfc_genl_dump_devices(struct sk_buff *skb, while (dev) { int rc; - rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).pid, + rc = nfc_genl_send_device(skb, dev, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb, NLM_F_MULTI); if (rc < 0) break; @@ -520,7 +520,7 @@ static int nfc_genl_get_device(struct sk_buff *skb, struct genl_info *info) goto out_putdev; } - rc = nfc_genl_send_device(msg, dev, info->snd_pid, info->snd_seq, + rc = nfc_genl_send_device(msg, dev, info->snd_portid, info->snd_seq, NULL, 0); if (rc < 0) goto out_free; @@ -611,7 +611,7 @@ static int nfc_genl_start_poll(struct sk_buff *skb, struct genl_info *info) rc = nfc_start_poll(dev, im_protocols, tm_protocols); if (!rc) - dev->genl_data.poll_req_pid = info->snd_pid; + dev->genl_data.poll_req_portid = info->snd_portid; mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -645,13 +645,13 @@ static int nfc_genl_stop_poll(struct sk_buff *skb, struct genl_info *info) mutex_lock(&dev->genl_data.genl_data_mutex); - if (dev->genl_data.poll_req_pid != info->snd_pid) { + if (dev->genl_data.poll_req_portid != info->snd_portid) { rc = -EBUSY; goto out; } rc = nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; out: mutex_unlock(&dev->genl_data.genl_data_mutex); @@ -771,15 +771,15 @@ static int nfc_genl_rcv_nl_event(struct notifier_block *this, if (event != NETLINK_URELEASE || n->protocol != NETLINK_GENERIC) goto out; - pr_debug("NETLINK_URELEASE event from id %d\n", n->pid); + pr_debug("NETLINK_URELEASE event from id %d\n", n->portid); nfc_device_iter_init(&iter); dev = nfc_device_iter_next(&iter); while (dev) { - if (dev->genl_data.poll_req_pid == n->pid) { + if (dev->genl_data.poll_req_portid == n->portid) { nfc_stop_poll(dev); - dev->genl_data.poll_req_pid = 0; + dev->genl_data.poll_req_portid = 0; } dev = nfc_device_iter_next(&iter); } @@ -792,7 +792,7 @@ out: void nfc_genl_data_init(struct nfc_genl_data *genl_data) { - genl_data->poll_req_pid = 0; + genl_data->poll_req_portid = 0; mutex_init(&genl_data->genl_data_mutex); } diff --git a/net/openvswitch/actions.c b/net/openvswitch/actions.c index 0da687769f56..c7425f32e7db 100644 --- a/net/openvswitch/actions.c +++ b/net/openvswitch/actions.c @@ -286,7 +286,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, upcall.cmd = OVS_PACKET_CMD_ACTION; upcall.key = &OVS_CB(skb)->flow->key; upcall.userdata = NULL; - upcall.pid = 0; + upcall.portid = 0; for (a = nla_data(attr), rem = nla_len(attr); rem > 0; a = nla_next(a, &rem)) { @@ -296,7 +296,7 @@ static int output_userspace(struct datapath *dp, struct sk_buff *skb, break; case OVS_USERSPACE_ATTR_PID: - upcall.pid = nla_get_u32(a); + upcall.portid = nla_get_u32(a); break; } } diff --git a/net/openvswitch/datapath.c b/net/openvswitch/datapath.c index 105a0b5adc51..56327e877ed9 100644 --- a/net/openvswitch/datapath.c +++ b/net/openvswitch/datapath.c @@ -225,7 +225,7 @@ void ovs_dp_process_received_packet(struct vport *p, struct sk_buff *skb) upcall.cmd = OVS_PACKET_CMD_MISS; upcall.key = &key; upcall.userdata = NULL; - upcall.pid = p->upcall_pid; + upcall.portid = p->upcall_portid; ovs_dp_upcall(dp, skb, &upcall); consume_skb(skb); stats_counter = &stats->n_missed; @@ -261,7 +261,7 @@ int ovs_dp_upcall(struct datapath *dp, struct sk_buff *skb, int dp_ifindex; int err; - if (upcall_info->pid == 0) { + if (upcall_info->portid == 0) { err = -ENOTCONN; goto err; } @@ -395,7 +395,7 @@ static int queue_userspace_packet(struct net *net, int dp_ifindex, skb_copy_and_csum_dev(skb, nla_data(nla)); - err = genlmsg_unicast(net, user_skb, upcall_info->pid); + err = genlmsg_unicast(net, user_skb, upcall_info->portid); out: kfree_skb(nskb); @@ -780,7 +780,7 @@ static struct genl_multicast_group ovs_dp_flow_multicast_group = { /* Called with genl_lock. */ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, - struct sk_buff *skb, u32 pid, + struct sk_buff *skb, u32 portid, u32 seq, u32 flags, u8 cmd) { const int skb_orig_len = skb->len; @@ -795,7 +795,7 @@ static int ovs_flow_cmd_fill_info(struct sw_flow *flow, struct datapath *dp, sf_acts = rcu_dereference_protected(flow->sf_acts, lockdep_genl_is_held()); - ovs_header = genlmsg_put(skb, pid, seq, &dp_flow_genl_family, flags, cmd); + ovs_header = genlmsg_put(skb, portid, seq, &dp_flow_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -879,7 +879,7 @@ static struct sk_buff *ovs_flow_cmd_alloc_info(struct sw_flow *flow) static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, struct datapath *dp, - u32 pid, u32 seq, u8 cmd) + u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; int retval; @@ -888,7 +888,7 @@ static struct sk_buff *ovs_flow_cmd_build_info(struct sw_flow *flow, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_flow_cmd_fill_info(flow, dp, skb, pid, seq, 0, cmd); + retval = ovs_flow_cmd_fill_info(flow, dp, skb, portid, seq, 0, cmd); BUG_ON(retval < 0); return skb; } @@ -970,7 +970,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) flow->hash = ovs_flow_hash(&key, key_len); ovs_flow_tbl_insert(table, flow); - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); } else { @@ -1008,7 +1008,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) ovs_flow_deferred_free_acts(old_acts); } - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); /* Clear stats. */ @@ -1020,7 +1020,7 @@ static int ovs_flow_cmd_new_or_set(struct sk_buff *skb, struct genl_info *info) } if (!IS_ERR(reply)) - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); else @@ -1061,7 +1061,7 @@ static int ovs_flow_cmd_get(struct sk_buff *skb, struct genl_info *info) if (!flow) return -ENOENT; - reply = ovs_flow_cmd_build_info(flow, dp, info->snd_pid, + reply = ovs_flow_cmd_build_info(flow, dp, info->snd_portid, info->snd_seq, OVS_FLOW_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1103,13 +1103,13 @@ static int ovs_flow_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_flow_tbl_remove(table, flow); - err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_pid, + err = ovs_flow_cmd_fill_info(flow, dp, reply, info->snd_portid, info->snd_seq, 0, OVS_FLOW_CMD_DEL); BUG_ON(err < 0); ovs_flow_deferred_free(flow); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_flow_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; } @@ -1137,7 +1137,7 @@ static int ovs_flow_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) break; if (ovs_flow_cmd_fill_info(flow, dp, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_FLOW_CMD_NEW) < 0) break; @@ -1191,13 +1191,13 @@ static struct genl_multicast_group ovs_dp_datapath_multicast_group = { }; static int ovs_dp_cmd_fill_info(struct datapath *dp, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_dp_stats dp_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_datapath_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_datapath_genl_family, flags, cmd); if (!ovs_header) goto error; @@ -1222,7 +1222,7 @@ error: return -EMSGSIZE; } -static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, +static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1232,7 +1232,7 @@ static struct sk_buff *ovs_dp_cmd_build_info(struct datapath *dp, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_dp_cmd_fill_info(dp, skb, pid, seq, 0, cmd); + retval = ovs_dp_cmd_fill_info(dp, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1311,7 +1311,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = NULL; parms.dp = dp; parms.port_no = OVSP_LOCAL; - parms.upcall_pid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_DP_ATTR_UPCALL_PID]); vport = new_vport(&parms); if (IS_ERR(vport)) { @@ -1322,7 +1322,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) goto err_destroy_ports_array; } - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1332,7 +1332,7 @@ static int ovs_dp_cmd_new(struct sk_buff *skb, struct genl_info *info) list_add_tail(&dp->list_node, &ovs_net->dps); rtnl_unlock(); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); return 0; @@ -1394,7 +1394,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return err; - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1402,7 +1402,7 @@ static int ovs_dp_cmd_del(struct sk_buff *skb, struct genl_info *info) __dp_destroy(dp); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1419,7 +1419,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); @@ -1428,7 +1428,7 @@ static int ovs_dp_cmd_set(struct sk_buff *skb, struct genl_info *info) return 0; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_datapath_multicast_group.id, info->nlhdr, GFP_KERNEL); @@ -1444,7 +1444,7 @@ static int ovs_dp_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(dp)) return PTR_ERR(dp); - reply = ovs_dp_cmd_build_info(dp, info->snd_pid, + reply = ovs_dp_cmd_build_info(dp, info->snd_portid, info->snd_seq, OVS_DP_CMD_NEW); if (IS_ERR(reply)) return PTR_ERR(reply); @@ -1461,7 +1461,7 @@ static int ovs_dp_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) list_for_each_entry(dp, &ovs_net->dps, list_node) { if (i >= skip && - ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).pid, + ovs_dp_cmd_fill_info(dp, skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_DP_CMD_NEW) < 0) break; @@ -1521,13 +1521,13 @@ struct genl_multicast_group ovs_dp_vport_multicast_group = { /* Called with RTNL lock or RCU read lock. */ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, - u32 pid, u32 seq, u32 flags, u8 cmd) + u32 portid, u32 seq, u32 flags, u8 cmd) { struct ovs_header *ovs_header; struct ovs_vport_stats vport_stats; int err; - ovs_header = genlmsg_put(skb, pid, seq, &dp_vport_genl_family, + ovs_header = genlmsg_put(skb, portid, seq, &dp_vport_genl_family, flags, cmd); if (!ovs_header) return -EMSGSIZE; @@ -1537,7 +1537,7 @@ static int ovs_vport_cmd_fill_info(struct vport *vport, struct sk_buff *skb, if (nla_put_u32(skb, OVS_VPORT_ATTR_PORT_NO, vport->port_no) || nla_put_u32(skb, OVS_VPORT_ATTR_TYPE, vport->ops->type) || nla_put_string(skb, OVS_VPORT_ATTR_NAME, vport->ops->get_name(vport)) || - nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_pid)) + nla_put_u32(skb, OVS_VPORT_ATTR_UPCALL_PID, vport->upcall_portid)) goto nla_put_failure; ovs_vport_get_stats(vport, &vport_stats); @@ -1559,7 +1559,7 @@ error: } /* Called with RTNL lock or RCU read lock. */ -struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, +struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 portid, u32 seq, u8 cmd) { struct sk_buff *skb; @@ -1569,7 +1569,7 @@ struct sk_buff *ovs_vport_cmd_build_info(struct vport *vport, u32 pid, if (!skb) return ERR_PTR(-ENOMEM); - retval = ovs_vport_cmd_fill_info(vport, skb, pid, seq, 0, cmd); + retval = ovs_vport_cmd_fill_info(vport, skb, portid, seq, 0, cmd); if (retval < 0) { kfree_skb(skb); return ERR_PTR(retval); @@ -1661,21 +1661,21 @@ static int ovs_vport_cmd_new(struct sk_buff *skb, struct genl_info *info) parms.options = a[OVS_VPORT_ATTR_OPTIONS]; parms.dp = dp; parms.port_no = port_no; - parms.upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + parms.upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); vport = new_vport(&parms); err = PTR_ERR(vport); if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { err = PTR_ERR(reply); ovs_dp_detach_port(vport); goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1707,9 +1707,9 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) if (err) goto exit_unlock; if (a[OVS_VPORT_ATTR_UPCALL_PID]) - vport->upcall_pid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); + vport->upcall_portid = nla_get_u32(a[OVS_VPORT_ATTR_UPCALL_PID]); - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); if (IS_ERR(reply)) { netlink_set_err(sock_net(skb->sk)->genl_sock, 0, @@ -1717,7 +1717,7 @@ static int ovs_vport_cmd_set(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1743,7 +1743,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) goto exit_unlock; } - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_DEL); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1751,7 +1751,7 @@ static int ovs_vport_cmd_del(struct sk_buff *skb, struct genl_info *info) ovs_dp_detach_port(vport); - genl_notify(reply, genl_info_net(info), info->snd_pid, + genl_notify(reply, genl_info_net(info), info->snd_portid, ovs_dp_vport_multicast_group.id, info->nlhdr, GFP_KERNEL); exit_unlock: @@ -1773,7 +1773,7 @@ static int ovs_vport_cmd_get(struct sk_buff *skb, struct genl_info *info) if (IS_ERR(vport)) goto exit_unlock; - reply = ovs_vport_cmd_build_info(vport, info->snd_pid, info->snd_seq, + reply = ovs_vport_cmd_build_info(vport, info->snd_portid, info->snd_seq, OVS_VPORT_CMD_NEW); err = PTR_ERR(reply); if (IS_ERR(reply)) @@ -1808,7 +1808,7 @@ static int ovs_vport_cmd_dump(struct sk_buff *skb, struct netlink_callback *cb) hlist_for_each_entry_rcu(vport, n, &dp->ports[i], dp_hash_node) { if (j >= skip && ovs_vport_cmd_fill_info(vport, skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, OVS_VPORT_CMD_NEW) < 0) diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h index 129ec5480758..031dfbf37c93 100644 --- a/net/openvswitch/datapath.h +++ b/net/openvswitch/datapath.h @@ -129,7 +129,7 @@ struct dp_upcall_info { u8 cmd; const struct sw_flow_key *key; const struct nlattr *userdata; - u32 pid; + u32 portid; }; static inline struct net *ovs_dp_get_net(struct datapath *dp) diff --git a/net/openvswitch/vport.c b/net/openvswitch/vport.c index 1abd9609ba78..9055dd698c70 100644 --- a/net/openvswitch/vport.c +++ b/net/openvswitch/vport.c @@ -125,7 +125,7 @@ struct vport *ovs_vport_alloc(int priv_size, const struct vport_ops *ops, vport->dp = parms->dp; vport->port_no = parms->port_no; - vport->upcall_pid = parms->upcall_pid; + vport->upcall_portid = parms->upcall_portid; vport->ops = ops; INIT_HLIST_NODE(&vport->dp_hash_node); diff --git a/net/openvswitch/vport.h b/net/openvswitch/vport.h index c56e4836e93b..3f7961ea3c56 100644 --- a/net/openvswitch/vport.h +++ b/net/openvswitch/vport.h @@ -70,7 +70,7 @@ struct vport_err_stats { * @rcu: RCU callback head for deferred destruction. * @port_no: Index into @dp's @ports array. * @dp: Datapath to which this port belongs. - * @upcall_pid: The Netlink port to use for packets received on this port that + * @upcall_portid: The Netlink port to use for packets received on this port that * miss the flow table. * @hash_node: Element in @dev_table hash table in vport.c. * @dp_hash_node: Element in @datapath->ports hash table in datapath.c. @@ -83,7 +83,7 @@ struct vport { struct rcu_head rcu; u16 port_no; struct datapath *dp; - u32 upcall_pid; + u32 upcall_portid; struct hlist_node hash_node; struct hlist_node dp_hash_node; @@ -113,7 +113,7 @@ struct vport_parms { /* For ovs_vport_alloc(). */ struct datapath *dp; u16 port_no; - u32 upcall_pid; + u32 upcall_portid; }; /** diff --git a/net/packet/diag.c b/net/packet/diag.c index 39bce0d50df9..8db6e21c46bd 100644 --- a/net/packet/diag.c +++ b/net/packet/diag.c @@ -126,13 +126,13 @@ static int pdiag_put_fanout(struct packet_sock *po, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct packet_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct packet_diag_msg *rp; struct packet_sock *po = pkt_sk(sk); - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rp), flags); if (!nlh) return -EMSGSIZE; @@ -184,7 +184,7 @@ static int packet_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (num < s_num) goto next; - if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).pid, + if (sk_diag_fill(sk, skb, req, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, sock_i_ino(sk)) < 0) goto done; diff --git a/net/phonet/pn_netlink.c b/net/phonet/pn_netlink.c index 7dd762a464e5..83a8389619aa 100644 --- a/net/phonet/pn_netlink.c +++ b/net/phonet/pn_netlink.c @@ -33,7 +33,7 @@ /* Device address handling */ static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event); + u32 portid, u32 seq, int event); void phonet_address_notify(int event, struct net_device *dev, u8 addr) { @@ -101,12 +101,12 @@ static int addr_doit(struct sk_buff *skb, struct nlmsghdr *nlh, void *attr) } static int fill_addr(struct sk_buff *skb, struct net_device *dev, u8 addr, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct ifaddrmsg *ifm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*ifm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*ifm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -148,7 +148,7 @@ static int getaddr_dumpit(struct sk_buff *skb, struct netlink_callback *cb) continue; if (fill_addr(skb, pnd->netdev, addr << 2, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWADDR) < 0) goto out; } @@ -165,12 +165,12 @@ out: /* Routes handling */ static int fill_route(struct sk_buff *skb, struct net_device *dev, u8 dst, - u32 pid, u32 seq, int event) + u32 portid, u32 seq, int event) { struct rtmsg *rtm; struct nlmsghdr *nlh; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*rtm), 0); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*rtm), 0); if (nlh == NULL) return -EMSGSIZE; @@ -276,7 +276,7 @@ static int route_dumpit(struct sk_buff *skb, struct netlink_callback *cb) if (addr_idx++ < addr_start_idx) continue; - if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).pid, + if (fill_route(skb, dev, addr << 2, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, RTM_NEWROUTE)) goto out; } diff --git a/net/sched/act_api.c b/net/sched/act_api.c index e3d2c78cb52c..102761d294cb 100644 --- a/net/sched/act_api.c +++ b/net/sched/act_api.c @@ -644,7 +644,7 @@ errout: } static int -tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, +tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 portid, u32 seq, u16 flags, int event, int bind, int ref) { struct tcamsg *t; @@ -652,7 +652,7 @@ tca_get_fill(struct sk_buff *skb, struct tc_action *a, u32 pid, u32 seq, unsigned char *b = skb_tail_pointer(skb); struct nlattr *nest; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_nlmsg_trim; t = nlmsg_data(nlh); @@ -678,7 +678,7 @@ out_nlmsg_trim: } static int -act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, +act_get_notify(struct net *net, u32 portid, struct nlmsghdr *n, struct tc_action *a, int event) { struct sk_buff *skb; @@ -686,16 +686,16 @@ act_get_notify(struct net *net, u32 pid, struct nlmsghdr *n, skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tca_get_fill(skb, a, pid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { + if (tca_get_fill(skb, a, portid, n->nlmsg_seq, 0, event, 0, 0) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnl_unicast(skb, net, pid); + return rtnl_unicast(skb, net, portid); } static struct tc_action * -tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 pid) +tcf_action_get_1(struct nlattr *nla, struct nlmsghdr *n, u32 portid) { struct nlattr *tb[TCA_ACT_MAX + 1]; struct tc_action *a; @@ -762,7 +762,7 @@ static struct tc_action *create_a(int i) } static int tca_action_flush(struct net *net, struct nlattr *nla, - struct nlmsghdr *n, u32 pid) + struct nlmsghdr *n, u32 portid) { struct sk_buff *skb; unsigned char *b; @@ -799,7 +799,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, if (a->ops == NULL) goto err_out; - nlh = nlmsg_put(skb, pid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); + nlh = nlmsg_put(skb, portid, n->nlmsg_seq, RTM_DELACTION, sizeof(*t), 0); if (!nlh) goto out_module_put; t = nlmsg_data(nlh); @@ -823,7 +823,7 @@ static int tca_action_flush(struct net *net, struct nlattr *nla, nlh->nlmsg_flags |= NLM_F_ROOT; module_put(a->ops->owner); kfree(a); - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (err > 0) return 0; @@ -841,7 +841,7 @@ noflush_out: static int tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int event) + u32 portid, int event) { int i, ret; struct nlattr *tb[TCA_ACT_MAX_PRIO + 1]; @@ -853,13 +853,13 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, if (event == RTM_DELACTION && n->nlmsg_flags & NLM_F_ROOT) { if (tb[1] != NULL) - return tca_action_flush(net, tb[1], n, pid); + return tca_action_flush(net, tb[1], n, portid); else return -EINVAL; } for (i = 1; i <= TCA_ACT_MAX_PRIO && tb[i]; i++) { - act = tcf_action_get_1(tb[i], n, pid); + act = tcf_action_get_1(tb[i], n, portid); if (IS_ERR(act)) { ret = PTR_ERR(act); goto err; @@ -874,7 +874,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, } if (event == RTM_GETACTION) - ret = act_get_notify(net, pid, n, head, event); + ret = act_get_notify(net, portid, n, head, event); else { /* delete */ struct sk_buff *skb; @@ -884,7 +884,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, goto err; } - if (tca_get_fill(skb, head, pid, n->nlmsg_seq, 0, event, + if (tca_get_fill(skb, head, portid, n->nlmsg_seq, 0, event, 0, 1) <= 0) { kfree_skb(skb); ret = -EINVAL; @@ -893,7 +893,7 @@ tca_action_gd(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* now do the delete */ tcf_action_destroy(head, 0); - ret = rtnetlink_send(skb, net, pid, RTNLGRP_TC, + ret = rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); if (ret > 0) return 0; @@ -905,7 +905,7 @@ err: } static int tcf_add_notify(struct net *net, struct tc_action *a, - u32 pid, u32 seq, int event, u16 flags) + u32 portid, u32 seq, int event, u16 flags) { struct tcamsg *t; struct nlmsghdr *nlh; @@ -920,7 +920,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*t), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*t), flags); if (!nlh) goto out_kfree_skb; t = nlmsg_data(nlh); @@ -940,7 +940,7 @@ static int tcf_add_notify(struct net *net, struct tc_action *a, nlh->nlmsg_len = skb_tail_pointer(skb) - b; NETLINK_CB(skb).dst_group = RTNLGRP_TC; - err = rtnetlink_send(skb, net, pid, RTNLGRP_TC, flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); if (err > 0) err = 0; return err; @@ -953,7 +953,7 @@ out_kfree_skb: static int tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, - u32 pid, int ovr) + u32 portid, int ovr) { int ret = 0; struct tc_action *act; @@ -971,7 +971,7 @@ tcf_action_add(struct net *net, struct nlattr *nla, struct nlmsghdr *n, /* dump then free all the actions after update; inserted policy * stays intact */ - ret = tcf_add_notify(net, act, pid, seq, RTM_NEWACTION, n->nlmsg_flags); + ret = tcf_add_notify(net, act, portid, seq, RTM_NEWACTION, n->nlmsg_flags); for (a = act; a; a = act) { act = a->next; kfree(a); @@ -984,7 +984,7 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) { struct net *net = sock_net(skb->sk); struct nlattr *tca[TCA_ACT_MAX + 1]; - u32 pid = skb ? NETLINK_CB(skb).pid : 0; + u32 portid = skb ? NETLINK_CB(skb).portid : 0; int ret = 0, ovr = 0; ret = nlmsg_parse(n, sizeof(struct tcamsg), tca, TCA_ACT_MAX, NULL); @@ -1008,17 +1008,17 @@ static int tc_ctl_action(struct sk_buff *skb, struct nlmsghdr *n, void *arg) if (n->nlmsg_flags & NLM_F_REPLACE) ovr = 1; replay: - ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, pid, ovr); + ret = tcf_action_add(net, tca[TCA_ACT_TAB], n, portid, ovr); if (ret == -EAGAIN) goto replay; break; case RTM_DELACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_DELACTION); + portid, RTM_DELACTION); break; case RTM_GETACTION: ret = tca_action_gd(net, tca[TCA_ACT_TAB], n, - pid, RTM_GETACTION); + portid, RTM_GETACTION); break; default: BUG(); @@ -1085,7 +1085,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) goto out_module_put; } - nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).pid, cb->nlh->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, cb->nlh->nlmsg_type, sizeof(*t), 0); if (!nlh) goto out_module_put; @@ -1109,7 +1109,7 @@ tc_dump_action(struct sk_buff *skb, struct netlink_callback *cb) nla_nest_cancel(skb, nest); nlh->nlmsg_len = skb_tail_pointer(skb) - b; - if (NETLINK_CB(cb->skb).pid && ret) + if (NETLINK_CB(cb->skb).portid && ret) nlh->nlmsg_flags |= NLM_F_MULTI; module_put(a_o->owner); return skb->len; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index dc3ef5aef355..7ae02892437c 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -343,13 +343,13 @@ errout: } static int tcf_fill_node(struct sk_buff *skb, struct tcf_proto *tp, - unsigned long fh, u32 pid, u32 seq, u16 flags, int event) + unsigned long fh, u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; unsigned char *b = skb_tail_pointer(skb); - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -381,18 +381,18 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, unsigned long fh, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tcf_fill_node(skb, tp, fh, pid, n->nlmsg_seq, 0, event) <= 0) { + if (tcf_fill_node(skb, tp, fh, portid, n->nlmsg_seq, 0, event) <= 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -407,7 +407,7 @@ static int tcf_node_dump(struct tcf_proto *tp, unsigned long n, { struct tcf_dump_args *a = (void *)arg; - return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).pid, + return tcf_fill_node(a->skb, tp, n, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER); } @@ -465,7 +465,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) if (t > s_t) memset(&cb->args[1], 0, sizeof(cb->args)-sizeof(cb->args[0])); if (cb->args[1] == 0) { - if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).pid, + if (tcf_fill_node(skb, tp, 0, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTFILTER) <= 0) break; diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index a08b4ab3e421..a18d975db59c 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1185,7 +1185,7 @@ graft: } static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1193,7 +1193,7 @@ static int tc_fill_qdisc(struct sk_buff *skb, struct Qdisc *q, u32 clid, struct gnet_dump d; struct qdisc_size_table *stab; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1248,25 +1248,25 @@ static int qdisc_notify(struct net *net, struct sk_buff *oskb, struct Qdisc *old, struct Qdisc *new) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; if (old && !tc_qdisc_dump_ignore(old)) { - if (tc_fill_qdisc(skb, old, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, old, clid, portid, n->nlmsg_seq, 0, RTM_DELQDISC) < 0) goto err_out; } if (new && !tc_qdisc_dump_ignore(new)) { - if (tc_fill_qdisc(skb, new, clid, pid, n->nlmsg_seq, + if (tc_fill_qdisc(skb, new, clid, portid, n->nlmsg_seq, old ? NLM_F_REPLACE : 0, RTM_NEWQDISC) < 0) goto err_out; } if (skb->len) - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); err_out: @@ -1289,7 +1289,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, q_idx++; } else { if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1300,7 +1300,7 @@ static int tc_dump_qdisc_root(struct Qdisc *root, struct sk_buff *skb, continue; } if (!tc_qdisc_dump_ignore(q) && - tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).pid, + tc_fill_qdisc(skb, q, q->parent, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWQDISC) <= 0) goto done; q_idx++; @@ -1375,7 +1375,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) const struct Qdisc_class_ops *cops; unsigned long cl = 0; unsigned long new_cl; - u32 pid = tcm->tcm_parent; + u32 portid = tcm->tcm_parent; u32 clid = tcm->tcm_handle; u32 qid = TC_H_MAJ(clid); int err; @@ -1403,8 +1403,8 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Step 1. Determine qdisc handle X:0 */ - if (pid != TC_H_ROOT) { - u32 qid1 = TC_H_MAJ(pid); + if (portid != TC_H_ROOT) { + u32 qid1 = TC_H_MAJ(portid); if (qid && qid1) { /* If both majors are known, they must be identical. */ @@ -1418,10 +1418,10 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now qid is genuine qdisc handle consistent * both with parent and child. * - * TC_H_MAJ(pid) still may be unspecified, complete it now. + * TC_H_MAJ(portid) still may be unspecified, complete it now. */ - if (pid) - pid = TC_H_MAKE(qid, pid); + if (portid) + portid = TC_H_MAKE(qid, portid); } else { if (qid == 0) qid = dev->qdisc->handle; @@ -1439,7 +1439,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) /* Now try to get class */ if (clid == 0) { - if (pid == TC_H_ROOT) + if (portid == TC_H_ROOT) clid = qid; } else clid = TC_H_MAKE(qid, clid); @@ -1478,7 +1478,7 @@ static int tc_ctl_tclass(struct sk_buff *skb, struct nlmsghdr *n, void *arg) new_cl = cl; err = -EOPNOTSUPP; if (cops->change) - err = cops->change(q, clid, pid, tca, &new_cl); + err = cops->change(q, clid, portid, tca, &new_cl); if (err == 0) tclass_notify(net, skb, n, q, new_cl, RTM_NEWTCLASS); @@ -1492,7 +1492,7 @@ out: static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, unsigned long cl, - u32 pid, u32 seq, u16 flags, int event) + u32 portid, u32 seq, u16 flags, int event) { struct tcmsg *tcm; struct nlmsghdr *nlh; @@ -1500,7 +1500,7 @@ static int tc_fill_tclass(struct sk_buff *skb, struct Qdisc *q, struct gnet_dump d; const struct Qdisc_class_ops *cl_ops = q->ops->cl_ops; - nlh = nlmsg_put(skb, pid, seq, event, sizeof(*tcm), flags); + nlh = nlmsg_put(skb, portid, seq, event, sizeof(*tcm), flags); if (!nlh) goto out_nlmsg_trim; tcm = nlmsg_data(nlh); @@ -1540,18 +1540,18 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, unsigned long cl, int event) { struct sk_buff *skb; - u32 pid = oskb ? NETLINK_CB(oskb).pid : 0; + u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) return -ENOBUFS; - if (tc_fill_tclass(skb, q, cl, pid, n->nlmsg_seq, 0, event) < 0) { + if (tc_fill_tclass(skb, q, cl, portid, n->nlmsg_seq, 0, event) < 0) { kfree_skb(skb); return -EINVAL; } - return rtnetlink_send(skb, net, pid, RTNLGRP_TC, + return rtnetlink_send(skb, net, portid, RTNLGRP_TC, n->nlmsg_flags & NLM_F_ECHO); } @@ -1565,7 +1565,7 @@ static int qdisc_class_dump(struct Qdisc *q, unsigned long cl, struct qdisc_walk { struct qdisc_dump_args *a = (struct qdisc_dump_args *)arg; - return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).pid, + return tc_fill_tclass(a->skb, q, cl, NETLINK_CB(a->cb->skb).portid, a->cb->nlh->nlmsg_seq, NLM_F_MULTI, RTM_NEWTCLASS); } diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c index 47a839df27dc..6675914dc592 100644 --- a/net/tipc/netlink.c +++ b/net/tipc/netlink.c @@ -62,7 +62,7 @@ static int handle_cmd(struct sk_buff *skb, struct genl_info *info) rep_nlh = nlmsg_hdr(rep_buf); memcpy(rep_nlh, req_nlh, hdr_space); rep_nlh->nlmsg_len = rep_buf->len; - genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).pid); + genlmsg_unicast(&init_net, rep_buf, NETLINK_CB(skb).portid); } return 0; diff --git a/net/unix/diag.c b/net/unix/diag.c index 750b13408449..06748f108a57 100644 --- a/net/unix/diag.c +++ b/net/unix/diag.c @@ -110,12 +110,12 @@ static int sk_diag_show_rqlen(struct sock *sk, struct sk_buff *nlskb) } static int sk_diag_fill(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags, int sk_ino) + u32 portid, u32 seq, u32 flags, int sk_ino) { struct nlmsghdr *nlh; struct unix_diag_msg *rep; - nlh = nlmsg_put(skb, pid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), + nlh = nlmsg_put(skb, portid, seq, SOCK_DIAG_BY_FAMILY, sizeof(*rep), flags); if (!nlh) return -EMSGSIZE; @@ -159,7 +159,7 @@ out_nlmsg_trim: } static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_req *req, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { int sk_ino; @@ -170,7 +170,7 @@ static int sk_diag_dump(struct sock *sk, struct sk_buff *skb, struct unix_diag_r if (!sk_ino) return 0; - return sk_diag_fill(sk, skb, req, pid, seq, flags, sk_ino); + return sk_diag_fill(sk, skb, req, portid, seq, flags, sk_ino); } static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) @@ -200,7 +200,7 @@ static int unix_diag_dump(struct sk_buff *skb, struct netlink_callback *cb) if (!(req->udiag_states & (1 << sk->sk_state))) goto next; if (sk_diag_dump(sk, skb, req, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI) < 0) goto done; @@ -267,7 +267,7 @@ again: if (!rep) goto out; - err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).pid, + err = sk_diag_fill(sk, rep, req, NETLINK_CB(in_skb).portid, nlh->nlmsg_seq, 0, req->udiag_ino); if (err < 0) { nlmsg_free(rep); @@ -277,7 +277,7 @@ again: goto again; } - err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).pid, + err = netlink_unicast(net->diag_nlsk, rep, NETLINK_CB(in_skb).portid, MSG_DONTWAIT); if (err > 0) err = 0; diff --git a/net/wireless/core.h b/net/wireless/core.h index bc7430b54771..a343be4a52bd 100644 --- a/net/wireless/core.h +++ b/net/wireless/core.h @@ -55,7 +55,7 @@ struct cfg80211_registered_device { int opencount; /* also protected by devlist_mtx */ wait_queue_head_t dev_wait; - u32 ap_beacons_nlpid; + u32 ap_beacons_nlportid; /* protected by RTNL only */ int num_running_ifaces; diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c index 8fd0242ee169..ec7fcee5bad6 100644 --- a/net/wireless/mlme.c +++ b/net/wireless/mlme.c @@ -615,7 +615,7 @@ EXPORT_SYMBOL(cfg80211_del_sta); struct cfg80211_mgmt_registration { struct list_head list; - u32 nlpid; + u32 nlportid; int match_len; @@ -624,7 +624,7 @@ struct cfg80211_mgmt_registration { u8 match[]; }; -int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, +int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid, u16 frame_type, const u8 *match_data, int match_len) { @@ -672,7 +672,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, memcpy(nreg->match, match_data, match_len); nreg->match_len = match_len; - nreg->nlpid = snd_pid; + nreg->nlportid = snd_portid; nreg->frame_type = cpu_to_le16(frame_type); list_add(&nreg->list, &wdev->mgmt_registrations); @@ -685,7 +685,7 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid, return err; } -void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) +void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid) { struct wiphy *wiphy = wdev->wiphy; struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); @@ -694,7 +694,7 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_lock_bh(&wdev->mgmt_registrations_lock); list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) { - if (reg->nlpid != nlpid) + if (reg->nlportid != nlportid) continue; if (rdev->ops->mgmt_frame_register) { @@ -710,8 +710,8 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid) spin_unlock_bh(&wdev->mgmt_registrations_lock); - if (nlpid == wdev->ap_unexpected_nlpid) - wdev->ap_unexpected_nlpid = 0; + if (nlportid == wdev->ap_unexpected_nlportid) + wdev->ap_unexpected_nlportid = 0; } void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev) @@ -872,7 +872,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_mbm, /* found match! */ /* Indicate the received Action frame to user space */ - if (nl80211_send_mgmt(rdev, wdev, reg->nlpid, + if (nl80211_send_mgmt(rdev, wdev, reg->nlportid, freq, sig_mbm, buf, len, gfp)) continue; diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c index 787aeaa902fe..34eb5c07a567 100644 --- a/net/wireless/nl80211.c +++ b/net/wireless/nl80211.c @@ -496,11 +496,11 @@ static bool is_valid_ie_attr(const struct nlattr *attr) } /* message building helper */ -static inline void *nl80211hdr_put(struct sk_buff *skb, u32 pid, u32 seq, +static inline void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq, int flags, u8 cmd) { /* since there is no private header just add the generic one */ - return genlmsg_put(skb, pid, seq, &nl80211_fam, flags, cmd); + return genlmsg_put(skb, portid, seq, &nl80211_fam, flags, cmd); } static int nl80211_msg_put_channel(struct sk_buff *msg, @@ -851,7 +851,7 @@ nla_put_failure: return -ENOBUFS; } -static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_wiphy(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *dev) { void *hdr; @@ -866,7 +866,7 @@ static int nl80211_send_wiphy(struct sk_buff *msg, u32 pid, u32 seq, int flags, const struct ieee80211_txrx_stypes *mgmt_stypes = dev->wiphy.mgmt_stypes; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_WIPHY); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_WIPHY); if (!hdr) return -1; @@ -1267,7 +1267,7 @@ static int nl80211_dump_wiphy(struct sk_buff *skb, struct netlink_callback *cb) continue; if (++idx <= start) continue; - if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_wiphy(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev) < 0) { idx--; @@ -1290,7 +1290,7 @@ static int nl80211_get_wiphy(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_wiphy(msg, info->snd_pid, info->snd_seq, 0, dev) < 0) { + if (nl80211_send_wiphy(msg, info->snd_portid, info->snd_seq, 0, dev) < 0) { nlmsg_free(msg); return -ENOBUFS; } @@ -1736,14 +1736,14 @@ static inline u64 wdev_id(struct wireless_dev *wdev) ((u64)wiphy_to_dev(wdev->wiphy)->wiphy_idx << 32); } -static int nl80211_send_iface(struct sk_buff *msg, u32 pid, u32 seq, int flags, +static int nl80211_send_iface(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev) { struct net_device *dev = wdev->netdev; void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_INTERFACE); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_INTERFACE); if (!hdr) return -1; @@ -1807,7 +1807,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback * if_idx++; continue; } - if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_iface(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, rdev, wdev) < 0) { mutex_unlock(&rdev->devlist_mtx); @@ -1838,7 +1838,7 @@ static int nl80211_get_interface(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, dev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2056,7 +2056,7 @@ static int nl80211_new_interface(struct sk_buff *skb, struct genl_info *info) break; } - if (nl80211_send_iface(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_iface(msg, info->snd_portid, info->snd_seq, 0, rdev, wdev) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -2191,7 +2191,7 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_NEW_KEY); if (IS_ERR(hdr)) return PTR_ERR(hdr); @@ -2769,7 +2769,7 @@ nla_put_failure: return false; } -static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_station(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct cfg80211_registered_device *rdev, struct net_device *dev, @@ -2778,7 +2778,7 @@ static int nl80211_send_station(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *sinfoattr, *bss_param; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -2931,7 +2931,7 @@ static int nl80211_dump_station(struct sk_buff *skb, goto out_err; if (nl80211_send_station(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, dev, netdev, mac_addr, &sinfo) < 0) @@ -2977,7 +2977,7 @@ static int nl80211_get_station(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_station(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_station(msg, info->snd_portid, info->snd_seq, 0, rdev, dev, mac_addr, &sinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3303,7 +3303,7 @@ static int nl80211_del_station(struct sk_buff *skb, struct genl_info *info) return rdev->ops->del_station(&rdev->wiphy, dev, mac_addr); } -static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_mpath(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, u8 *dst, u8 *next_hop, struct mpath_info *pinfo) @@ -3311,7 +3311,7 @@ static int nl80211_send_mpath(struct sk_buff *msg, u32 pid, u32 seq, void *hdr; struct nlattr *pinfoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, NL80211_CMD_NEW_STATION); + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_STATION); if (!hdr) return -1; @@ -3389,7 +3389,7 @@ static int nl80211_dump_mpath(struct sk_buff *skb, if (err) goto out_err; - if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).pid, + if (nl80211_send_mpath(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, dst, next_hop, &pinfo) < 0) @@ -3438,7 +3438,7 @@ static int nl80211_get_mpath(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - if (nl80211_send_mpath(msg, info->snd_pid, info->snd_seq, 0, + if (nl80211_send_mpath(msg, info->snd_portid, info->snd_seq, 0, dev, dst, next_hop, &pinfo) < 0) { nlmsg_free(msg); return -ENOBUFS; @@ -3679,7 +3679,7 @@ static int nl80211_get_mesh_config(struct sk_buff *skb, msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL); if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_MESH_CONFIG); if (!hdr) goto out; @@ -3998,7 +3998,7 @@ static int nl80211_get_reg(struct sk_buff *skb, struct genl_info *info) goto out; } - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_REG); if (!hdr) goto put_failure; @@ -4616,7 +4616,7 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb, ASSERT_WDEV_LOCK(wdev); - hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).pid, seq, flags, + hdr = nl80211hdr_put(msg, NETLINK_CB(cb->skb).portid, seq, flags, NL80211_CMD_NEW_SCAN_RESULTS); if (!hdr) return -1; @@ -4735,14 +4735,14 @@ static int nl80211_dump_scan(struct sk_buff *skb, return skb->len; } -static int nl80211_send_survey(struct sk_buff *msg, u32 pid, u32 seq, +static int nl80211_send_survey(struct sk_buff *msg, u32 portid, u32 seq, int flags, struct net_device *dev, struct survey_info *survey) { void *hdr; struct nlattr *infoattr; - hdr = nl80211hdr_put(msg, pid, seq, flags, + hdr = nl80211hdr_put(msg, portid, seq, flags, NL80211_CMD_NEW_SURVEY_RESULTS); if (!hdr) return -ENOMEM; @@ -4836,7 +4836,7 @@ static int nl80211_dump_survey(struct sk_buff *skb, } if (nl80211_send_survey(skb, - NETLINK_CB(cb->skb).pid, + NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, netdev, &survey) < 0) @@ -5451,7 +5451,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, } while (1) { - void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).pid, + void *hdr = nl80211hdr_put(skb, NETLINK_CB(cb->skb).portid, cb->nlh->nlmsg_seq, NLM_F_MULTI, NL80211_CMD_TESTMODE); struct nlattr *tmdata; @@ -5491,7 +5491,7 @@ static int nl80211_testmode_dump(struct sk_buff *skb, static struct sk_buff * __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, - int approxlen, u32 pid, u32 seq, gfp_t gfp) + int approxlen, u32 portid, u32 seq, gfp_t gfp) { struct sk_buff *skb; void *hdr; @@ -5501,7 +5501,7 @@ __cfg80211_testmode_alloc_skb(struct cfg80211_registered_device *rdev, if (!skb) return NULL; - hdr = nl80211hdr_put(skb, pid, seq, 0, NL80211_CMD_TESTMODE); + hdr = nl80211hdr_put(skb, portid, seq, 0, NL80211_CMD_TESTMODE); if (!hdr) { kfree_skb(skb); return NULL; @@ -5531,7 +5531,7 @@ struct sk_buff *cfg80211_testmode_alloc_reply_skb(struct wiphy *wiphy, return NULL; return __cfg80211_testmode_alloc_skb(rdev, approxlen, - rdev->testmode_info->snd_pid, + rdev->testmode_info->snd_portid, rdev->testmode_info->snd_seq, GFP_KERNEL); } @@ -5867,7 +5867,7 @@ static int nl80211_remain_on_channel(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_REMAIN_ON_CHANNEL); if (IS_ERR(hdr)) { @@ -6086,7 +6086,7 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info) if (!rdev->ops->mgmt_tx) return -EOPNOTSUPP; - return cfg80211_mlme_register_mgmt(wdev, info->snd_pid, frame_type, + return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type, nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]), nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH])); } @@ -6167,7 +6167,7 @@ static int nl80211_tx_mgmt(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_FRAME); if (IS_ERR(hdr)) { @@ -6284,7 +6284,7 @@ static int nl80211_get_power_save(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_POWER_SAVE); if (!hdr) { err = -ENOBUFS; @@ -6486,7 +6486,7 @@ static int nl80211_get_wowlan(struct sk_buff *skb, struct genl_info *info) if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_GET_WOWLAN); if (!hdr) goto nla_put_failure; @@ -6760,10 +6760,10 @@ static int nl80211_register_unexpected_frame(struct sk_buff *skb, wdev->iftype != NL80211_IFTYPE_P2P_GO) return -EINVAL; - if (wdev->ap_unexpected_nlpid) + if (wdev->ap_unexpected_nlportid) return -EBUSY; - wdev->ap_unexpected_nlpid = info->snd_pid; + wdev->ap_unexpected_nlportid = info->snd_portid; return 0; } @@ -6793,7 +6793,7 @@ static int nl80211_probe_client(struct sk_buff *skb, if (!msg) return -ENOMEM; - hdr = nl80211hdr_put(msg, info->snd_pid, info->snd_seq, 0, + hdr = nl80211hdr_put(msg, info->snd_portid, info->snd_seq, 0, NL80211_CMD_PROBE_CLIENT); if (IS_ERR(hdr)) { @@ -6828,10 +6828,10 @@ static int nl80211_register_beacons(struct sk_buff *skb, struct genl_info *info) if (!(rdev->wiphy.flags & WIPHY_FLAG_REPORTS_OBSS)) return -EOPNOTSUPP; - if (rdev->ap_beacons_nlpid) + if (rdev->ap_beacons_nlportid) return -EBUSY; - rdev->ap_beacons_nlpid = info->snd_pid; + rdev->ap_beacons_nlportid = info->snd_portid; return 0; } @@ -7628,12 +7628,12 @@ static int nl80211_add_scan_req(struct sk_buff *msg, static int nl80211_send_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct wireless_dev *wdev, - u32 pid, u32 seq, int flags, + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -7657,11 +7657,11 @@ static int nl80211_send_sched_scan_msg(struct sk_buff *msg, struct cfg80211_registered_device *rdev, struct net_device *netdev, - u32 pid, u32 seq, int flags, u32 cmd) + u32 portid, u32 seq, int flags, u32 cmd) { void *hdr; - hdr = nl80211hdr_put(msg, pid, seq, flags, cmd); + hdr = nl80211hdr_put(msg, portid, seq, flags, cmd); if (!hdr) return -1; @@ -8370,9 +8370,9 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, struct sk_buff *msg; void *hdr; int err; - u32 nlpid = ACCESS_ONCE(wdev->ap_unexpected_nlpid); + u32 nlportid = ACCESS_ONCE(wdev->ap_unexpected_nlportid); - if (!nlpid) + if (!nlportid) return false; msg = nlmsg_new(100, gfp); @@ -8396,7 +8396,7 @@ static bool __nl80211_unexpected_frame(struct net_device *dev, u8 cmd, return true; } - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return true; nla_put_failure: @@ -8420,7 +8420,7 @@ bool nl80211_unexpected_4addr_frame(struct net_device *dev, } int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, - struct wireless_dev *wdev, u32 nlpid, + struct wireless_dev *wdev, u32 nlportid, int freq, int sig_dbm, const u8 *buf, size_t len, gfp_t gfp) { @@ -8449,7 +8449,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev, genlmsg_end(msg, hdr); - return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + return genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); nla_put_failure: genlmsg_cancel(msg, hdr); @@ -8804,9 +8804,9 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, struct cfg80211_registered_device *rdev = wiphy_to_dev(wiphy); struct sk_buff *msg; void *hdr; - u32 nlpid = ACCESS_ONCE(rdev->ap_beacons_nlpid); + u32 nlportid = ACCESS_ONCE(rdev->ap_beacons_nlportid); - if (!nlpid) + if (!nlportid) return; msg = nlmsg_new(len + 100, gfp); @@ -8829,7 +8829,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy, genlmsg_end(msg, hdr); - genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlpid); + genlmsg_unicast(wiphy_net(&rdev->wiphy), msg, nlportid); return; nla_put_failure: @@ -8853,9 +8853,9 @@ static int nl80211_netlink_notify(struct notifier_block * nb, list_for_each_entry_rcu(rdev, &cfg80211_rdev_list, list) { list_for_each_entry_rcu(wdev, &rdev->wdev_list, list) - cfg80211_mlme_unregister_socket(wdev, notify->pid); - if (rdev->ap_beacons_nlpid == notify->pid) - rdev->ap_beacons_nlpid = 0; + cfg80211_mlme_unregister_socket(wdev, notify->portid); + if (rdev->ap_beacons_nlportid == notify->portid) + rdev->ap_beacons_nlportid = 0; } rcu_read_unlock(); diff --git a/net/xfrm/xfrm_state.c b/net/xfrm/xfrm_state.c index 7856c33898fa..30edad44e7fc 100644 --- a/net/xfrm/xfrm_state.c +++ b/net/xfrm/xfrm_state.c @@ -166,7 +166,7 @@ static DEFINE_SPINLOCK(xfrm_state_gc_lock); int __xfrm_state_delete(struct xfrm_state *x); int km_query(struct xfrm_state *x, struct xfrm_tmpl *t, struct xfrm_policy *pol); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid); +void km_state_expired(struct xfrm_state *x, int hard, u32 portid); static struct xfrm_state_afinfo *xfrm_state_lock_afinfo(unsigned int family) { @@ -1674,13 +1674,13 @@ void km_state_notify(struct xfrm_state *x, const struct km_event *c) EXPORT_SYMBOL(km_policy_notify); EXPORT_SYMBOL(km_state_notify); -void km_state_expired(struct xfrm_state *x, int hard, u32 pid) +void km_state_expired(struct xfrm_state *x, int hard, u32 portid) { struct net *net = xs_net(x); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_EXPIRE; km_state_notify(x, &c); @@ -1726,13 +1726,13 @@ int km_new_mapping(struct xfrm_state *x, xfrm_address_t *ipaddr, __be16 sport) } EXPORT_SYMBOL(km_new_mapping); -void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 pid) +void km_policy_expired(struct xfrm_policy *pol, int dir, int hard, u32 portid) { struct net *net = xp_net(pol); struct km_event c; c.data.hard = hard; - c.pid = pid; + c.portid = portid; c.event = XFRM_MSG_POLEXPIRE; km_policy_notify(pol, dir, &c); diff --git a/net/xfrm/xfrm_user.c b/net/xfrm/xfrm_user.c index 354070adb5ef..b313d932d678 100644 --- a/net/xfrm/xfrm_user.c +++ b/net/xfrm/xfrm_user.c @@ -603,7 +603,7 @@ static int xfrm_add_sa(struct sk_buff *skb, struct nlmsghdr *nlh, } c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -676,7 +676,7 @@ static int xfrm_del_sa(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.event = nlh->nlmsg_type; km_state_notify(x, &c); @@ -826,7 +826,7 @@ static int dump_one_state(struct xfrm_state *x, int count, void *ptr) struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWSA, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -904,7 +904,7 @@ static inline size_t xfrm_spdinfo_msgsize(void) } static int build_spdinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_spdinfo si; struct xfrmu_spdinfo spc; @@ -913,7 +913,7 @@ static int build_spdinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSPDINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -946,17 +946,17 @@ static int xfrm_get_spdinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_spdinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_spdinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_spdinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static inline size_t xfrm_sadinfo_msgsize(void) @@ -967,7 +967,7 @@ static inline size_t xfrm_sadinfo_msgsize(void) } static int build_sadinfo(struct sk_buff *skb, struct net *net, - u32 pid, u32 seq, u32 flags) + u32 portid, u32 seq, u32 flags) { struct xfrmk_sadinfo si; struct xfrmu_sadhinfo sh; @@ -975,7 +975,7 @@ static int build_sadinfo(struct sk_buff *skb, struct net *net, int err; u32 *f; - nlh = nlmsg_put(skb, pid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); + nlh = nlmsg_put(skb, portid, seq, XFRM_MSG_NEWSADINFO, sizeof(u32), 0); if (nlh == NULL) /* shouldn't really happen ... */ return -EMSGSIZE; @@ -1003,17 +1003,17 @@ static int xfrm_get_sadinfo(struct sk_buff *skb, struct nlmsghdr *nlh, struct net *net = sock_net(skb->sk); struct sk_buff *r_skb; u32 *flags = nlmsg_data(nlh); - u32 spid = NETLINK_CB(skb).pid; + u32 sportid = NETLINK_CB(skb).portid; u32 seq = nlh->nlmsg_seq; r_skb = nlmsg_new(xfrm_sadinfo_msgsize(), GFP_ATOMIC); if (r_skb == NULL) return -ENOMEM; - if (build_sadinfo(r_skb, net, spid, seq, *flags) < 0) + if (build_sadinfo(r_skb, net, sportid, seq, *flags) < 0) BUG(); - return nlmsg_unicast(net->xfrm.nlsk, r_skb, spid); + return nlmsg_unicast(net->xfrm.nlsk, r_skb, sportid); } static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, @@ -1033,7 +1033,7 @@ static int xfrm_get_sa(struct sk_buff *skb, struct nlmsghdr *nlh, if (IS_ERR(resp_skb)) { err = PTR_ERR(resp_skb); } else { - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); } xfrm_state_put(x); out_noput: @@ -1114,7 +1114,7 @@ static int xfrm_alloc_userspi(struct sk_buff *skb, struct nlmsghdr *nlh, goto out; } - err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, NETLINK_CB(skb).portid); out: xfrm_state_put(x); @@ -1401,7 +1401,7 @@ static int xfrm_add_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); xfrm_pol_put(xp); @@ -1486,7 +1486,7 @@ static int dump_one_policy(struct xfrm_policy *xp, int dir, int count, void *ptr struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, NETLINK_CB(in_skb).pid, sp->nlmsg_seq, + nlh = nlmsg_put(skb, NETLINK_CB(in_skb).portid, sp->nlmsg_seq, XFRM_MSG_NEWPOLICY, sizeof(*p), sp->nlmsg_flags); if (nlh == NULL) return -EMSGSIZE; @@ -1621,7 +1621,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, err = PTR_ERR(resp_skb); } else { err = nlmsg_unicast(net->xfrm.nlsk, resp_skb, - NETLINK_CB(skb).pid); + NETLINK_CB(skb).portid); } } else { uid_t loginuid = audit_get_loginuid(current); @@ -1638,7 +1638,7 @@ static int xfrm_get_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.byid = p->index; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; km_policy_notify(xp, p->dir, &c); } @@ -1668,7 +1668,7 @@ static int xfrm_flush_sa(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.proto = p->proto; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_state_notify(NULL, &c); @@ -1695,7 +1695,7 @@ static int build_aevent(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_NEWAE, sizeof(*id), 0); if (nlh == NULL) return -EMSGSIZE; @@ -1777,11 +1777,11 @@ static int xfrm_get_ae(struct sk_buff *skb, struct nlmsghdr *nlh, spin_lock_bh(&x->lock); c.data.aevent = p->flags; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; if (build_aevent(r_skb, x, &c) < 0) BUG(); - err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).pid); + err = nlmsg_unicast(net->xfrm.nlsk, r_skb, NETLINK_CB(skb).portid); spin_unlock_bh(&x->lock); xfrm_state_put(x); return err; @@ -1827,7 +1827,7 @@ static int xfrm_new_ae(struct sk_buff *skb, struct nlmsghdr *nlh, c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.data.aevent = XFRM_AE_CU; km_state_notify(x, &c); err = 0; @@ -1862,7 +1862,7 @@ static int xfrm_flush_policy(struct sk_buff *skb, struct nlmsghdr *nlh, c.data.type = type; c.event = nlh->nlmsg_type; c.seq = nlh->nlmsg_seq; - c.pid = nlh->nlmsg_pid; + c.portid = nlh->nlmsg_pid; c.net = net; km_policy_notify(NULL, 0, &c); return 0; @@ -2370,7 +2370,7 @@ static int build_expire(struct sk_buff *skb, struct xfrm_state *x, const struct struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_EXPIRE, sizeof(*ue), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2429,7 +2429,7 @@ static int xfrm_notify_sa_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHSA, sizeof(*p), 0); if (nlh == NULL) { kfree_skb(skb); return -EMSGSIZE; @@ -2497,7 +2497,7 @@ static int xfrm_notify_sa(struct xfrm_state *x, const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2696,7 +2696,7 @@ static int build_polexpire(struct sk_buff *skb, struct xfrm_policy *xp, struct nlmsghdr *nlh; int err; - nlh = nlmsg_put(skb, c->pid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); + nlh = nlmsg_put(skb, c->portid, 0, XFRM_MSG_POLEXPIRE, sizeof(*upe), 0); if (nlh == NULL) return -EMSGSIZE; @@ -2756,7 +2756,7 @@ static int xfrm_notify_policy(struct xfrm_policy *xp, int dir, const struct km_e if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, c->event, headlen, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, c->event, headlen, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; @@ -2810,7 +2810,7 @@ static int xfrm_notify_policy_flush(const struct km_event *c) if (skb == NULL) return -ENOMEM; - nlh = nlmsg_put(skb, c->pid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); + nlh = nlmsg_put(skb, c->portid, c->seq, XFRM_MSG_FLUSHPOLICY, 0, 0); err = -EMSGSIZE; if (nlh == NULL) goto out_free_skb; -- cgit v1.2.3 From fdd6681d92a70b3db73cdb61c6b4053f2f8003b3 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Mon, 10 Sep 2012 02:48:44 +0000 Subject: ipv6: remove some useless RCU read lock After this commit: commit 97cac0821af4474ec4ba3a9e7a36b98ed9b6db88 Author: David S. Miller Date: Mon Jul 2 22:43:47 2012 -0700 ipv6: Store route neighbour in rt6_info struct. we no longer use RCU to protect route neighbour. Cc: "David S. Miller" Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 13 ++----------- net/ipv6/route.c | 15 ++------------- 2 files changed, 4 insertions(+), 24 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index a4f6263fddca..3dd4a37488d5 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -123,16 +123,11 @@ static int ip6_finish_output2(struct sk_buff *skb) skb->len); } - rcu_read_lock(); rt = (struct rt6_info *) dst; neigh = rt->n; - if (neigh) { - int res = dst_neigh_output(dst, neigh, skb); + if (neigh) + return dst_neigh_output(dst, neigh, skb); - rcu_read_unlock(); - return res; - } - rcu_read_unlock(); IP6_INC_STATS_BH(dev_net(dst->dev), ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); kfree_skb(skb); @@ -983,7 +978,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, * dst entry and replace it instead with the * dst entry of the nexthop router */ - rcu_read_lock(); rt = (struct rt6_info *) *dst; n = rt->n; if (n && !(n->nud_state & NUD_VALID)) { @@ -991,7 +985,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, struct flowi6 fl_gw6; int redirect; - rcu_read_unlock(); ifp = ipv6_get_ifaddr(net, &fl6->saddr, (*dst)->dev, 1); @@ -1011,8 +1004,6 @@ static int ip6_dst_lookup_tail(struct sock *sk, if ((err = (*dst)->error)) goto out_err_release; } - } else { - rcu_read_unlock(); } #endif diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a81c6790a648..399613b7972f 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -451,10 +451,9 @@ static void rt6_probe(struct rt6_info *rt) * Router Reachability Probe MUST be rate-limited * to no more than one per minute. */ - rcu_read_lock(); neigh = rt ? rt->n : NULL; if (!neigh || (neigh->nud_state & NUD_VALID)) - goto out; + return; read_lock_bh(&neigh->lock); if (!(neigh->nud_state & NUD_VALID) && time_after(jiffies, neigh->updated + rt->rt6i_idev->cnf.rtr_probe_interval)) { @@ -470,8 +469,6 @@ static void rt6_probe(struct rt6_info *rt) } else { read_unlock_bh(&neigh->lock); } -out: - rcu_read_unlock(); } #else static inline void rt6_probe(struct rt6_info *rt) @@ -498,7 +495,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt) struct neighbour *neigh; int m; - rcu_read_lock(); neigh = rt->n; if (rt->rt6i_flags & RTF_NONEXTHOP || !(rt->rt6i_flags & RTF_GATEWAY)) @@ -516,7 +512,6 @@ static inline int rt6_check_neigh(struct rt6_info *rt) read_unlock_bh(&neigh->lock); } else m = 0; - rcu_read_unlock(); return m; } @@ -2496,15 +2491,11 @@ static int rt6_fill_node(struct net *net, if (rtnetlink_put_metrics(skb, dst_metrics_ptr(&rt->dst)) < 0) goto nla_put_failure; - rcu_read_lock(); n = rt->n; if (n) { - if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) { - rcu_read_unlock(); + if (nla_put(skb, RTA_GATEWAY, 16, &n->primary_key) < 0) goto nla_put_failure; - } } - rcu_read_unlock(); if (rt->dst.dev && nla_put_u32(skb, RTA_OIF, rt->dst.dev->ifindex)) @@ -2706,14 +2697,12 @@ static int rt6_info_route(struct rt6_info *rt, void *p_arg) #else seq_puts(m, "00000000000000000000000000000000 00 "); #endif - rcu_read_lock(); n = rt->n; if (n) { seq_printf(m, "%pi6", n->primary_key); } else { seq_puts(m, "00000000000000000000000000000000"); } - rcu_read_unlock(); seq_printf(m, " %08x %08x %08x %08x %8s\n", rt->rt6i_metric, atomic_read(&rt->dst.__refcnt), rt->dst.__use, rt->rt6i_flags, -- cgit v1.2.3 From 417962a02ba283c8532d61474dc08e0a966cd269 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Mon, 10 Sep 2012 18:16:49 +0000 Subject: ipv6: Add labels for site-local and 6bone testing addresses (RFC6724) Added labels for site-local addresses (fec0::/10) and 6bone testing addresses (3ffe::/16) in order to depreference them. Note that the RFC introduced new rows for Teredo, ULA and 6to4 addresses in the default policy table. Some of them have different labels from ours. For backward compatibility, we do not change the "default" labels. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrlabel.c | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrlabel.c b/net/ipv6/addrlabel.c index 0b0171570d17..4be23da32b89 100644 --- a/net/ipv6/addrlabel.c +++ b/net/ipv6/addrlabel.c @@ -57,7 +57,7 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) } /* - * Default policy table (RFC3484 + extensions) + * Default policy table (RFC6724 + extensions) * * prefix addr_type label * ------------------------------------------------------------------------- @@ -69,8 +69,12 @@ struct net *ip6addrlbl_net(const struct ip6addrlbl_entry *lbl) * fc00::/7 N/A 5 ULA (RFC 4193) * 2001::/32 N/A 6 Teredo (RFC 4380) * 2001:10::/28 N/A 7 ORCHID (RFC 4843) + * fec0::/10 N/A 11 Site-local + * (deprecated by RFC3879) + * 3ffe::/16 N/A 12 6bone * * Note: 0xffffffff is used if we do not have any policies. + * Note: Labels for ULA and 6to4 are different from labels listed in RFC6724. */ #define IPV6_ADDR_LABEL_DEFAULT 0xffffffffUL @@ -88,10 +92,18 @@ static const __net_initdata struct ip6addrlbl_init_table .prefix = &(struct in6_addr){{{ 0xfc }}}, .prefixlen = 7, .label = 5, + },{ /* fec0::/10 */ + .prefix = &(struct in6_addr){{{ 0xfe, 0xc0 }}}, + .prefixlen = 10, + .label = 11, },{ /* 2002::/16 */ .prefix = &(struct in6_addr){{{ 0x20, 0x02 }}}, .prefixlen = 16, .label = 2, + },{ /* 3ffe::/16 */ + .prefix = &(struct in6_addr){{{ 0x3f, 0xfe }}}, + .prefixlen = 16, + .label = 12, },{ /* 2001::/32 */ .prefix = &(struct in6_addr){{{ 0x20, 0x01 }}}, .prefixlen = 32, -- cgit v1.2.3 From 91b4b04ff85de9086c959138d747d2808cc83a46 Mon Sep 17 00:00:00 2001 From: YOSHIFUJI Hideaki / 吉藤英明 Date: Mon, 10 Sep 2012 18:41:07 +0000 Subject: ipv6: Compare addresses only bits up to the prefix length (RFC6724). Compare bits up to the source address's prefix length only to allows DNS load balancing to continue to be used as a tie breaker. Signed-off-by: YOSHIFUJI Hideaki Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 1237d5d037d8..5fd8ec895c8b 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1070,8 +1070,10 @@ static int ipv6_get_saddr_eval(struct net *net, break; case IPV6_SADDR_RULE_PREFIX: /* Rule 8: Use longest matching prefix */ - score->matchlen = ret = ipv6_addr_diff(&score->ifa->addr, - dst->addr); + ret = ipv6_addr_diff(&score->ifa->addr, dst->addr); + if (ret > score->ifa->prefix_len) + ret = score->ifa->prefix_len; + score->matchlen = ret; break; default: ret = 0; -- cgit v1.2.3 From fb0af4c74f200e3c4846d91d8f2f8b265450bba7 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 11 Sep 2012 21:47:51 +0000 Subject: ipv6: route templates can be const We kmemdup() templates, so they can be const. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 399613b7972f..f568ac697987 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -222,7 +222,7 @@ static const u32 ip6_template_metrics[RTAX_MAX] = { [RTAX_HOPLIMIT - 1] = 255, }; -static struct rt6_info ip6_null_entry_template = { +static const struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, @@ -242,7 +242,7 @@ static struct rt6_info ip6_null_entry_template = { static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct sk_buff *skb); -static struct rt6_info ip6_prohibit_entry_template = { +static const struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, @@ -257,7 +257,7 @@ static struct rt6_info ip6_prohibit_entry_template = { .rt6i_ref = ATOMIC_INIT(1), }; -static struct rt6_info ip6_blk_hole_entry_template = { +static const struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, -- cgit v1.2.3 From 5744dd9b71c6b9df944c6a32a000d4a564a2abd7 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 11 Sep 2012 21:59:01 +0000 Subject: ipv6: replace write lock with read lock when get route info geting route info does not write rt->rt6i_table, so replace write lock with read lock Suggested-by: Eric Dumazet Signed-off-by: Li RongQing Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 4 ++-- net/ipv6/route.c | 8 ++++---- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 5fd8ec895c8b..719a828fb67f 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1708,7 +1708,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, if (table == NULL) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, pfx, plen, NULL, 0); if (!fn) goto out; @@ -1723,7 +1723,7 @@ static struct rt6_info *addrconf_get_prefix_route(const struct in6_addr *pfx, break; } out: - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } diff --git a/net/ipv6/route.c b/net/ipv6/route.c index f568ac697987..83dafa528936 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1837,7 +1837,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, if (!table) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); fn = fib6_locate(&table->tb6_root, prefix ,prefixlen, NULL, 0); if (!fn) goto out; @@ -1853,7 +1853,7 @@ static struct rt6_info *rt6_get_route_info(struct net *net, break; } out: - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } @@ -1896,7 +1896,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev if (!table) return NULL; - write_lock_bh(&table->tb6_lock); + read_lock_bh(&table->tb6_lock); for (rt = table->tb6_root.leaf; rt; rt=rt->dst.rt6_next) { if (dev == rt->dst.dev && ((rt->rt6i_flags & (RTF_ADDRCONF | RTF_DEFAULT)) == (RTF_ADDRCONF | RTF_DEFAULT)) && @@ -1905,7 +1905,7 @@ struct rt6_info *rt6_get_dflt_router(const struct in6_addr *addr, struct net_dev } if (rt) dst_hold(&rt->dst); - write_unlock_bh(&table->tb6_lock); + read_unlock_bh(&table->tb6_lock); return rt; } -- cgit v1.2.3 From 6f3118b571b8a4c06c7985dc3172c3526cb86253 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:46 +0000 Subject: ipv6: use net->rt_genid to check dst validity IPv6 dst should take care of rt_genid too. When a xfrm policy is inserted or deleted, all dst should be invalidated. To force the validation, dst entries should be created with ->obsolete set to DST_OBSOLETE_FORCE_CHK. This was already the case for all functions calling ip6_dst_alloc(), except for ip6_rt_copy(). As a consequence, we can remove the specific code in inet6_connection_sock. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/net/ip6_fib.h | 5 ++--- net/ipv6/inet6_connection_sock.c | 23 +---------------------- net/ipv6/route.c | 13 +++++++++---- 3 files changed, 12 insertions(+), 29 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h index 0fedbd8d747a..9fc7114159e8 100644 --- a/include/net/ip6_fib.h +++ b/include/net/ip6_fib.h @@ -111,9 +111,8 @@ struct rt6_info { struct inet6_dev *rt6i_idev; unsigned long _rt6i_peer; -#ifdef CONFIG_XFRM - u32 rt6i_flow_cache_genid; -#endif + u32 rt6i_genid; + /* more non-fragment space at head required */ unsigned short rt6i_nfheader_len; diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index 0251a6005be8..c4f934176cab 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -175,33 +175,12 @@ void __inet6_csk_dst_store(struct sock *sk, struct dst_entry *dst, const struct in6_addr *saddr) { __ip6_dst_store(sk, dst, daddr, saddr); - -#ifdef CONFIG_XFRM - { - struct rt6_info *rt = (struct rt6_info *)dst; - rt->rt6i_flow_cache_genid = atomic_read(&flow_cache_genid); - } -#endif } static inline struct dst_entry *__inet6_csk_dst_check(struct sock *sk, u32 cookie) { - struct dst_entry *dst; - - dst = __sk_dst_check(sk, cookie); - -#ifdef CONFIG_XFRM - if (dst) { - struct rt6_info *rt = (struct rt6_info *)dst; - if (rt->rt6i_flow_cache_genid != atomic_read(&flow_cache_genid)) { - __sk_dst_reset(sk); - dst = NULL; - } - } -#endif - - return dst; + return __sk_dst_check(sk, cookie); } static struct dst_entry *inet6_csk_route_socket(struct sock *sk, diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 8e80fd279100..fb29e2215a19 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -281,13 +281,14 @@ static inline struct rt6_info *ip6_dst_alloc(struct net *net, struct fib6_table *table) { struct rt6_info *rt = dst_alloc(&net->ipv6.ip6_dst_ops, dev, - 0, DST_OBSOLETE_NONE, flags); + 0, DST_OBSOLETE_FORCE_CHK, flags); if (rt) { struct dst_entry *dst = &rt->dst; memset(dst + 1, 0, sizeof(*rt) - sizeof(*dst)); rt6_init_peer(rt, table ? &table->tb6_peers : net->ipv6.peers); + rt->rt6i_genid = rt_genid(net); } return rt; } @@ -1031,6 +1032,13 @@ static struct dst_entry *ip6_dst_check(struct dst_entry *dst, u32 cookie) rt = (struct rt6_info *) dst; + /* All IPV6 dsts are created with ->obsolete set to the value + * DST_OBSOLETE_FORCE_CHK which forces validation calls down + * into this function always. + */ + if (rt->rt6i_genid != rt_genid(dev_net(rt->dst.dev))) + return NULL; + if (rt->rt6i_node && (rt->rt6i_node->fn_sernum == cookie)) { if (rt->rt6i_peer_genid != rt6_peer_genid()) { if (!rt6_has_peer(rt)) @@ -1397,8 +1405,6 @@ int ip6_route_add(struct fib6_config *cfg) goto out; } - rt->dst.obsolete = -1; - if (cfg->fc_flags & RTF_EXPIRES) rt6_set_expires(rt, jiffies + clock_t_to_jiffies(cfg->fc_expires)); @@ -2080,7 +2086,6 @@ struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, rt->dst.input = ip6_input; rt->dst.output = ip6_output; rt->rt6i_idev = idev; - rt->dst.obsolete = -1; rt->rt6i_flags = RTF_UP | RTF_NONEXTHOP; if (anycast) -- cgit v1.2.3 From 2c20cbd7e3aa6e9dddc07975d3f3a89fe1f69c00 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 10 Sep 2012 22:09:47 +0000 Subject: ipv6: use DST_* macro to set obselete field Signed-off-by: Nicolas Dichtel Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index fb29e2215a19..854e4018d205 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -226,7 +226,7 @@ static struct rt6_info ip6_null_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -ENETUNREACH, .input = ip6_pkt_discard, .output = ip6_pkt_discard_out, @@ -246,7 +246,7 @@ static struct rt6_info ip6_prohibit_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EACCES, .input = ip6_pkt_prohibit, .output = ip6_pkt_prohibit_out, @@ -261,7 +261,7 @@ static struct rt6_info ip6_blk_hole_entry_template = { .dst = { .__refcnt = ATOMIC_INIT(1), .__use = 1, - .obsolete = -1, + .obsolete = DST_OBSOLETE_FORCE_CHK, .error = -EINVAL, .input = dst_discard, .output = dst_discard, -- cgit v1.2.3 From 3fd91fb35847a8b3287f00970efc069de16df8b4 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Thu, 13 Sep 2012 19:54:57 +0000 Subject: ipv6: recursive check rt->dst.from when call rt6_check_expired If dst cache dst_a copies from dst_b, and dst_b copies from dst_c, check if dst_a is expired or not, we should not end with dst_a->dst.from, dst_b, we should check dst_c. CC: Gao feng Signed-off-by: Li RongQing Signed-off-by: David S. Miller --- net/ipv6/route.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 83dafa528936..0607ee3a0eac 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -369,15 +369,11 @@ static void ip6_dst_ifdown(struct dst_entry *dst, struct net_device *dev, static bool rt6_check_expired(const struct rt6_info *rt) { - struct rt6_info *ort = NULL; - if (rt->rt6i_flags & RTF_EXPIRES) { if (time_after(jiffies, rt->dst.expires)) return true; } else if (rt->dst.from) { - ort = (struct rt6_info *) rt->dst.from; - return (ort->rt6i_flags & RTF_EXPIRES) && - time_after(jiffies, ort->dst.expires); + return rt6_check_expired((struct rt6_info *) rt->dst.from); } return false; } -- cgit v1.2.3 From c038a767cd697238b09f7a4ea5a504b4891774e9 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:08 +0000 Subject: ipv6: add a new namespace for nf_conntrack_reasm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit As pointed by Michal, it is necessary to add a new namespace for nf_conntrack_reasm code, this prepares for the second patch. Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/net_namespace.h | 3 + include/net/netns/ipv6.h | 8 ++ net/ipv6/netfilter/nf_conntrack_reasm.c | 137 ++++++++++++++++++++++---------- 3 files changed, 106 insertions(+), 42 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h index 5ae57f1ab755..d61e2b36d9e3 100644 --- a/include/net/net_namespace.h +++ b/include/net/net_namespace.h @@ -92,6 +92,9 @@ struct net { struct netns_xt xt; #if defined(CONFIG_NF_CONNTRACK) || defined(CONFIG_NF_CONNTRACK_MODULE) struct netns_ct ct; +#endif +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) + struct netns_nf_frag nf_frag; #endif struct sock *nfnl; struct sock *nfnl_stash; diff --git a/include/net/netns/ipv6.h b/include/net/netns/ipv6.h index 0318104a9458..214cb0a53359 100644 --- a/include/net/netns/ipv6.h +++ b/include/net/netns/ipv6.h @@ -71,4 +71,12 @@ struct netns_ipv6 { #endif #endif }; + +#if IS_ENABLED(CONFIG_NF_DEFRAG_IPV6) +struct netns_nf_frag { + struct netns_sysctl_ipv6 sysctl; + struct netns_frags frags; +}; +#endif + #endif diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f94fb3ac2a79..f40f327ccc0c 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -71,27 +71,26 @@ struct nf_ct_frag6_queue }; static struct inet_frags nf_frags; -static struct netns_frags nf_init_frags; #ifdef CONFIG_SYSCTL static struct ctl_table nf_ct_frag6_sysctl_table[] = { { .procname = "nf_conntrack_frag6_timeout", - .data = &nf_init_frags.timeout, + .data = &init_net.nf_frag.frags.timeout, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, { .procname = "nf_conntrack_frag6_low_thresh", - .data = &nf_init_frags.low_thresh, + .data = &init_net.nf_frag.frags.low_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, }, { .procname = "nf_conntrack_frag6_high_thresh", - .data = &nf_init_frags.high_thresh, + .data = &init_net.nf_frag.frags.high_thresh, .maxlen = sizeof(unsigned int), .mode = 0644, .proc_handler = proc_dointvec, @@ -99,7 +98,55 @@ static struct ctl_table nf_ct_frag6_sysctl_table[] = { { } }; -static struct ctl_table_header *nf_ct_frag6_sysctl_header; +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + struct ctl_table *table; + struct ctl_table_header *hdr; + + table = nf_ct_frag6_sysctl_table; + if (!net_eq(net, &init_net)) { + table = kmemdup(table, sizeof(nf_ct_frag6_sysctl_table), + GFP_KERNEL); + if (table == NULL) + goto err_alloc; + + table[0].data = &net->ipv6.frags.high_thresh; + table[1].data = &net->ipv6.frags.low_thresh; + table[2].data = &net->ipv6.frags.timeout; + } + + hdr = register_net_sysctl(net, "net/netfilter", table); + if (hdr == NULL) + goto err_reg; + + net->ipv6.sysctl.frags_hdr = hdr; + return 0; + +err_reg: + if (!net_eq(net, &init_net)) + kfree(table); +err_alloc: + return -ENOMEM; +} + +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ + struct ctl_table *table; + + table = net->nf_frag.sysctl.frags_hdr->ctl_table_arg; + unregister_net_sysctl_table(net->nf_frag.sysctl.frags_hdr); + if (!net_eq(net, &init_net)) + kfree(table); +} + +#else +static int __net_init nf_ct_frag6_sysctl_register(struct net *net) +{ + return 0; +} +static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) +{ +} #endif static unsigned int nf_hashfn(struct inet_frag_queue *q) @@ -131,13 +178,6 @@ static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) inet_frag_kill(&fq->q, &nf_frags); } -static void nf_ct_frag6_evictor(void) -{ - local_bh_disable(); - inet_frag_evictor(&nf_init_frags, &nf_frags); - local_bh_enable(); -} - static void nf_ct_frag6_expire(unsigned long data) { struct nf_ct_frag6_queue *fq; @@ -158,9 +198,9 @@ out: } /* Creation primitives. */ - -static __inline__ struct nf_ct_frag6_queue * -fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) +static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, + u32 user, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -174,7 +214,7 @@ fq_find(__be32 id, u32 user, struct in6_addr *src, struct in6_addr *dst) read_lock_bh(&nf_frags.lock); hash = inet6_hash_frag(id, src, dst, nf_frags.rnd); - q = inet_frag_find(&nf_init_frags, &nf_frags, &arg, hash); + q = inet_frag_find(&net->nf_frag.frags, &nf_frags, &arg, hash); local_bh_enable(); if (q == NULL) goto oom; @@ -312,7 +352,7 @@ found: fq->q.meat += skb->len; if (payload_len > fq->q.max_size) fq->q.max_size = payload_len; - atomic_add(skb->truesize, &nf_init_frags.mem); + atomic_add(skb->truesize, &fq->q.net->mem); /* The first fragment. * nhoffset is obtained from the first fragment, of course. @@ -322,7 +362,7 @@ found: fq->q.last_in |= INET_FRAG_FIRST_IN; } write_lock(&nf_frags.lock); - list_move_tail(&fq->q.lru_list, &nf_init_frags.lru_list); + list_move_tail(&fq->q.lru_list, &fq->q.net->lru_list); write_unlock(&nf_frags.lock); return 0; @@ -391,7 +431,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) clone->ip_summed = head->ip_summed; NFCT_FRAG6_CB(clone)->orig = NULL; - atomic_add(clone->truesize, &nf_init_frags.mem); + atomic_add(clone->truesize, &fq->q.net->mem); } /* We have to remove fragment header from datagram and to relocate @@ -415,7 +455,7 @@ nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) head->csum = csum_add(head->csum, fp->csum); head->truesize += fp->truesize; } - atomic_sub(head->truesize, &nf_init_frags.mem); + atomic_sub(head->truesize, &fq->q.net->mem); head->local_df = 1; head->next = NULL; @@ -527,6 +567,8 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) { struct sk_buff *clone; struct net_device *dev = skb->dev; + struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) + : dev_net(skb->dev); struct frag_hdr *fhdr; struct nf_ct_frag6_queue *fq; struct ipv6hdr *hdr; @@ -560,10 +602,13 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&nf_init_frags.mem) > nf_init_frags.high_thresh) - nf_ct_frag6_evictor(); + if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) { + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags); + local_bh_enable(); + } - fq = fq_find(fhdr->identification, user, &hdr->saddr, &hdr->daddr); + fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { pr_debug("Can't find and can't create new queue\n"); goto ret_orig; @@ -621,8 +666,31 @@ void nf_ct_frag6_output(unsigned int hooknum, struct sk_buff *skb, nf_conntrack_put_reasm(skb); } +static int nf_ct_net_init(struct net *net) +{ + net->nf_frag.frags.high_thresh = IPV6_FRAG_HIGH_THRESH; + net->nf_frag.frags.low_thresh = IPV6_FRAG_LOW_THRESH; + net->nf_frag.frags.timeout = IPV6_FRAG_TIMEOUT; + inet_frags_init_net(&net->nf_frag.frags); + + return nf_ct_frag6_sysctl_register(net); +} + +static void nf_ct_net_exit(struct net *net) +{ + nf_ct_frags6_sysctl_unregister(net); + inet_frags_exit_net(&net->nf_frag.frags, &nf_frags); +} + +static struct pernet_operations nf_ct_net_ops = { + .init = nf_ct_net_init, + .exit = nf_ct_net_exit, +}; + int nf_ct_frag6_init(void) { + int ret = 0; + nf_frags.hashfn = nf_hashfn; nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; @@ -631,32 +699,17 @@ int nf_ct_frag6_init(void) nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; - nf_init_frags.timeout = IPV6_FRAG_TIMEOUT; - nf_init_frags.high_thresh = IPV6_FRAG_HIGH_THRESH; - nf_init_frags.low_thresh = IPV6_FRAG_LOW_THRESH; - inet_frags_init_net(&nf_init_frags); inet_frags_init(&nf_frags); -#ifdef CONFIG_SYSCTL - nf_ct_frag6_sysctl_header = register_net_sysctl(&init_net, "net/netfilter", - nf_ct_frag6_sysctl_table); - if (!nf_ct_frag6_sysctl_header) { + ret = register_pernet_subsys(&nf_ct_net_ops); + if (ret) inet_frags_fini(&nf_frags); - return -ENOMEM; - } -#endif - return 0; + return ret; } void nf_ct_frag6_cleanup(void) { -#ifdef CONFIG_SYSCTL - unregister_net_sysctl_table(nf_ct_frag6_sysctl_header); - nf_ct_frag6_sysctl_header = NULL; -#endif + unregister_pernet_subsys(&nf_ct_net_ops); inet_frags_fini(&nf_frags); - - nf_init_frags.low_thresh = 0; - nf_ct_frag6_evictor(); } -- cgit v1.2.3 From b836c99fd6c9dfe52a69fa0ba36ec918f80ce02a Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:09 +0000 Subject: ipv6: unify conntrack reassembly expire code with standard one MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two years ago, Shan Wei tried to fix this: http://patchwork.ozlabs.org/patch/43905/ The problem is that RFC2460 requires an ICMP Time Exceeded -- Fragment Reassembly Time Exceeded message should be sent to the source of that fragment, if the defragmentation times out. " If insufficient fragments are received to complete reassembly of a packet within 60 seconds of the reception of the first-arriving fragment of that packet, reassembly of that packet must be abandoned and all the fragments that have been received for that packet must be discarded. If the first fragment (i.e., the one with a Fragment Offset of zero) has been received, an ICMP Time Exceeded -- Fragment Reassembly Time Exceeded message should be sent to the source of that fragment. " As Herbert suggested, we could actually use the standard IPv6 reassembly code which follows RFC2460. With this patch applied, I can see ICMP Time Exceeded sent from the receiver when the sender sent out 3/4 fragmented IPv6 UDP packet. Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Cc: Hideaki YOSHIFUJI Cc: Patrick McHardy Cc: Pablo Neira Ayuso Cc: netfilter-devel@vger.kernel.org Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ipv6.h | 19 +++++++++ net/ipv6/netfilter/nf_conntrack_reasm.c | 74 +++++++++------------------------ net/ipv6/reassembly.c | 63 +++++++++------------------- 3 files changed, 57 insertions(+), 99 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 9bed5d483405..81d4455f6e14 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -411,6 +411,25 @@ struct ip6_create_arg { void ip6_frag_init(struct inet_frag_queue *q, void *a); bool ip6_frag_match(struct inet_frag_queue *q, void *a); +/* + * Equivalent of ipv4 struct ip + */ +struct frag_queue { + struct inet_frag_queue q; + + __be32 id; /* fragment id */ + u32 user; + struct in6_addr saddr; + struct in6_addr daddr; + + int iif; + unsigned int csum; + __u16 nhoffset; +}; + +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, + struct inet_frags *frags); + static inline bool ipv6_addr_any(const struct in6_addr *a) { #if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index f40f327ccc0c..54274c33a0b1 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -57,19 +57,6 @@ struct nf_ct_frag6_skb_cb #define NFCT_FRAG6_CB(skb) ((struct nf_ct_frag6_skb_cb*)((skb)->cb)) -struct nf_ct_frag6_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - unsigned int csum; - __u16 nhoffset; -}; - static struct inet_frags nf_frags; #ifdef CONFIG_SYSCTL @@ -151,9 +138,9 @@ static void __net_exit nf_ct_frags6_sysctl_unregister(struct net *net) static unsigned int nf_hashfn(struct inet_frag_queue *q) { - const struct nf_ct_frag6_queue *nq; + const struct frag_queue *nq; - nq = container_of(q, struct nf_ct_frag6_queue, q); + nq = container_of(q, struct frag_queue, q); return inet6_hash_frag(nq->id, &nq->saddr, &nq->daddr, nf_frags.rnd); } @@ -163,44 +150,21 @@ static void nf_skb_free(struct sk_buff *skb) kfree_skb(NFCT_FRAG6_CB(skb)->orig); } -/* Destruction primitives. */ - -static __inline__ void fq_put(struct nf_ct_frag6_queue *fq) -{ - inet_frag_put(&fq->q, &nf_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct nf_ct_frag6_queue *fq) -{ - inet_frag_kill(&fq->q, &nf_frags); -} - static void nf_ct_frag6_expire(unsigned long data) { - struct nf_ct_frag6_queue *fq; - - fq = container_of((struct inet_frag_queue *)data, - struct nf_ct_frag6_queue, q); + struct frag_queue *fq; + struct net *net; - spin_lock(&fq->q.lock); + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, nf_frag.frags); - if (fq->q.last_in & INET_FRAG_COMPLETE) - goto out; - - fq_kill(fq); - -out: - spin_unlock(&fq->q.lock); - fq_put(fq); + ip6_expire_frag_queue(net, fq, &nf_frags); } /* Creation primitives. */ -static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, - u32 user, struct in6_addr *src, - struct in6_addr *dst) +static inline struct frag_queue *fq_find(struct net *net, __be32 id, + u32 user, struct in6_addr *src, + struct in6_addr *dst) { struct inet_frag_queue *q; struct ip6_create_arg arg; @@ -219,14 +183,14 @@ static inline struct nf_ct_frag6_queue *fq_find(struct net *net, __be32 id, if (q == NULL) goto oom; - return container_of(q, struct nf_ct_frag6_queue, q); + return container_of(q, struct frag_queue, q); oom: return NULL; } -static int nf_ct_frag6_queue(struct nf_ct_frag6_queue *fq, struct sk_buff *skb, +static int nf_ct_frag6_queue(struct frag_queue *fq, struct sk_buff *skb, const struct frag_hdr *fhdr, int nhoff) { struct sk_buff *prev, *next; @@ -367,7 +331,7 @@ found: return 0; discard_fq: - fq_kill(fq); + inet_frag_kill(&fq->q, &nf_frags); err: return -1; } @@ -382,12 +346,12 @@ err: * the last and the first frames arrived and all the bits are here. */ static struct sk_buff * -nf_ct_frag6_reasm(struct nf_ct_frag6_queue *fq, struct net_device *dev) +nf_ct_frag6_reasm(struct frag_queue *fq, struct net_device *dev) { struct sk_buff *fp, *op, *head = fq->q.fragments; int payload_len; - fq_kill(fq); + inet_frag_kill(&fq->q, &nf_frags); WARN_ON(head == NULL); WARN_ON(NFCT_FRAG6_CB(head)->offset != 0); @@ -570,7 +534,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) struct net *net = skb_dst(skb) ? dev_net(skb_dst(skb)->dev) : dev_net(skb->dev); struct frag_hdr *fhdr; - struct nf_ct_frag6_queue *fq; + struct frag_queue *fq; struct ipv6hdr *hdr; int fhoff, nhoff; u8 prevhdr; @@ -619,7 +583,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) if (nf_ct_frag6_queue(fq, clone, fhdr, nhoff) < 0) { spin_unlock_bh(&fq->q.lock); pr_debug("Can't insert skb to queue\n"); - fq_put(fq); + inet_frag_put(&fq->q, &nf_frags); goto ret_orig; } @@ -631,7 +595,7 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) } spin_unlock_bh(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &nf_frags); return ret_skb; ret_orig: @@ -695,7 +659,7 @@ int nf_ct_frag6_init(void) nf_frags.constructor = ip6_frag_init; nf_frags.destructor = NULL; nf_frags.skb_free = nf_skb_free; - nf_frags.qsize = sizeof(struct nf_ct_frag6_queue); + nf_frags.qsize = sizeof(struct frag_queue); nf_frags.match = ip6_frag_match; nf_frags.frag_expire = nf_ct_frag6_expire; nf_frags.secret_interval = 10 * 60 * HZ; diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 4ff9af628e72..0ee553354ed5 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -65,24 +65,6 @@ struct ip6frag_skb_cb #define FRAG6_CB(skb) ((struct ip6frag_skb_cb*)((skb)->cb)) -/* - * Equivalent of ipv4 struct ipq - */ - -struct frag_queue -{ - struct inet_frag_queue q; - - __be32 id; /* fragment id */ - u32 user; - struct in6_addr saddr; - struct in6_addr daddr; - - int iif; - unsigned int csum; - __u16 nhoffset; -}; - static struct inet_frags ip6_frags; int ip6_frag_nqueues(struct net *net) @@ -159,21 +141,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -/* Destruction primitives. */ - -static __inline__ void fq_put(struct frag_queue *fq) -{ - inet_frag_put(&fq->q, &ip6_frags); -} - -/* Kill fq entry. It is not destroyed immediately, - * because caller (and someone more) holds reference count. - */ -static __inline__ void fq_kill(struct frag_queue *fq) -{ - inet_frag_kill(&fq->q, &ip6_frags); -} - static void ip6_evictor(struct net *net, struct inet6_dev *idev) { int evicted; @@ -183,22 +150,18 @@ static void ip6_evictor(struct net *net, struct inet6_dev *idev) IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); } -static void ip6_frag_expire(unsigned long data) +void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, + struct inet_frags *frags) { - struct frag_queue *fq; struct net_device *dev = NULL; - struct net *net; - - fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); spin_lock(&fq->q.lock); if (fq->q.last_in & INET_FRAG_COMPLETE) goto out; - fq_kill(fq); + inet_frag_kill(&fq->q, frags); - net = container_of(fq->q.net, struct net, ipv6.frags); rcu_read_lock(); dev = dev_get_by_index_rcu(net, fq->iif); if (!dev) @@ -222,7 +185,19 @@ out_rcu_unlock: rcu_read_unlock(); out: spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, frags); +} +EXPORT_SYMBOL(ip6_expire_frag_queue); + +static void ip6_frag_expire(unsigned long data) +{ + struct frag_queue *fq; + struct net *net; + + fq = container_of((struct inet_frag_queue *)data, struct frag_queue, q); + net = container_of(fq->q.net, struct net, ipv6.frags); + + ip6_expire_frag_queue(net, fq, &ip6_frags); } static __inline__ struct frag_queue * @@ -391,7 +366,7 @@ found: return -1; discard_fq: - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); err: IP6_INC_STATS(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMFAILS); @@ -417,7 +392,7 @@ static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, unsigned int nhoff; int sum_truesize; - fq_kill(fq); + inet_frag_kill(&fq->q, &ip6_frags); /* Make the one we just received the head. */ if (prev) { @@ -586,7 +561,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) ret = ip6_frag_queue(fq, skb, fhdr, IP6CB(skb)->nhoff); spin_unlock(&fq->q.lock); - fq_put(fq); + inet_frag_put(&fq->q, &ip6_frags); return ret; } -- cgit v1.2.3 From d4915c087f7c2457c580efc16fe0bfa1a576274d Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:10 +0000 Subject: ipv6: make ip6_frag_nqueues() and ip6_frag_mem() static inline MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/ipv6.h | 13 +++++++++++-- net/ipv6/reassembly.c | 10 ---------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index 81d4455f6e14..979bf6c13141 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -271,8 +271,17 @@ struct ipv6_txoptions *ipv6_fixup_options(struct ipv6_txoptions *opt_space, extern bool ipv6_opt_accepted(const struct sock *sk, const struct sk_buff *skb); -int ip6_frag_nqueues(struct net *net); -int ip6_frag_mem(struct net *net); +#if IS_ENABLED(CONFIG_IPV6) +static inline int ip6_frag_nqueues(struct net *net) +{ + return net->ipv6.frags.nqueues; +} + +static inline int ip6_frag_mem(struct net *net) +{ + return atomic_read(&net->ipv6.frags.mem); +} +#endif #define IPV6_FRAG_HIGH_THRESH (256 * 1024) /* 262144 */ #define IPV6_FRAG_LOW_THRESH (192 * 1024) /* 196608 */ diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index 0ee553354ed5..cf74f4e79356 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -67,16 +67,6 @@ struct ip6frag_skb_cb static struct inet_frags ip6_frags; -int ip6_frag_nqueues(struct net *net) -{ - return net->ipv6.frags.nqueues; -} - -int ip6_frag_mem(struct net *net) -{ - return atomic_read(&net->ipv6.frags.mem); -} - static int ip6_frag_reasm(struct frag_queue *fq, struct sk_buff *prev, struct net_device *dev); -- cgit v1.2.3 From 6b102865e7ba9ff1e3c49c32c7187bb427d91798 Mon Sep 17 00:00:00 2001 From: Amerigo Wang Date: Tue, 18 Sep 2012 16:50:11 +0000 Subject: ipv6: unify fragment thresh handling code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Cc: Herbert Xu Cc: Michal Kubeček Cc: David Miller Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/inet_frag.h | 2 +- net/ipv4/inet_fragment.c | 9 +++++++-- net/ipv4/ip_fragment.c | 5 ++--- net/ipv6/netfilter/nf_conntrack_reasm.c | 8 +++----- net/ipv6/reassembly.c | 16 +++++----------- 5 files changed, 18 insertions(+), 22 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_frag.h b/include/net/inet_frag.h index 5098ee7b7e0e..32786a044718 100644 --- a/include/net/inet_frag.h +++ b/include/net/inet_frag.h @@ -61,7 +61,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f); void inet_frag_kill(struct inet_frag_queue *q, struct inet_frags *f); void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, int *work); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f); +int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force); struct inet_frag_queue *inet_frag_find(struct netns_frags *nf, struct inet_frags *f, void *key, unsigned int hash) __releases(&f->lock); diff --git a/net/ipv4/inet_fragment.c b/net/ipv4/inet_fragment.c index 85190e69297b..4750d2b74d79 100644 --- a/net/ipv4/inet_fragment.c +++ b/net/ipv4/inet_fragment.c @@ -89,7 +89,7 @@ void inet_frags_exit_net(struct netns_frags *nf, struct inet_frags *f) nf->low_thresh = 0; local_bh_disable(); - inet_frag_evictor(nf, f); + inet_frag_evictor(nf, f, true); local_bh_enable(); } EXPORT_SYMBOL(inet_frags_exit_net); @@ -158,11 +158,16 @@ void inet_frag_destroy(struct inet_frag_queue *q, struct inet_frags *f, } EXPORT_SYMBOL(inet_frag_destroy); -int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f) +int inet_frag_evictor(struct netns_frags *nf, struct inet_frags *f, bool force) { struct inet_frag_queue *q; int work, evicted = 0; + if (!force) { + if (atomic_read(&nf->mem) <= nf->high_thresh) + return 0; + } + work = atomic_read(&nf->mem) - nf->low_thresh; while (work > 0) { read_lock(&f->lock); diff --git a/net/ipv4/ip_fragment.c b/net/ipv4/ip_fragment.c index fa6a12c51066..448e68546827 100644 --- a/net/ipv4/ip_fragment.c +++ b/net/ipv4/ip_fragment.c @@ -219,7 +219,7 @@ static void ip_evictor(struct net *net) { int evicted; - evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags); + evicted = inet_frag_evictor(&net->ipv4.frags, &ip4_frags, false); if (evicted) IP_ADD_STATS_BH(net, IPSTATS_MIB_REASMFAILS, evicted); } @@ -684,8 +684,7 @@ int ip_defrag(struct sk_buff *skb, u32 user) IP_INC_STATS_BH(net, IPSTATS_MIB_REASMREQDS); /* Start by cleaning up the memory. */ - if (atomic_read(&net->ipv4.frags.mem) > net->ipv4.frags.high_thresh) - ip_evictor(net); + ip_evictor(net); /* Lookup (or create) queue header */ if ((qp = ip_find(net, ip_hdr(skb), user)) != NULL) { diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 54274c33a0b1..1af12fde08df 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -566,11 +566,9 @@ struct sk_buff *nf_ct_frag6_gather(struct sk_buff *skb, u32 user) hdr = ipv6_hdr(clone); fhdr = (struct frag_hdr *)skb_transport_header(clone); - if (atomic_read(&net->nf_frag.frags.mem) > net->nf_frag.frags.high_thresh) { - local_bh_disable(); - inet_frag_evictor(&net->nf_frag.frags, &nf_frags); - local_bh_enable(); - } + local_bh_disable(); + inet_frag_evictor(&net->nf_frag.frags, &nf_frags, false); + local_bh_enable(); fq = fq_find(net, fhdr->identification, user, &hdr->saddr, &hdr->daddr); if (fq == NULL) { diff --git a/net/ipv6/reassembly.c b/net/ipv6/reassembly.c index cf74f4e79356..da8a4e301b1b 100644 --- a/net/ipv6/reassembly.c +++ b/net/ipv6/reassembly.c @@ -131,15 +131,6 @@ void ip6_frag_init(struct inet_frag_queue *q, void *a) } EXPORT_SYMBOL(ip6_frag_init); -static void ip6_evictor(struct net *net, struct inet6_dev *idev) -{ - int evicted; - - evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags); - if (evicted) - IP6_ADD_STATS_BH(net, idev, IPSTATS_MIB_REASMFAILS, evicted); -} - void ip6_expire_frag_queue(struct net *net, struct frag_queue *fq, struct inet_frags *frags) { @@ -515,6 +506,7 @@ static int ipv6_frag_rcv(struct sk_buff *skb) struct frag_queue *fq; const struct ipv6hdr *hdr = ipv6_hdr(skb); struct net *net = dev_net(skb_dst(skb)->dev); + int evicted; IP6_INC_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), IPSTATS_MIB_REASMREQDS); @@ -539,8 +531,10 @@ static int ipv6_frag_rcv(struct sk_buff *skb) return 1; } - if (atomic_read(&net->ipv6.frags.mem) > net->ipv6.frags.high_thresh) - ip6_evictor(net, ip6_dst_idev(skb_dst(skb))); + evicted = inet_frag_evictor(&net->ipv6.frags, &ip6_frags, false); + if (evicted) + IP6_ADD_STATS_BH(net, ip6_dst_idev(skb_dst(skb)), + IPSTATS_MIB_REASMFAILS, evicted); fq = fq_find(net, fhdr->identification, &hdr->saddr, &hdr->daddr); if (fq != NULL) { -- cgit v1.2.3 From b0041d1b8e73d419f4165c189af7c28aff3a02ec Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 18 Sep 2012 21:03:39 +0200 Subject: netfilter: fix IPv6 NAT dependencies in Kconfig * NF_NAT_IPV6 requires IP6_NF_IPTABLES * IP6_NF_TARGET_MASQUERADE, IP6_NF_TARGET_NETMAP, IP6_NF_TARGET_REDIRECT and IP6_NF_TARGET_NPT require NF_NAT_IPV6. This change just mirrors what IPv4 does in Kconfig, for consistency. Reported-by: Randy Dunlap Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/Kconfig | 110 ++++++++++++++++++++++----------------------- 1 file changed, 55 insertions(+), 55 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 3b73254d7bf1..d8f276b9fd8a 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -25,18 +25,6 @@ config NF_CONNTRACK_IPV6 To compile it as a module, choose M here. If unsure, say N. -config NF_NAT_IPV6 - tristate "IPv6 NAT" - depends on NF_CONNTRACK_IPV6 - depends on NETFILTER_ADVANCED - select NF_NAT - help - The IPv6 NAT option allows masquerading, port forwarding and other - forms of full Network Address Port Translation. It is controlled by - the `nat' table in ip6tables, see the man page for ip6tables(8). - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_IPTABLES tristate "IP6 tables support (required for filtering)" depends on INET && IPV6 @@ -144,48 +132,6 @@ config IP6_NF_TARGET_HL (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_TARGET_HL. -config IP6_NF_TARGET_MASQUERADE - tristate "MASQUERADE target support" - depends on NF_NAT_IPV6 - help - Masquerading is a special case of NAT: all outgoing connections are - changed to seem to come from a particular interface's address, and - if the interface goes down, those connections are lost. This is - only useful for dialup accounts with dynamic IP address (ie. your IP - address will be different on next dialup). - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_TARGET_NETMAP - tristate "NETMAP target support" - depends on NF_NAT_IPV6 - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_TARGET_REDIRECT - tristate "REDIRECT target support" - depends on NF_NAT_IPV6 - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. - -config IP6_NF_TARGET_NPT - tristate "NPT (Network Prefix translation) target support" - depends on NETFILTER_ADVANCED - help - This option adds the `SNPT' and `DNPT' target, which perform - stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_FILTER tristate "Packet filtering" default m if NETFILTER_ADVANCED=n @@ -235,9 +181,63 @@ config IP6_NF_SECURITY help This option adds a `security' table to iptables, for use with Mandatory Access Control (MAC) policy. - + If unsure, say N. +config NF_NAT_IPV6 + tristate "IPv6 NAT" + depends on NF_CONNTRACK_IPV6 + depends on NETFILTER_ADVANCED + select NF_NAT + help + The IPv6 NAT option allows masquerading, port forwarding and other + forms of full Network Address Port Translation. It is controlled by + the `nat' table in ip6tables, see the man page for ip6tables(8). + + To compile it as a module, choose M here. If unsure, say N. + +if NF_NAT_IPV6 + +config IP6_NF_TARGET_MASQUERADE + tristate "MASQUERADE target support" + help + Masquerading is a special case of NAT: all outgoing connections are + changed to seem to come from a particular interface's address, and + if the interface goes down, those connections are lost. This is + only useful for dialup accounts with dynamic IP address (ie. your IP + address will be different on next dialup). + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_NETMAP + tristate "NETMAP target support" + help + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_REDIRECT + tristate "REDIRECT target support" + help + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + +config IP6_NF_TARGET_NPT + tristate "NPT (Network Prefix translation) target support" + help + This option adds the `SNPT' and `DNPT' target, which perform + stateless IPv6-to-IPv6 Network Prefix Translation per RFC 6296. + + To compile it as a module, choose M here. If unsure, say N. + +endif # NF_NAT_IPV6 + endif # IP6_NF_IPTABLES endmenu -- cgit v1.2.3 From b3d54b3e406b5d6ac391590bf7524e887e8e13c3 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 11:37:59 +0200 Subject: netfilter: combine ipt_NETMAP and ip6t_NETMAP Combine more modules since the actual code is so small anyway that the kmod metadata and the module in its loaded state totally outweighs the combined actual code size. IP_NF_TARGET_NETMAP becomes a compat option; IP6_NF_TARGET_NETMAP is completely eliminated since it has not see a release yet. Signed-off-by: Jan Engelhardt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 11 ++- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_NETMAP.c | 101 ------------------------ net/ipv6/netfilter/Kconfig | 9 --- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_NETMAP.c | 94 ---------------------- net/netfilter/Kconfig | 10 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_NETMAP.c | 165 +++++++++++++++++++++++++++++++++++++++ 9 files changed, 181 insertions(+), 212 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_NETMAP.c delete mode 100644 net/ipv6/netfilter/ip6t_NETMAP.c create mode 100644 net/netfilter/xt_NETMAP.c (limited to 'net/ipv6') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 131e53702e77..6f140084004f 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -172,12 +172,11 @@ config IP_NF_TARGET_MASQUERADE config IP_NF_TARGET_NETMAP tristate "NETMAP target support" depends on NETFILTER_ADVANCED - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_TARGET_NETMAP + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_NETMAP. config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index b7dd18987237..f4446c5d6595 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_NETMAP) += ipt_NETMAP.o obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o diff --git a/net/ipv4/netfilter/ipt_NETMAP.c b/net/ipv4/netfilter/ipt_NETMAP.c deleted file mode 100644 index 85028dc0425d..000000000000 --- a/net/ipv4/netfilter/ipt_NETMAP.c +++ /dev/null @@ -1,101 +0,0 @@ -/* NETMAP - static NAT mapping of IP network addresses (1:1). - * The mapping can be applied to source (POSTROUTING), - * destination (PREROUTING), or both (with separate rules). - */ - -/* (C) 2000-2001 Svenning Soerensen - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Svenning Soerensen "); -MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv4 subnets"); - -static int netmap_tg_check(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { - pr_debug("bad MAP_IPS.\n"); - return -EINVAL; - } - if (mr->rangesize != 1) { - pr_debug("bad rangesize %u.\n", mr->rangesize); - return -EINVAL; - } - return 0; -} - -static unsigned int -netmap_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 new_ip, netmask; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_POST_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT || - par->hooknum == NF_INET_LOCAL_IN); - ct = nf_ct_get(skb, &ctinfo); - - netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); - - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) - new_ip = ip_hdr(skb)->daddr & ~netmask; - else - new_ip = ip_hdr(skb)->saddr & ~netmask; - new_ip |= mr->range[0].min_ip & netmask; - - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = new_ip; - newrange.max_addr.ip = new_ip; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); -} - -static struct xt_target netmap_tg_reg __read_mostly = { - .name = "NETMAP", - .family = NFPROTO_IPV4, - .target = netmap_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .checkentry = netmap_tg_check, - .me = THIS_MODULE -}; - -static int __init netmap_tg_init(void) -{ - return xt_register_target(&netmap_tg_reg); -} - -static void __exit netmap_tg_exit(void) -{ - xt_unregister_target(&netmap_tg_reg); -} - -module_init(netmap_tg_init); -module_exit(netmap_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index d8f276b9fd8a..007bb450f04f 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -209,15 +209,6 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_NETMAP - tristate "NETMAP target support" - help - NETMAP is an implementation of static 1:1 NAT mapping of network - addresses. It maps the network address part, while keeping the host - address part intact. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_TARGET_REDIRECT tristate "REDIRECT target support" help diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index 5752132ca159..de8e0d11338d 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -35,7 +35,6 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o -obj-$(CONFIG_IP6_NF_TARGET_NETMAP) += ip6t_NETMAP.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_NETMAP.c b/net/ipv6/netfilter/ip6t_NETMAP.c deleted file mode 100644 index 4f3bf360e50f..000000000000 --- a/net/ipv6/netfilter/ip6t_NETMAP.c +++ /dev/null @@ -1,94 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Svenning Soerensen's IPv4 NETMAP target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include - -static unsigned int -netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - struct nf_nat_range newrange; - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - union nf_inet_addr new_addr, netmask; - unsigned int i; - - ct = nf_ct_get(skb, &ctinfo); - for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) - netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ - range->max_addr.ip6[i]); - - if (par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT) - new_addr.in6 = ipv6_hdr(skb)->daddr; - else - new_addr.in6 = ipv6_hdr(skb)->saddr; - - for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { - new_addr.ip6[i] &= ~netmask.ip6[i]; - new_addr.ip6[i] |= range->min_addr.ip6[i] & - netmask.ip6[i]; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr = new_addr; - newrange.max_addr = new_addr; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); -} - -static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - - if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) - return -EINVAL; - return 0; -} - -static struct xt_target netmap_tg6_reg __read_mostly = { - .name = "NETMAP", - .family = NFPROTO_IPV6, - .target = netmap_tg6, - .targetsize = sizeof(struct nf_nat_range), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | - (1 << NF_INET_POST_ROUTING) | - (1 << NF_INET_LOCAL_OUT) | - (1 << NF_INET_LOCAL_IN), - .checkentry = netmap_tg6_checkentry, - .me = THIS_MODULE, -}; - -static int __init netmap_tg6_init(void) -{ - return xt_register_target(&netmap_tg6_reg); -} - -static void netmap_tg6_exit(void) -{ - xt_unregister_target(&netmap_tg6_reg); -} - -module_init(netmap_tg6_init); -module_exit(netmap_tg6_exit); - -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of IPv6 subnets"); -MODULE_AUTHOR("Patrick McHardy "); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 3f4b3b4a7762..ad0e0da62ede 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -648,6 +648,16 @@ config NETFILTER_XT_TARGET_MARK (e.g. when running oldconfig). It selects CONFIG_NETFILTER_XT_MARK (combined mark/MARK module). +config NETFILTER_XT_TARGET_NETMAP + tristate '"NETMAP" target support' + depends on NF_NAT + ---help--- + NETMAP is an implementation of static 1:1 NAT mapping of network + addresses. It maps the network address part, while keeping the host + address part intact. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_NFLOG tristate '"NFLOG" target support' default m if NETFILTER_ADVANCED=n diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 0baa3f104fcb..600d28ba514c 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -83,6 +83,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_HL) += xt_HL.o obj-$(CONFIG_NETFILTER_XT_TARGET_HMARK) += xt_HMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_LED) += xt_LED.o obj-$(CONFIG_NETFILTER_XT_TARGET_LOG) += xt_LOG.o +obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o diff --git a/net/netfilter/xt_NETMAP.c b/net/netfilter/xt_NETMAP.c new file mode 100644 index 000000000000..b253e07cb1c5 --- /dev/null +++ b/net/netfilter/xt_NETMAP.c @@ -0,0 +1,165 @@ +/* + * (C) 2000-2001 Svenning Soerensen + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static unsigned int +netmap_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + union nf_inet_addr new_addr, netmask; + unsigned int i; + + ct = nf_ct_get(skb, &ctinfo); + for (i = 0; i < ARRAY_SIZE(range->min_addr.ip6); i++) + netmask.ip6[i] = ~(range->min_addr.ip6[i] ^ + range->max_addr.ip6[i]); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_addr.in6 = ipv6_hdr(skb)->daddr; + else + new_addr.in6 = ipv6_hdr(skb)->saddr; + + for (i = 0; i < ARRAY_SIZE(new_addr.ip6); i++) { + new_addr.ip6[i] &= ~netmask.ip6[i]; + new_addr.ip6[i] |= range->min_addr.ip6[i] & + netmask.ip6[i]; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr = new_addr; + newrange.max_addr = new_addr; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (!(range->flags & NF_NAT_RANGE_MAP_IPS)) + return -EINVAL; + return 0; +} + +static unsigned int +netmap_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 new_ip, netmask; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range newrange; + + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_POST_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT || + par->hooknum == NF_INET_LOCAL_IN); + ct = nf_ct_get(skb, &ctinfo); + + netmask = ~(mr->range[0].min_ip ^ mr->range[0].max_ip); + + if (par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT) + new_ip = ip_hdr(skb)->daddr & ~netmask; + else + new_ip = ip_hdr(skb)->saddr & ~netmask; + new_ip |= mr->range[0].min_ip & netmask; + + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = new_ip; + newrange.max_addr.ip = new_ip; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, HOOK2MANIP(par->hooknum)); +} + +static int netmap_tg4_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (!(mr->range[0].flags & NF_NAT_RANGE_MAP_IPS)) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static struct xt_target netmap_tg_reg[] __read_mostly = { + { + .name = "NETMAP", + .family = NFPROTO_IPV6, + .revision = 0, + .target = netmap_tg6, + .targetsize = sizeof(struct nf_nat_range), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg6_checkentry, + .me = THIS_MODULE, + }, + { + .name = "NETMAP", + .family = NFPROTO_IPV4, + .revision = 0, + .target = netmap_tg4, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .table = "nat", + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_POST_ROUTING) | + (1 << NF_INET_LOCAL_OUT) | + (1 << NF_INET_LOCAL_IN), + .checkentry = netmap_tg4_check, + .me = THIS_MODULE, + }, +}; + +static int __init netmap_tg_init(void) +{ + return xt_register_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); +} + +static void netmap_tg_exit(void) +{ + xt_unregister_targets(netmap_tg_reg, ARRAY_SIZE(netmap_tg_reg)); +} + +module_init(netmap_tg_init); +module_exit(netmap_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_DESCRIPTION("Xtables: 1:1 NAT mapping of subnets"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_ALIAS("ip6t_NETMAP"); +MODULE_ALIAS("ipt_NETMAP"); -- cgit v1.2.3 From 2cbc78a29e76a2e92c172651204f3117491877d2 Mon Sep 17 00:00:00 2001 From: Jan Engelhardt Date: Fri, 21 Sep 2012 11:41:34 +0200 Subject: netfilter: combine ipt_REDIRECT and ip6t_REDIRECT Combine more modules since the actual code is so small anyway that the kmod metadata and the module in its loaded state totally outweighs the combined actual code size. IP_NF_TARGET_REDIRECT becomes a compat option; IP6_NF_TARGET_REDIRECT is completely eliminated since it has not see a release yet. Signed-off-by: Jan Engelhardt Acked-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/Kconfig | 12 +-- net/ipv4/netfilter/Makefile | 1 - net/ipv4/netfilter/ipt_REDIRECT.c | 113 ---------------------- net/ipv6/netfilter/Kconfig | 10 -- net/ipv6/netfilter/Makefile | 1 - net/ipv6/netfilter/ip6t_REDIRECT.c | 98 ------------------- net/netfilter/Kconfig | 11 +++ net/netfilter/Makefile | 1 + net/netfilter/xt_REDIRECT.c | 190 +++++++++++++++++++++++++++++++++++++ 9 files changed, 207 insertions(+), 230 deletions(-) delete mode 100644 net/ipv4/netfilter/ipt_REDIRECT.c delete mode 100644 net/ipv6/netfilter/ip6t_REDIRECT.c create mode 100644 net/netfilter/xt_REDIRECT.c (limited to 'net/ipv6') diff --git a/net/ipv4/netfilter/Kconfig b/net/ipv4/netfilter/Kconfig index 6f140084004f..d8d6f2a5bf12 100644 --- a/net/ipv4/netfilter/Kconfig +++ b/net/ipv4/netfilter/Kconfig @@ -181,13 +181,11 @@ config IP_NF_TARGET_NETMAP config IP_NF_TARGET_REDIRECT tristate "REDIRECT target support" depends on NETFILTER_ADVANCED - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. + select NETFILTER_XT_TARGET_REDIRECT + ---help--- + This is a backwards-compat option for the user's convenience + (e.g. when running oldconfig). It selects + CONFIG_NETFILTER_XT_TARGET_REDIRECT. endif diff --git a/net/ipv4/netfilter/Makefile b/net/ipv4/netfilter/Makefile index f4446c5d6595..007b128eecc9 100644 --- a/net/ipv4/netfilter/Makefile +++ b/net/ipv4/netfilter/Makefile @@ -45,7 +45,6 @@ obj-$(CONFIG_IP_NF_MATCH_RPFILTER) += ipt_rpfilter.o obj-$(CONFIG_IP_NF_TARGET_CLUSTERIP) += ipt_CLUSTERIP.o obj-$(CONFIG_IP_NF_TARGET_ECN) += ipt_ECN.o obj-$(CONFIG_IP_NF_TARGET_MASQUERADE) += ipt_MASQUERADE.o -obj-$(CONFIG_IP_NF_TARGET_REDIRECT) += ipt_REDIRECT.o obj-$(CONFIG_IP_NF_TARGET_REJECT) += ipt_REJECT.o obj-$(CONFIG_IP_NF_TARGET_ULOG) += ipt_ULOG.o diff --git a/net/ipv4/netfilter/ipt_REDIRECT.c b/net/ipv4/netfilter/ipt_REDIRECT.c deleted file mode 100644 index 11407d7d2472..000000000000 --- a/net/ipv4/netfilter/ipt_REDIRECT.c +++ /dev/null @@ -1,113 +0,0 @@ -/* Redirect. Simple mapping which alters dst to a local IP address. */ -/* (C) 1999-2001 Paul `Rusty' Russell - * (C) 2002-2006 Netfilter Core Team - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Netfilter Core Team "); -MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); - -/* FIXME: Take multiple ranges --RR */ -static int redirect_tg_check(const struct xt_tgchk_param *par) -{ - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - - if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { - pr_debug("bad MAP_IPS.\n"); - return -EINVAL; - } - if (mr->rangesize != 1) { - pr_debug("bad rangesize %u.\n", mr->rangesize); - return -EINVAL; - } - return 0; -} - -static unsigned int -redirect_tg(struct sk_buff *skb, const struct xt_action_param *par) -{ - struct nf_conn *ct; - enum ip_conntrack_info ctinfo; - __be32 newdst; - const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; - struct nf_nat_range newrange; - - NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || - par->hooknum == NF_INET_LOCAL_OUT); - - ct = nf_ct_get(skb, &ctinfo); - NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); - - /* Local packets: make them go to loopback */ - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = htonl(0x7F000001); - else { - struct in_device *indev; - struct in_ifaddr *ifa; - - newdst = 0; - - rcu_read_lock(); - indev = __in_dev_get_rcu(skb->dev); - if (indev && (ifa = indev->ifa_list)) - newdst = ifa->ifa_local; - rcu_read_unlock(); - - if (!newdst) - return NF_DROP; - } - - /* Transfer from original range. */ - memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); - memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); - newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.ip = newdst; - newrange.max_addr.ip = newdst; - newrange.min_proto = mr->range[0].min; - newrange.max_proto = mr->range[0].max; - - /* Hand modified range to generic setup. */ - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} - -static struct xt_target redirect_tg_reg __read_mostly = { - .name = "REDIRECT", - .family = NFPROTO_IPV4, - .target = redirect_tg, - .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .checkentry = redirect_tg_check, - .me = THIS_MODULE, -}; - -static int __init redirect_tg_init(void) -{ - return xt_register_target(&redirect_tg_reg); -} - -static void __exit redirect_tg_exit(void) -{ - xt_unregister_target(&redirect_tg_reg); -} - -module_init(redirect_tg_init); -module_exit(redirect_tg_exit); diff --git a/net/ipv6/netfilter/Kconfig b/net/ipv6/netfilter/Kconfig index 007bb450f04f..c72532a60d88 100644 --- a/net/ipv6/netfilter/Kconfig +++ b/net/ipv6/netfilter/Kconfig @@ -209,16 +209,6 @@ config IP6_NF_TARGET_MASQUERADE To compile it as a module, choose M here. If unsure, say N. -config IP6_NF_TARGET_REDIRECT - tristate "REDIRECT target support" - help - REDIRECT is a special case of NAT: all incoming connections are - mapped onto the incoming interface's address, causing the packets to - come to the local machine instead of passing through. This is - useful for transparent proxies. - - To compile it as a module, choose M here. If unsure, say N. - config IP6_NF_TARGET_NPT tristate "NPT (Network Prefix translation) target support" help diff --git a/net/ipv6/netfilter/Makefile b/net/ipv6/netfilter/Makefile index de8e0d11338d..2d11fcc2cf3c 100644 --- a/net/ipv6/netfilter/Makefile +++ b/net/ipv6/netfilter/Makefile @@ -36,5 +36,4 @@ obj-$(CONFIG_IP6_NF_MATCH_RT) += ip6t_rt.o # targets obj-$(CONFIG_IP6_NF_TARGET_MASQUERADE) += ip6t_MASQUERADE.o obj-$(CONFIG_IP6_NF_TARGET_NPT) += ip6t_NPT.o -obj-$(CONFIG_IP6_NF_TARGET_REDIRECT) += ip6t_REDIRECT.o obj-$(CONFIG_IP6_NF_TARGET_REJECT) += ip6t_REJECT.o diff --git a/net/ipv6/netfilter/ip6t_REDIRECT.c b/net/ipv6/netfilter/ip6t_REDIRECT.c deleted file mode 100644 index 60497a3c6004..000000000000 --- a/net/ipv6/netfilter/ip6t_REDIRECT.c +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2011 Patrick McHardy - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 - * NAT funded by Astaro. - */ - -#include -#include -#include -#include -#include -#include -#include - -static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; - -static unsigned int -redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - struct nf_nat_range newrange; - struct in6_addr newdst; - enum ip_conntrack_info ctinfo; - struct nf_conn *ct; - - ct = nf_ct_get(skb, &ctinfo); - if (par->hooknum == NF_INET_LOCAL_OUT) - newdst = loopback_addr; - else { - struct inet6_dev *idev; - struct inet6_ifaddr *ifa; - bool addr = false; - - rcu_read_lock(); - idev = __in6_dev_get(skb->dev); - if (idev != NULL) { - list_for_each_entry(ifa, &idev->addr_list, if_list) { - newdst = ifa->addr; - addr = true; - break; - } - } - rcu_read_unlock(); - - if (!addr) - return NF_DROP; - } - - newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; - newrange.min_addr.in6 = newdst; - newrange.max_addr.in6 = newdst; - newrange.min_proto = range->min_proto; - newrange.max_proto = range->max_proto; - - return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); -} - -static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) -{ - const struct nf_nat_range *range = par->targinfo; - - if (range->flags & NF_NAT_RANGE_MAP_IPS) - return -EINVAL; - return 0; -} - -static struct xt_target redirect_tg6_reg __read_mostly = { - .name = "REDIRECT", - .family = NFPROTO_IPV6, - .checkentry = redirect_tg6_checkentry, - .target = redirect_tg6, - .targetsize = sizeof(struct nf_nat_range), - .table = "nat", - .hooks = (1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_OUT), - .me = THIS_MODULE, -}; - -static int __init redirect_tg6_init(void) -{ - return xt_register_target(&redirect_tg6_reg); -} - -static void __exit redirect_tg6_exit(void) -{ - xt_unregister_target(&redirect_tg6_reg); -} - -module_init(redirect_tg6_init); -module_exit(redirect_tg6_exit); - -MODULE_LICENSE("GPL"); -MODULE_AUTHOR("Patrick McHardy "); -MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index ad0e0da62ede..fefa514b9917 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -690,6 +690,17 @@ config NETFILTER_XT_TARGET_RATEEST To compile it as a module, choose M here. If unsure, say N. +config NETFILTER_XT_TARGET_REDIRECT + tristate "REDIRECT target support" + depends on NF_NAT + ---help--- + REDIRECT is a special case of NAT: all incoming connections are + mapped onto the incoming interface's address, causing the packets to + come to the local machine instead of passing through. This is + useful for transparent proxies. + + To compile it as a module, choose M here. If unsure, say N. + config NETFILTER_XT_TARGET_TEE tristate '"TEE" - packet cloning to alternate destination' depends on NETFILTER_ADVANCED diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 600d28ba514c..32596978df1d 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -87,6 +87,7 @@ obj-$(CONFIG_NETFILTER_XT_TARGET_NETMAP) += xt_NETMAP.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFLOG) += xt_NFLOG.o obj-$(CONFIG_NETFILTER_XT_TARGET_NFQUEUE) += xt_NFQUEUE.o obj-$(CONFIG_NETFILTER_XT_TARGET_RATEEST) += xt_RATEEST.o +obj-$(CONFIG_NETFILTER_XT_TARGET_REDIRECT) += xt_REDIRECT.o obj-$(CONFIG_NETFILTER_XT_TARGET_SECMARK) += xt_SECMARK.o obj-$(CONFIG_NETFILTER_XT_TARGET_TPROXY) += xt_TPROXY.o obj-$(CONFIG_NETFILTER_XT_TARGET_TCPMSS) += xt_TCPMSS.o diff --git a/net/netfilter/xt_REDIRECT.c b/net/netfilter/xt_REDIRECT.c new file mode 100644 index 000000000000..22a10309297c --- /dev/null +++ b/net/netfilter/xt_REDIRECT.c @@ -0,0 +1,190 @@ +/* + * (C) 1999-2001 Paul `Rusty' Russell + * (C) 2002-2006 Netfilter Core Team + * Copyright (c) 2011 Patrick McHardy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Based on Rusty Russell's IPv4 REDIRECT target. Development of IPv6 + * NAT funded by Astaro. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static const struct in6_addr loopback_addr = IN6ADDR_LOOPBACK_INIT; + +static unsigned int +redirect_tg6(struct sk_buff *skb, const struct xt_action_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + struct nf_nat_range newrange; + struct in6_addr newdst; + enum ip_conntrack_info ctinfo; + struct nf_conn *ct; + + ct = nf_ct_get(skb, &ctinfo); + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = loopback_addr; + else { + struct inet6_dev *idev; + struct inet6_ifaddr *ifa; + bool addr = false; + + rcu_read_lock(); + idev = __in6_dev_get(skb->dev); + if (idev != NULL) { + list_for_each_entry(ifa, &idev->addr_list, if_list) { + newdst = ifa->addr; + addr = true; + break; + } + } + rcu_read_unlock(); + + if (!addr) + return NF_DROP; + } + + newrange.flags = range->flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.in6 = newdst; + newrange.max_addr.in6 = newdst; + newrange.min_proto = range->min_proto; + newrange.max_proto = range->max_proto; + + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static int redirect_tg6_checkentry(const struct xt_tgchk_param *par) +{ + const struct nf_nat_range *range = par->targinfo; + + if (range->flags & NF_NAT_RANGE_MAP_IPS) + return -EINVAL; + return 0; +} + +/* FIXME: Take multiple ranges --RR */ +static int redirect_tg4_check(const struct xt_tgchk_param *par) +{ + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + + if (mr->range[0].flags & NF_NAT_RANGE_MAP_IPS) { + pr_debug("bad MAP_IPS.\n"); + return -EINVAL; + } + if (mr->rangesize != 1) { + pr_debug("bad rangesize %u.\n", mr->rangesize); + return -EINVAL; + } + return 0; +} + +static unsigned int +redirect_tg4(struct sk_buff *skb, const struct xt_action_param *par) +{ + struct nf_conn *ct; + enum ip_conntrack_info ctinfo; + __be32 newdst; + const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; + struct nf_nat_range newrange; + + NF_CT_ASSERT(par->hooknum == NF_INET_PRE_ROUTING || + par->hooknum == NF_INET_LOCAL_OUT); + + ct = nf_ct_get(skb, &ctinfo); + NF_CT_ASSERT(ct && (ctinfo == IP_CT_NEW || ctinfo == IP_CT_RELATED)); + + /* Local packets: make them go to loopback */ + if (par->hooknum == NF_INET_LOCAL_OUT) + newdst = htonl(0x7F000001); + else { + struct in_device *indev; + struct in_ifaddr *ifa; + + newdst = 0; + + rcu_read_lock(); + indev = __in_dev_get_rcu(skb->dev); + if (indev && (ifa = indev->ifa_list)) + newdst = ifa->ifa_local; + rcu_read_unlock(); + + if (!newdst) + return NF_DROP; + } + + /* Transfer from original range. */ + memset(&newrange.min_addr, 0, sizeof(newrange.min_addr)); + memset(&newrange.max_addr, 0, sizeof(newrange.max_addr)); + newrange.flags = mr->range[0].flags | NF_NAT_RANGE_MAP_IPS; + newrange.min_addr.ip = newdst; + newrange.max_addr.ip = newdst; + newrange.min_proto = mr->range[0].min; + newrange.max_proto = mr->range[0].max; + + /* Hand modified range to generic setup. */ + return nf_nat_setup_info(ct, &newrange, NF_NAT_MANIP_DST); +} + +static struct xt_target redirect_tg_reg[] __read_mostly = { + { + .name = "REDIRECT", + .family = NFPROTO_IPV6, + .revision = 0, + .table = "nat", + .checkentry = redirect_tg6_checkentry, + .target = redirect_tg6, + .targetsize = sizeof(struct nf_nat_range), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, + { + .name = "REDIRECT", + .family = NFPROTO_IPV4, + .revision = 0, + .table = "nat", + .target = redirect_tg4, + .checkentry = redirect_tg4_check, + .targetsize = sizeof(struct nf_nat_ipv4_multi_range_compat), + .hooks = (1 << NF_INET_PRE_ROUTING) | + (1 << NF_INET_LOCAL_OUT), + .me = THIS_MODULE, + }, +}; + +static int __init redirect_tg_init(void) +{ + return xt_register_targets(redirect_tg_reg, + ARRAY_SIZE(redirect_tg_reg)); +} + +static void __exit redirect_tg_exit(void) +{ + xt_unregister_targets(redirect_tg_reg, ARRAY_SIZE(redirect_tg_reg)); +} + +module_init(redirect_tg_init); +module_exit(redirect_tg_exit); + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Patrick McHardy "); +MODULE_DESCRIPTION("Xtables: Connection redirection to localhost"); +MODULE_ALIAS("ip6t_REDIRECT"); +MODULE_ALIAS("ipt_REDIRECT"); -- cgit v1.2.3 From f950c0ecc78f745e490d615280e031de4dbb1306 Mon Sep 17 00:00:00 2001 From: Wei Yongjun Date: Thu, 20 Sep 2012 18:29:56 +0000 Subject: ipv6: fix return value check in fib6_add() In case of error, the function fib6_add_1() returns ERR_PTR() or NULL pointer. The ERR_PTR() case check is missing in fib6_add(). dpatch engine is used to generated this patch. (https://github.com/weiyj/dpatch) Signed-off-by: Wei Yongjun Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 13690d650c3e..286acfc21250 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -819,6 +819,10 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) offsetof(struct rt6_info, rt6i_src), allow_create, replace_required); + if (IS_ERR(sn)) { + err = PTR_ERR(sn); + sn = NULL; + } if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node -- cgit v1.2.3 From 623df484a777f3c00c1ea3d6a7565b8d8ac688a1 Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:54 +0000 Subject: tcp: extract code to compute SYNACK RTT In preparation for adding another spot where we compute the SYNACK RTT, extract this code so that it can be shared. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 9 +++++++++ net/ipv4/tcp_ipv4.c | 4 +--- net/ipv6/tcp_ipv6.c | 4 +--- 3 files changed, 11 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/tcp.h b/include/net/tcp.h index a8cb00c0c6d9..a718d0e3d8e7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1137,6 +1137,15 @@ static inline void tcp_openreq_init(struct request_sock *req, ireq->loc_port = tcp_hdr(skb)->dest; } +/* Compute time elapsed between SYNACK and the ACK completing 3WHS */ +static inline void tcp_synack_rtt_meas(struct sock *sk, + struct request_sock *req) +{ + if (tcp_rsk(req)->snt_synack) + tcp_valid_rtt_meas(sk, + tcp_time_stamp - tcp_rsk(req)->snt_synack); +} + extern void tcp_enter_memory_pressure(struct sock *sk); static inline int keepalive_intvl_when(const struct tcp_sock *tp) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index e64abed249cc..1e66f7fb4fe6 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1733,9 +1733,7 @@ struct sock *tcp_v4_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(newsk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); + tcp_synack_rtt_meas(newsk, req); newtp->total_retrans = req->retrans; #ifdef CONFIG_TCP_MD5SIG diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index f3bfb8bbfdec..cfeeeb7fcf54 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1348,9 +1348,7 @@ static struct sock * tcp_v6_syn_recv_sock(struct sock *sk, struct sk_buff *skb, newtp->advmss = tcp_sk(sk)->rx_opt.user_mss; tcp_initialize_rcv_mss(newsk); - if (tcp_rsk(req)->snt_synack) - tcp_valid_rtt_meas(newsk, - tcp_time_stamp - tcp_rsk(req)->snt_synack); + tcp_synack_rtt_meas(newsk, req); newtp->total_retrans = req->retrans; newinet->inet_daddr = newinet->inet_saddr = LOOPBACK4_IPV6; -- cgit v1.2.3 From 016818d076871c4ee34db1e8d74dc17ac1de626a Mon Sep 17 00:00:00 2001 From: Neal Cardwell Date: Sat, 22 Sep 2012 04:18:55 +0000 Subject: tcp: TCP Fast Open Server - take SYNACK RTT after completing 3WHS When taking SYNACK RTT samples for servers using TCP Fast Open, fix the code to ensure that we only call tcp_valid_rtt_meas() after we receive the ACK that completes the 3-way handshake. Previously we were always taking an RTT sample in tcp_v4_syn_recv_sock(). However, for TCP Fast Open connections tcp_v4_conn_req_fastopen() calls tcp_v4_syn_recv_sock() at the time we receive the SYN. So for TFO we must wait until tcp_rcv_state_process() to take the RTT sample. To fix this, we wait until after TFO calls tcp_v4_syn_recv_sock() before we set the snt_synack timestamp, since tcp_synack_rtt_meas() already ensures that we only take a SYNACK RTT sample if snt_synack is non-zero. To be careful, we only take a snt_synack timestamp when a SYNACK transmit or retransmit succeeds. Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/net/tcp.h | 1 + net/ipv4/tcp_input.c | 1 + net/ipv4/tcp_ipv4.c | 12 +++++++++--- net/ipv6/tcp_ipv6.c | 2 +- 4 files changed, 12 insertions(+), 4 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/tcp.h b/include/net/tcp.h index a718d0e3d8e7..6feeccd83dd7 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -1125,6 +1125,7 @@ static inline void tcp_openreq_init(struct request_sock *req, req->cookie_ts = 0; tcp_rsk(req)->rcv_isn = TCP_SKB_CB(skb)->seq; tcp_rsk(req)->rcv_nxt = TCP_SKB_CB(skb)->seq + 1; + tcp_rsk(req)->snt_synack = 0; req->mss = rx_opt->mss_clamp; req->ts_recent = rx_opt->saw_tstamp ? rx_opt->rcv_tsval : 0; ireq->tstamp_ok = rx_opt->tstamp_ok; diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c index e2bec815ff23..bb326684495b 100644 --- a/net/ipv4/tcp_input.c +++ b/net/ipv4/tcp_input.c @@ -5983,6 +5983,7 @@ int tcp_rcv_state_process(struct sock *sk, struct sk_buff *skb, * need req so release it. */ if (req) { + tcp_synack_rtt_meas(sk, req); reqsk_fastopen_remove(sk, req, false); } else { /* Make sure socket is routed, for diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 1e66f7fb4fe6..0a7e020f16b5 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -868,6 +868,8 @@ static int tcp_v4_send_synack(struct sock *sk, struct dst_entry *dst, ireq->rmt_addr, ireq->opt); err = net_xmit_eval(err); + if (!tcp_rsk(req)->snt_synack && !err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; } return err; @@ -1382,6 +1384,7 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; const struct inet_request_sock *ireq = inet_rsk(req); struct sock *child; + int err; req->retrans = 0; req->sk = NULL; @@ -1393,8 +1396,11 @@ static int tcp_v4_conn_req_fastopen(struct sock *sk, kfree_skb(skb_synack); return -1; } - ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, - ireq->rmt_addr, ireq->opt); + err = ip_build_and_send_pkt(skb_synack, sk, ireq->loc_addr, + ireq->rmt_addr, ireq->opt); + err = net_xmit_eval(err); + if (!err) + tcp_rsk(req)->snt_synack = tcp_time_stamp; /* XXX (TFO) - is it ok to ignore error and continue? */ spin_lock(&queue->fastopenq->lock); @@ -1612,7 +1618,6 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) isn = tcp_v4_init_sequence(skb); } tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; if (dst == NULL) { dst = inet_csk_route_req(sk, &fl4, req); @@ -1650,6 +1655,7 @@ int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb) if (err || want_cookie) goto drop_and_free; + tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; /* Add the request_sock to the SYN table */ inet_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index cfeeeb7fcf54..d6212d6bc8d8 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1169,7 +1169,6 @@ static int tcp_v6_conn_request(struct sock *sk, struct sk_buff *skb) } have_isn: tcp_rsk(req)->snt_isn = isn; - tcp_rsk(req)->snt_synack = tcp_time_stamp; if (security_inet_conn_request(sk, skb, req)) goto drop_and_release; @@ -1180,6 +1179,7 @@ have_isn: want_cookie) goto drop_and_free; + tcp_rsk(req)->snt_synack = tcp_time_stamp; tcp_rsk(req)->listener = NULL; inet6_csk_reqsk_queue_hash_add(sk, req, TCP_TIMEOUT_INIT); return 0; -- cgit v1.2.3 From 5640f7685831e088fe6c2e1f863a6805962f8e81 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 23 Sep 2012 23:04:42 +0000 Subject: net: use a per task frag allocator We currently use a per socket order-0 page cache for tcp_sendmsg() operations. This page is used to build fragments for skbs. Its done to increase probability of coalescing small write() into single segments in skbs still in write queue (not yet sent) But it wastes a lot of memory for applications handling many mostly idle sockets, since each socket holds one page in sk->sk_sndmsg_page Its also quite inefficient to build TSO 64KB packets, because we need about 16 pages per skb on arches where PAGE_SIZE = 4096, so we hit page allocator more than wanted. This patch adds a per task frag allocator and uses bigger pages, if available. An automatic fallback is done in case of memory pressure. (up to 32768 bytes per frag, thats order-3 pages on x86) This increases TCP stream performance by 20% on loopback device, but also benefits on other network devices, since 8x less frags are mapped on transmit and unmapped on tx completion. Alexander Duyck mentioned a probable performance win on systems with IOMMU enabled. Its possible some SG enabled hardware cant cope with bigger fragments, but their ndo_start_xmit() should already handle this, splitting a fragment in sub fragments, since some arches have PAGE_SIZE=65536 Successfully tested on various ethernet devices. (ixgbe, igb, bnx2x, tg3, mellanox mlx4) Signed-off-by: Eric Dumazet Cc: Ben Hutchings Cc: Vijay Subramanian Cc: Alexander Duyck Tested-by: Vijay Subramanian Signed-off-by: David S. Miller --- include/linux/sched.h | 3 ++ include/net/inet_sock.h | 4 +-- include/net/sock.h | 27 +++++++++-------- kernel/exit.c | 3 ++ kernel/fork.c | 1 + net/core/skbuff.c | 37 ++++++----------------- net/core/sock.c | 49 ++++++++++++++++++++++++++++-- net/ipv4/ip_output.c | 70 ++++++++++++++++++------------------------- net/ipv4/raw.c | 19 +++++++----- net/ipv4/tcp.c | 79 ++++++++++++++----------------------------------- net/ipv4/tcp_ipv4.c | 8 ----- net/ipv6/ip6_output.c | 65 ++++++++++++++++------------------------ net/sched/em_meta.c | 2 +- 13 files changed, 167 insertions(+), 200 deletions(-) (limited to 'net/ipv6') diff --git a/include/linux/sched.h b/include/linux/sched.h index b8c86648a2f9..a8e2413f6bc3 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1530,6 +1530,9 @@ struct task_struct { * cache last used pipe for splice */ struct pipe_inode_info *splice_pipe; + + struct page_frag task_frag; + #ifdef CONFIG_TASK_DELAY_ACCT struct task_delay_info *delays; #endif diff --git a/include/net/inet_sock.h b/include/net/inet_sock.h index 613cfa401672..256c1ed2d69a 100644 --- a/include/net/inet_sock.h +++ b/include/net/inet_sock.h @@ -101,10 +101,8 @@ struct inet_cork { __be32 addr; struct ip_options *opt; unsigned int fragsize; - struct dst_entry *dst; int length; /* Total length of all frames */ - struct page *page; - u32 off; + struct dst_entry *dst; u8 tx_flags; }; diff --git a/include/net/sock.h b/include/net/sock.h index 84bdaeca1314..f036493b9a61 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -247,8 +247,7 @@ struct cg_proto; * @sk_stamp: time stamp of last packet received * @sk_socket: Identd and reporting IO signals * @sk_user_data: RPC layer private data - * @sk_sndmsg_page: cached page for sendmsg - * @sk_sndmsg_off: cached offset for sendmsg + * @sk_frag: cached page frag * @sk_peek_off: current peek_offset value * @sk_send_head: front of stuff to transmit * @sk_security: used by security modules @@ -362,9 +361,8 @@ struct sock { ktime_t sk_stamp; struct socket *sk_socket; void *sk_user_data; - struct page *sk_sndmsg_page; + struct page_frag sk_frag; struct sk_buff *sk_send_head; - __u32 sk_sndmsg_off; __s32 sk_peek_off; int sk_write_pending; #ifdef CONFIG_SECURITY @@ -2034,18 +2032,23 @@ static inline void sk_stream_moderate_sndbuf(struct sock *sk) struct sk_buff *sk_stream_alloc_skb(struct sock *sk, int size, gfp_t gfp); -static inline struct page *sk_stream_alloc_page(struct sock *sk) +/** + * sk_page_frag - return an appropriate page_frag + * @sk: socket + * + * If socket allocation mode allows current thread to sleep, it means its + * safe to use the per task page_frag instead of the per socket one. + */ +static inline struct page_frag *sk_page_frag(struct sock *sk) { - struct page *page = NULL; + if (sk->sk_allocation & __GFP_WAIT) + return ¤t->task_frag; - page = alloc_pages(sk->sk_allocation, 0); - if (!page) { - sk_enter_memory_pressure(sk); - sk_stream_moderate_sndbuf(sk); - } - return page; + return &sk->sk_frag; } +extern bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag); + /* * Default write policy as shown to user space via poll/select/SIGIO */ diff --git a/kernel/exit.c b/kernel/exit.c index f65345f9e5bb..42f25952edd9 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -1046,6 +1046,9 @@ void do_exit(long code) if (tsk->splice_pipe) __free_pipe_info(tsk->splice_pipe); + if (tsk->task_frag.page) + put_page(tsk->task_frag.page); + validate_creds_for_do_exit(tsk); preempt_disable(); diff --git a/kernel/fork.c b/kernel/fork.c index 2c8857e12855..01565b9ce0f3 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -330,6 +330,7 @@ static struct task_struct *dup_task_struct(struct task_struct *orig) tsk->btrace_seq = 0; #endif tsk->splice_pipe = NULL; + tsk->task_frag.page = NULL; account_kernel_stack(ti, 1); diff --git a/net/core/skbuff.c b/net/core/skbuff.c index fe00d1208167..2ede3cfa8ffa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1655,38 +1655,19 @@ static struct page *linear_to_page(struct page *page, unsigned int *len, unsigned int *offset, struct sk_buff *skb, struct sock *sk) { - struct page *p = sk->sk_sndmsg_page; - unsigned int off; + struct page_frag *pfrag = sk_page_frag(sk); - if (!p) { -new_page: - p = sk->sk_sndmsg_page = alloc_pages(sk->sk_allocation, 0); - if (!p) - return NULL; - - off = sk->sk_sndmsg_off = 0; - /* hold one ref to this page until it's full */ - } else { - unsigned int mlen; - - /* If we are the only user of the page, we can reset offset */ - if (page_count(p) == 1) - sk->sk_sndmsg_off = 0; - off = sk->sk_sndmsg_off; - mlen = PAGE_SIZE - off; - if (mlen < 64 && mlen < *len) { - put_page(p); - goto new_page; - } + if (!sk_page_frag_refill(sk, pfrag)) + return NULL; - *len = min_t(unsigned int, *len, mlen); - } + *len = min_t(unsigned int, *len, pfrag->size - pfrag->offset); - memcpy(page_address(p) + off, page_address(page) + *offset, *len); - sk->sk_sndmsg_off += *len; - *offset = off; + memcpy(page_address(pfrag->page) + pfrag->offset, + page_address(page) + *offset, *len); + *offset = pfrag->offset; + pfrag->offset += *len; - return p; + return pfrag->page; } static bool spd_can_coalesce(const struct splice_pipe_desc *spd, diff --git a/net/core/sock.c b/net/core/sock.c index 2693f7649222..727114cd6f7e 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -1744,6 +1744,45 @@ struct sk_buff *sock_alloc_send_skb(struct sock *sk, unsigned long size, } EXPORT_SYMBOL(sock_alloc_send_skb); +/* On 32bit arches, an skb frag is limited to 2^15 */ +#define SKB_FRAG_PAGE_ORDER get_order(32768) + +bool sk_page_frag_refill(struct sock *sk, struct page_frag *pfrag) +{ + int order; + + if (pfrag->page) { + if (atomic_read(&pfrag->page->_count) == 1) { + pfrag->offset = 0; + return true; + } + if (pfrag->offset < pfrag->size) + return true; + put_page(pfrag->page); + } + + /* We restrict high order allocations to users that can afford to wait */ + order = (sk->sk_allocation & __GFP_WAIT) ? SKB_FRAG_PAGE_ORDER : 0; + + do { + gfp_t gfp = sk->sk_allocation; + + if (order) + gfp |= __GFP_COMP | __GFP_NOWARN; + pfrag->page = alloc_pages(gfp, order); + if (likely(pfrag->page)) { + pfrag->offset = 0; + pfrag->size = PAGE_SIZE << order; + return true; + } + } while (--order >= 0); + + sk_enter_memory_pressure(sk); + sk_stream_moderate_sndbuf(sk); + return false; +} +EXPORT_SYMBOL(sk_page_frag_refill); + static void __lock_sock(struct sock *sk) __releases(&sk->sk_lock.slock) __acquires(&sk->sk_lock.slock) @@ -2173,8 +2212,8 @@ void sock_init_data(struct socket *sock, struct sock *sk) sk->sk_error_report = sock_def_error_report; sk->sk_destruct = sock_def_destruct; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; + sk->sk_frag.page = NULL; + sk->sk_frag.offset = 0; sk->sk_peek_off = -1; sk->sk_peer_pid = NULL; @@ -2417,6 +2456,12 @@ void sk_common_release(struct sock *sk) xfrm_sk_free_policy(sk); sk_refcnt_debug_release(sk); + + if (sk->sk_frag.page) { + put_page(sk->sk_frag.page); + sk->sk_frag.page = NULL; + } + sock_put(sk); } EXPORT_SYMBOL(sk_common_release); diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c index a5beab1dc958..24a29a39e9a8 100644 --- a/net/ipv4/ip_output.c +++ b/net/ipv4/ip_output.c @@ -793,6 +793,7 @@ static int __ip_append_data(struct sock *sk, struct flowi4 *fl4, struct sk_buff_head *queue, struct inet_cork *cork, + struct page_frag *pfrag, int getfrag(void *from, char *to, int offset, int len, int odd, struct sk_buff *skb), void *from, int length, int transhdrlen, @@ -987,47 +988,30 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = cork->page; - int off = cork->off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != skb_frag_page(frag)) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - skb_fill_page_desc(skb, i, page, off, 0); - skb_frag_ref(skb, i); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if (i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - cork->page = page; - cork->off = 0; - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; - goto error; - } - if (getfrag(from, skb_frag_address(frag)+skb_frag_size(frag), - offset, copy, skb->len, skb) < 0) { - err = -EFAULT; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } - cork->off += copy; - skb_frag_size_add(frag, copy); + copy = min_t(int, copy, pfrag->size - pfrag->offset); + if (getfrag(from, + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1039,6 +1023,8 @@ alloc_new_skb: return 0; +error_efault: + err = -EFAULT; error: cork->length -= length; IP_INC_STATS(sock_net(sk), IPSTATS_MIB_OUTDISCARDS); @@ -1079,8 +1065,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork, cork->dst = &rt->dst; cork->length = 0; cork->tx_flags = ipc->tx_flags; - cork->page = NULL; - cork->off = 0; return 0; } @@ -1117,7 +1101,8 @@ int ip_append_data(struct sock *sk, struct flowi4 *fl4, transhdrlen = 0; } - return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, getfrag, + return __ip_append_data(sk, fl4, &sk->sk_write_queue, &inet->cork.base, + sk_page_frag(sk), getfrag, from, length, transhdrlen, flags); } @@ -1439,7 +1424,8 @@ struct sk_buff *ip_make_skb(struct sock *sk, if (err) return ERR_PTR(err); - err = __ip_append_data(sk, fl4, &queue, &cork, getfrag, + err = __ip_append_data(sk, fl4, &queue, &cork, + ¤t->task_frag, getfrag, from, length, transhdrlen, flags); if (err) { __ip_flush_pending_frames(sk, &queue, &cork); diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c index f2425785d40a..a80740ba4248 100644 --- a/net/ipv4/raw.c +++ b/net/ipv4/raw.c @@ -131,18 +131,23 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmp_filter(struct sock *sk, struct sk_buff *skb) +static int icmp_filter(const struct sock *sk, const struct sk_buff *skb) { - int type; - - if (!pskb_may_pull(skb, sizeof(struct icmphdr))) + struct icmphdr _hdr; + const struct icmphdr *hdr; + + pr_err("icmp_filter skb_transport_offset %d data-head %ld len %d/%d\n", + skb_transport_offset(skb), skb->data - skb->head, skb->len, skb->data_len); + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + pr_err("head %p data %p hdr %p type %d\n", skb->head, skb->data, hdr, hdr ? hdr->type : -1); + if (!hdr) return 1; - type = icmp_hdr(skb)->type; - if (type < 32) { + if (hdr->type < 32) { __u32 data = raw_sk(sk)->filter.data; - return ((1 << type) & data) != 0; + return ((1U << hdr->type) & data) != 0; } /* Do not block unknown ICMP types */ diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 7b1e940393cf..72ea4752f21b 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1150,78 +1150,43 @@ new_segment: if (err) goto do_fault; } else { - bool merge = false; + bool merge = true; int i = skb_shinfo(skb)->nr_frags; - struct page *page = sk->sk_sndmsg_page; - int off; - - if (page && page_count(page) == 1) - sk->sk_sndmsg_off = 0; - - off = sk->sk_sndmsg_off; - - if (skb_can_coalesce(skb, i, page, off) && - off != PAGE_SIZE) { - /* We can extend the last page - * fragment. */ - merge = true; - } else if (i == MAX_SKB_FRAGS || !sg) { - /* Need to add new fragment and cannot - * do this because interface is non-SG, - * or because all the page slots are - * busy. */ - tcp_mark_push(tp, skb); - goto new_segment; - } else if (page) { - if (off == PAGE_SIZE) { - put_page(page); - sk->sk_sndmsg_page = page = NULL; - off = 0; + struct page_frag *pfrag = sk_page_frag(sk); + + if (!sk_page_frag_refill(sk, pfrag)) + goto wait_for_memory; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + if (i == MAX_SKB_FRAGS || !sg) { + tcp_mark_push(tp, skb); + goto new_segment; } - } else - off = 0; + merge = false; + } - if (copy > PAGE_SIZE - off) - copy = PAGE_SIZE - off; + copy = min_t(int, copy, pfrag->size - pfrag->offset); if (!sk_wmem_schedule(sk, copy)) goto wait_for_memory; - if (!page) { - /* Allocate new cache page. */ - if (!(page = sk_stream_alloc_page(sk))) - goto wait_for_memory; - } - - /* Time to copy data. We are close to - * the end! */ err = skb_copy_to_page_nocache(sk, from, skb, - page, off, copy); - if (err) { - /* If this page was new, give it to the - * socket so it does not get leaked. - */ - if (!sk->sk_sndmsg_page) { - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; - } + pfrag->page, + pfrag->offset, + copy); + if (err) goto do_error; - } /* Update the skb. */ if (merge) { skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { - skb_fill_page_desc(skb, i, page, off, copy); - if (sk->sk_sndmsg_page) { - get_page(page); - } else if (off + copy < PAGE_SIZE) { - get_page(page); - sk->sk_sndmsg_page = page; - } + skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, copy); + get_page(pfrag->page); } - - sk->sk_sndmsg_off = off + copy; + pfrag->offset += copy; } if (!copied) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 0a7e020f16b5..93406c583f43 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2200,14 +2200,6 @@ void tcp_v4_destroy_sock(struct sock *sk) if (inet_csk(sk)->icsk_bind_hash) inet_put_port(sk); - /* - * If sendmsg cached page exists, toss it. - */ - if (sk->sk_sndmsg_page) { - __free_page(sk->sk_sndmsg_page); - sk->sk_sndmsg_page = NULL; - } - /* TCP Cookie Transactions */ if (tp->cookie_values != NULL) { kref_put(&tp->cookie_values->kref, diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 3dd4a37488d5..aece3e792f84 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1279,8 +1279,6 @@ int ip6_append_data(struct sock *sk, int getfrag(void *from, char *to, if (dst_allfrag(rt->dst.path)) cork->flags |= IPCORK_ALLFRAG; cork->length = 0; - sk->sk_sndmsg_page = NULL; - sk->sk_sndmsg_off = 0; exthdrlen = (opt ? opt->opt_flen : 0) - rt->rt6i_nfheader_len; length += exthdrlen; transhdrlen += exthdrlen; @@ -1504,48 +1502,31 @@ alloc_new_skb: } } else { int i = skb_shinfo(skb)->nr_frags; - skb_frag_t *frag = &skb_shinfo(skb)->frags[i-1]; - struct page *page = sk->sk_sndmsg_page; - int off = sk->sk_sndmsg_off; - unsigned int left; - - if (page && (left = PAGE_SIZE - off) > 0) { - if (copy >= left) - copy = left; - if (page != skb_frag_page(frag)) { - if (i == MAX_SKB_FRAGS) { - err = -EMSGSIZE; - goto error; - } - skb_fill_page_desc(skb, i, page, sk->sk_sndmsg_off, 0); - skb_frag_ref(skb, i); - frag = &skb_shinfo(skb)->frags[i]; - } - } else if(i < MAX_SKB_FRAGS) { - if (copy > PAGE_SIZE) - copy = PAGE_SIZE; - page = alloc_pages(sk->sk_allocation, 0); - if (page == NULL) { - err = -ENOMEM; - goto error; - } - sk->sk_sndmsg_page = page; - sk->sk_sndmsg_off = 0; + struct page_frag *pfrag = sk_page_frag(sk); - skb_fill_page_desc(skb, i, page, 0, 0); - frag = &skb_shinfo(skb)->frags[i]; - } else { - err = -EMSGSIZE; + err = -ENOMEM; + if (!sk_page_frag_refill(sk, pfrag)) goto error; + + if (!skb_can_coalesce(skb, i, pfrag->page, + pfrag->offset)) { + err = -EMSGSIZE; + if (i == MAX_SKB_FRAGS) + goto error; + + __skb_fill_page_desc(skb, i, pfrag->page, + pfrag->offset, 0); + skb_shinfo(skb)->nr_frags = ++i; + get_page(pfrag->page); } + copy = min_t(int, copy, pfrag->size - pfrag->offset); if (getfrag(from, - skb_frag_address(frag) + skb_frag_size(frag), - offset, copy, skb->len, skb) < 0) { - err = -EFAULT; - goto error; - } - sk->sk_sndmsg_off += copy; - skb_frag_size_add(frag, copy); + page_address(pfrag->page) + pfrag->offset, + offset, copy, skb->len, skb) < 0) + goto error_efault; + + pfrag->offset += copy; + skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); skb->len += copy; skb->data_len += copy; skb->truesize += copy; @@ -1554,7 +1535,11 @@ alloc_new_skb: offset += copy; length -= copy; } + return 0; + +error_efault: + err = -EFAULT; error: cork->length -= length; IP6_INC_STATS(sock_net(sk), rt->rt6i_idev, IPSTATS_MIB_OUTDISCARDS); diff --git a/net/sched/em_meta.c b/net/sched/em_meta.c index 4ab6e3325573..7c3de6ffa516 100644 --- a/net/sched/em_meta.c +++ b/net/sched/em_meta.c @@ -461,7 +461,7 @@ META_COLLECTOR(int_sk_sndtimeo) META_COLLECTOR(int_sk_sendmsg_off) { SKIP_NONLOCAL(skb); - dst->value = skb->sk->sk_sndmsg_off; + dst->value = skb->sk->sk_frag.offset; } META_COLLECTOR(int_sk_write_pend) -- cgit v1.2.3 From 1b05c4b50edbddbdde715c4a7350629819f6655e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 07:03:40 +0000 Subject: ipv6: raw: fix icmpv6_filter() icmpv6_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() and change the prototype to make clear both sk and skb are const. Also, if icmpv6 header cannot be found, do not deliver the packet, as we do in IPv4. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/raw.c | 21 ++++++++++----------- 1 file changed, 10 insertions(+), 11 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c index ef0579d5bca6..4a5f78b50495 100644 --- a/net/ipv6/raw.c +++ b/net/ipv6/raw.c @@ -107,21 +107,20 @@ found: * 0 - deliver * 1 - block */ -static __inline__ int icmpv6_filter(struct sock *sk, struct sk_buff *skb) +static int icmpv6_filter(const struct sock *sk, const struct sk_buff *skb) { - struct icmp6hdr *icmph; - struct raw6_sock *rp = raw6_sk(sk); - - if (pskb_may_pull(skb, sizeof(struct icmp6hdr))) { - __u32 *data = &rp->filter.data[0]; - int bit_nr; + struct icmp6hdr *_hdr; + const struct icmp6hdr *hdr; - icmph = (struct icmp6hdr *) skb->data; - bit_nr = icmph->icmp6_type; + hdr = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (hdr) { + const __u32 *data = &raw6_sk(sk)->filter.data[0]; + unsigned int type = hdr->icmp6_type; - return (data[bit_nr >> 5] & (1 << (bit_nr & 31))) != 0; + return (data[type >> 5] & (1U << (type & 31))) != 0; } - return 0; + return 1; } #if defined(CONFIG_IPV6_MIP6) || defined(CONFIG_IPV6_MIP6_MODULE) -- cgit v1.2.3 From 96af69ea2a83d292238bdba20e4508ee967cf8cb Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Sep 2012 22:01:28 +0200 Subject: ipv6: mip6: fix mip6_mh_filter() mip6_mh_filter() should not modify its input, or else its caller would need to recompute ipv6_hdr() if skb->head is reallocated. Use skb_header_pointer() instead of pskb_may_pull() Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv6/mip6.c | 20 +++++++++++--------- 1 file changed, 11 insertions(+), 9 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/mip6.c b/net/ipv6/mip6.c index 5b087c31d87b..0f9bdc5ee9f3 100644 --- a/net/ipv6/mip6.c +++ b/net/ipv6/mip6.c @@ -86,28 +86,30 @@ static int mip6_mh_len(int type) static int mip6_mh_filter(struct sock *sk, struct sk_buff *skb) { - struct ip6_mh *mh; + struct ip6_mh _hdr; + const struct ip6_mh *mh; - if (!pskb_may_pull(skb, (skb_transport_offset(skb)) + 8) || - !pskb_may_pull(skb, (skb_transport_offset(skb) + - ((skb_transport_header(skb)[1] + 1) << 3)))) + mh = skb_header_pointer(skb, skb_transport_offset(skb), + sizeof(_hdr), &_hdr); + if (!mh) return -1; - mh = (struct ip6_mh *)skb_transport_header(skb); + if (((mh->ip6mh_hdrlen + 1) << 3) > skb->len) + return -1; if (mh->ip6mh_hdrlen < mip6_mh_len(mh->ip6mh_type)) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH message too short: %d vs >=%d\n", mh->ip6mh_hdrlen, mip6_mh_len(mh->ip6mh_type)); - mip6_param_prob(skb, 0, ((&mh->ip6mh_hdrlen) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_hdrlen) + + skb_network_header_len(skb)); return -1; } if (mh->ip6mh_proto != IPPROTO_NONE) { LIMIT_NETDEBUG(KERN_DEBUG "mip6: MH invalid payload proto = %d\n", mh->ip6mh_proto); - mip6_param_prob(skb, 0, ((&mh->ip6mh_proto) - - skb_network_header(skb))); + mip6_param_prob(skb, 0, offsetof(struct ip6_mh, ip6mh_proto) + + skb_network_header_len(skb)); return -1; } -- cgit v1.2.3 From 0c5794a66c7c6443759e6098841767958a031187 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 24 Sep 2012 18:12:24 +0000 Subject: gre: remove unnecessary rcu_read_lock/unlock The gre function pointers for receive and error handling are always called (from gre.c) with rcu_read_lock already held. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_gre.c | 21 +++++++-------------- net/ipv6/ip6_gre.c | 14 +++----------- 2 files changed, 10 insertions(+), 25 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 0d4c3832d490..1c012cb2cb94 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -557,37 +557,34 @@ static void ipgre_err(struct sk_buff *skb, u32 info) break; } - rcu_read_lock(); t = ipgre_tunnel_lookup(skb->dev, iph->daddr, iph->saddr, flags, key, p[1]); if (t == NULL) - goto out; + return; if (type == ICMP_DEST_UNREACH && code == ICMP_FRAG_NEEDED) { ipv4_update_pmtu(skb, dev_net(skb->dev), info, t->parms.link, 0, IPPROTO_GRE, 0); - goto out; + return; } if (type == ICMP_REDIRECT) { ipv4_redirect(skb, dev_net(skb->dev), t->parms.link, 0, IPPROTO_GRE, 0); - goto out; + return; } if (t->parms.iph.daddr == 0 || ipv4_is_multicast(t->parms.iph.daddr)) - goto out; + return; if (t->parms.iph.ttl == 0 && type == ICMP_TIME_EXCEEDED) - goto out; + return; if (time_before(jiffies, t->err_time + IPTUNNEL_ERR_TIMEO)) t->err_count++; else t->err_count = 1; t->err_time = jiffies; -out: - rcu_read_unlock(); } static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) @@ -625,7 +622,7 @@ static int ipgre_rcv(struct sk_buff *skb) __be16 gre_proto; if (!pskb_may_pull(skb, 16)) - goto drop_nolock; + goto drop; iph = ip_hdr(skb); h = skb->data; @@ -636,7 +633,7 @@ static int ipgre_rcv(struct sk_buff *skb) - We do not support routing headers. */ if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop_nolock; + goto drop; if (flags&GRE_CSUM) { switch (skb->ip_summed) { @@ -664,7 +661,6 @@ static int ipgre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - rcu_read_lock(); tunnel = ipgre_tunnel_lookup(skb->dev, iph->saddr, iph->daddr, flags, key, gre_proto); @@ -740,14 +736,11 @@ static int ipgre_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } icmp_send(skb, ICMP_DEST_UNREACH, ICMP_PORT_UNREACH, 0); drop: - rcu_read_unlock(); -drop_nolock: kfree_skb(skb); return 0; } diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 424d11a4e7ff..b987d4db790f 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -437,14 +437,12 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, ipv6h = (const struct ipv6hdr *)skb->data; p = (__be16 *)(skb->data + offset); - rcu_read_lock(); - t = ip6gre_tunnel_lookup(skb->dev, &ipv6h->daddr, &ipv6h->saddr, flags & GRE_KEY ? *(((__be32 *)p) + (grehlen / 4) - 1) : 0, p[1]); if (t == NULL) - goto out; + return; switch (type) { __u32 teli; @@ -489,8 +487,6 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, else t->err_count = 1; t->err_time = jiffies; -out: - rcu_read_unlock(); } static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, @@ -528,7 +524,7 @@ static int ip6gre_rcv(struct sk_buff *skb) __be16 gre_proto; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) - goto drop_nolock; + goto drop; ipv6h = ipv6_hdr(skb); h = skb->data; @@ -539,7 +535,7 @@ static int ip6gre_rcv(struct sk_buff *skb) - We do not support routing headers. */ if (flags&(GRE_VERSION|GRE_ROUTING)) - goto drop_nolock; + goto drop; if (flags&GRE_CSUM) { switch (skb->ip_summed) { @@ -567,7 +563,6 @@ static int ip6gre_rcv(struct sk_buff *skb) gre_proto = *(__be16 *)(h + 2); - rcu_read_lock(); tunnel = ip6gre_tunnel_lookup(skb->dev, &ipv6h->saddr, &ipv6h->daddr, key, gre_proto); @@ -646,14 +641,11 @@ static int ip6gre_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0); drop: - rcu_read_unlock(); -drop_nolock: kfree_skb(skb); return 0; } -- cgit v1.2.3 From b0558ef24a792906914fcad277f3befe2420e618 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 24 Sep 2012 18:12:25 +0000 Subject: xfrm: remove extranous rcu_read_lock The handlers for xfrm_tunnel are always invoked with rcu read lock already. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- net/ipv4/ip_vti.c | 5 ----- net/ipv4/ipip.c | 9 +-------- net/ipv6/sit.c | 6 ------ 3 files changed, 1 insertion(+), 19 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c index 3511ffba7bd4..978bca4818ae 100644 --- a/net/ipv4/ip_vti.c +++ b/net/ipv4/ip_vti.c @@ -304,7 +304,6 @@ static int vti_err(struct sk_buff *skb, u32 info) err = -ENOENT; - rcu_read_lock(); t = vti_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL) goto out; @@ -326,7 +325,6 @@ static int vti_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); return err; } @@ -336,7 +334,6 @@ static int vti_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - rcu_read_lock(); tunnel = vti_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; @@ -348,10 +345,8 @@ static int vti_rcv(struct sk_buff *skb) u64_stats_update_end(&tstats->syncp); skb->dev = tunnel->dev; - rcu_read_unlock(); return 1; } - rcu_read_unlock(); return -1; } diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 99af1f0cc658..618bde867ac1 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -365,8 +365,6 @@ static int ipip_err(struct sk_buff *skb, u32 info) } err = -ENOENT; - - rcu_read_lock(); t = ipip_tunnel_lookup(dev_net(skb->dev), iph->daddr, iph->saddr); if (t == NULL) goto out; @@ -398,7 +396,7 @@ static int ipip_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); + return err; } @@ -416,13 +414,11 @@ static int ipip_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); - rcu_read_lock(); tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -445,11 +441,8 @@ static int ipip_rcv(struct sk_buff *skb) ipip_ecn_decapsulate(iph, skb); netif_rx(skb); - - rcu_read_unlock(); return 0; } - rcu_read_unlock(); return -1; } diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3bd1bfc01f85..3ed54ffd8d50 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -545,7 +545,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) err = -ENOENT; - rcu_read_lock(); t = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->daddr, @@ -579,7 +578,6 @@ static int ipip6_err(struct sk_buff *skb, u32 info) t->err_count = 1; t->err_time = jiffies; out: - rcu_read_unlock(); return err; } @@ -599,7 +597,6 @@ static int ipip6_rcv(struct sk_buff *skb) iph = ip_hdr(skb); - rcu_read_lock(); tunnel = ipip6_tunnel_lookup(dev_net(skb->dev), skb->dev, iph->saddr, iph->daddr); if (tunnel != NULL) { @@ -615,7 +612,6 @@ static int ipip6_rcv(struct sk_buff *skb) if ((tunnel->dev->priv_flags & IFF_ISATAP) && !isatap_chksrc(skb, iph, tunnel)) { tunnel->dev->stats.rx_errors++; - rcu_read_unlock(); kfree_skb(skb); return 0; } @@ -630,12 +626,10 @@ static int ipip6_rcv(struct sk_buff *skb) netif_rx(skb); - rcu_read_unlock(); return 0; } /* no tunnel matched, let upstream know, ipsec may handle it */ - rcu_read_unlock(); return 1; out: kfree_skb(skb); -- cgit v1.2.3 From eccc1bb8d4b4cf68d3c9becb083fa94ada7d495c Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Tue, 25 Sep 2012 11:02:48 +0000 Subject: tunnel: drop packet if ECN present with not-ECT Linux tunnels were written before RFC6040 and therefore never implemented the corner case of ECN getting set in the outer header and the inner header not being ready for it. Section 4.2. Default Tunnel Egress Behaviour. o If the inner ECN field is Not-ECT, the decapsulator MUST NOT propagate any other ECN codepoint onwards. This is because the inner Not-ECT marking is set by transports that rely on dropped packets as an indication of congestion and would not understand or respond to any other ECN codepoint [RFC4774]. Specifically: * If the inner ECN field is Not-ECT and the outer ECN field is CE, the decapsulator MUST drop the packet. * If the inner ECN field is Not-ECT and the outer ECN field is Not-ECT, ECT(0), or ECT(1), the decapsulator MUST forward the outgoing packet with the ECN field cleared to Not-ECT. This patch moves the ECN decap logic out of the individual tunnels into a common place. It also adds logging to allow detecting broken systems that set ECN bits incorrectly when tunneling (or an intermediate router might be changing the header). Overloads rx_frame_error to keep track of ECN related error. Thanks to Chris Wright who caught this while reviewing the new VXLAN tunnel. This code was tested by injecting faulty logic in other end GRE to send incorrectly encapsulated packets. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/net/inet_ecn.h | 76 ++++++++++++++++++++++++++++++++++++++++++++++++++ net/ipv4/ip_gre.c | 38 ++++++++++++++----------- net/ipv4/ipip.c | 42 +++++++++++++++++----------- net/ipv6/ip6_gre.c | 54 ++++++++++++++++------------------- 4 files changed, 147 insertions(+), 63 deletions(-) (limited to 'net/ipv6') diff --git a/include/net/inet_ecn.h b/include/net/inet_ecn.h index 2fa14691869c..aab73757bc4d 100644 --- a/include/net/inet_ecn.h +++ b/include/net/inet_ecn.h @@ -15,6 +15,8 @@ enum { INET_ECN_MASK = 3, }; +extern int sysctl_tunnel_ecn_log; + static inline int INET_ECN_is_ce(__u8 dsfield) { return (dsfield & INET_ECN_MASK) == INET_ECN_CE; @@ -145,4 +147,78 @@ static inline int INET_ECN_set_ce(struct sk_buff *skb) return 0; } +/* + * RFC 6080 4.2 + * To decapsulate the inner header at the tunnel egress, a compliant + * tunnel egress MUST set the outgoing ECN field to the codepoint at the + * intersection of the appropriate arriving inner header (row) and outer + * header (column) in Figure 4 + * + * +---------+------------------------------------------------+ + * |Arriving | Arriving Outer Header | + * | Inner +---------+------------+------------+------------+ + * | Header | Not-ECT | ECT(0) | ECT(1) | CE | + * +---------+---------+------------+------------+------------+ + * | Not-ECT | Not-ECT |Not-ECT(!!!)|Not-ECT(!!!)| (!!!)| + * | ECT(0) | ECT(0) | ECT(0) | ECT(1) | CE | + * | ECT(1) | ECT(1) | ECT(1) (!) | ECT(1) | CE | + * | CE | CE | CE | CE(!!!)| CE | + * +---------+---------+------------+------------+------------+ + * + * Figure 4: New IP in IP Decapsulation Behaviour + * + * returns 0 on success + * 1 if something is broken and should be logged (!!! above) + * 2 if packet should be dropped + */ +static inline int INET_ECN_decapsulate(struct sk_buff *skb, + __u8 outer, __u8 inner) +{ + if (INET_ECN_is_not_ect(inner)) { + switch (outer & INET_ECN_MASK) { + case INET_ECN_NOT_ECT: + return 0; + case INET_ECN_ECT_0: + case INET_ECN_ECT_1: + return 1; + case INET_ECN_CE: + return 2; + } + } + + if (INET_ECN_is_ce(outer)) + INET_ECN_set_ce(skb); + + return 0; +} + +static inline int IP_ECN_decapsulate(const struct iphdr *oiph, + struct sk_buff *skb) +{ + __u8 inner; + + if (skb->protocol == htons(ETH_P_IP)) + inner = ip_hdr(skb)->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + inner = ipv6_get_dsfield(ipv6_hdr(skb)); + else + return 0; + + return INET_ECN_decapsulate(skb, oiph->tos, inner); +} + +static inline int IP6_ECN_decapsulate(const struct ipv6hdr *oipv6h, + struct sk_buff *skb) +{ + __u8 inner; + + if (skb->protocol == htons(ETH_P_IP)) + inner = ip_hdr(skb)->tos; + else if (skb->protocol == htons(ETH_P_IPV6)) + inner = ipv6_get_dsfield(ipv6_hdr(skb)); + else + return 0; + + return INET_ECN_decapsulate(skb, ipv6_get_dsfield(oipv6h), inner); +} #endif diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c index 1c012cb2cb94..ef0b861ce044 100644 --- a/net/ipv4/ip_gre.c +++ b/net/ipv4/ip_gre.c @@ -120,6 +120,10 @@ Alexey Kuznetsov. */ +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static struct rtnl_link_ops ipgre_link_ops __read_mostly; static int ipgre_tunnel_init(struct net_device *dev); static void ipgre_tunnel_setup(struct net_device *dev); @@ -204,7 +208,9 @@ static struct rtnl_link_stats64 *ipgre_get_stats64(struct net_device *dev, tot->rx_crc_errors = dev->stats.rx_crc_errors; tot->rx_fifo_errors = dev->stats.rx_fifo_errors; tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; tot->tx_carrier_errors = dev->stats.tx_carrier_errors; tot->tx_dropped = dev->stats.tx_dropped; @@ -587,17 +593,6 @@ static void ipgre_err(struct sk_buff *skb, u32 info) t->err_time = jiffies; } -static inline void ipgre_ecn_decapsulate(const struct iphdr *iph, struct sk_buff *skb) -{ - if (INET_ECN_is_ce(iph->tos)) { - if (skb->protocol == htons(ETH_P_IP)) { - IP_ECN_set_ce(ip_hdr(skb)); - } else if (skb->protocol == htons(ETH_P_IPV6)) { - IP6_ECN_set_ce(ipv6_hdr(skb)); - } - } -} - static inline u8 ipgre_ecn_encapsulate(u8 tos, const struct iphdr *old_iph, struct sk_buff *skb) { @@ -620,6 +615,7 @@ static int ipgre_rcv(struct sk_buff *skb) struct ip_tunnel *tunnel; int offset = 4; __be16 gre_proto; + int err; if (!pskb_may_pull(skb, 16)) goto drop; @@ -723,17 +719,27 @@ static int ipgre_rcv(struct sk_buff *skb) skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - ipgre_ecn_decapsulate(iph, skb); - netif_rx(skb); return 0; diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c index 618bde867ac1..e15b45297c09 100644 --- a/net/ipv4/ipip.c +++ b/net/ipv4/ipip.c @@ -120,6 +120,10 @@ #define HASH_SIZE 16 #define HASH(addr) (((__force u32)addr^((__force u32)addr>>4))&0xF) +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + static int ipip_net_id __read_mostly; struct ipip_net { struct ip_tunnel __rcu *tunnels_r_l[HASH_SIZE]; @@ -400,28 +404,18 @@ out: return err; } -static inline void ipip_ecn_decapsulate(const struct iphdr *outer_iph, - struct sk_buff *skb) -{ - struct iphdr *inner_iph = ip_hdr(skb); - - if (INET_ECN_is_ce(outer_iph->tos)) - IP_ECN_set_ce(inner_iph); -} - static int ipip_rcv(struct sk_buff *skb) { struct ip_tunnel *tunnel; const struct iphdr *iph = ip_hdr(skb); + int err; tunnel = ipip_tunnel_lookup(dev_net(skb->dev), iph->saddr, iph->daddr); if (tunnel != NULL) { struct pcpu_tstats *tstats; - if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) { - kfree_skb(skb); - return 0; - } + if (!xfrm4_policy_check(NULL, XFRM_POLICY_IN, skb)) + goto drop; secpath_reset(skb); @@ -430,21 +424,35 @@ static int ipip_rcv(struct sk_buff *skb) skb->protocol = htons(ETH_P_IP); skb->pkt_type = PACKET_HOST; + __skb_tunnel_rx(skb, tunnel->dev); + + err = IP_ECN_decapsulate(iph, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI4 with TOS=%#x\n", + &iph->saddr, iph->tos); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - ipip_ecn_decapsulate(iph, skb); - netif_rx(skb); return 0; } return -1; + +drop: + kfree_skb(skb); + return 0; } /* diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index b987d4db790f..613a16647741 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -56,6 +56,10 @@ #include +static bool log_ecn_error = true; +module_param(log_ecn_error, bool, 0644); +MODULE_PARM_DESC(log_ecn_error, "Log packets received with corrupted ECN"); + #define IPV6_TCLASS_MASK (IPV6_FLOWINFO_MASK & ~IPV6_FLOWLABEL_MASK) #define IPV6_TCLASS_SHIFT 20 @@ -149,7 +153,9 @@ static struct rtnl_link_stats64 *ip6gre_get_stats64(struct net_device *dev, tot->rx_crc_errors = dev->stats.rx_crc_errors; tot->rx_fifo_errors = dev->stats.rx_fifo_errors; tot->rx_length_errors = dev->stats.rx_length_errors; + tot->rx_frame_errors = dev->stats.rx_frame_errors; tot->rx_errors = dev->stats.rx_errors; + tot->tx_fifo_errors = dev->stats.tx_fifo_errors; tot->tx_carrier_errors = dev->stats.tx_carrier_errors; tot->tx_dropped = dev->stats.tx_dropped; @@ -489,28 +495,6 @@ static void ip6gre_err(struct sk_buff *skb, struct inet6_skb_parm *opt, t->err_time = jiffies; } -static inline void ip6gre_ecn_decapsulate_ipv4(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, struct sk_buff *skb) -{ - __u8 dsfield = ipv6_get_dsfield(ipv6h) & ~INET_ECN_MASK; - - if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) - ipv4_change_dsfield(ip_hdr(skb), INET_ECN_MASK, dsfield); - - if (INET_ECN_is_ce(dsfield)) - IP_ECN_set_ce(ip_hdr(skb)); -} - -static inline void ip6gre_ecn_decapsulate_ipv6(const struct ip6_tnl *t, - const struct ipv6hdr *ipv6h, struct sk_buff *skb) -{ - if (t->parms.flags & IP6_TNL_F_RCV_DSCP_COPY) - ipv6_copy_dscp(ipv6_get_dsfield(ipv6h), ipv6_hdr(skb)); - - if (INET_ECN_is_ce(ipv6_get_dsfield(ipv6h))) - IP6_ECN_set_ce(ipv6_hdr(skb)); -} - static int ip6gre_rcv(struct sk_buff *skb) { const struct ipv6hdr *ipv6h; @@ -522,6 +506,7 @@ static int ip6gre_rcv(struct sk_buff *skb) struct ip6_tnl *tunnel; int offset = 4; __be16 gre_proto; + int err; if (!pskb_may_pull(skb, sizeof(struct in6_addr))) goto drop; @@ -625,20 +610,29 @@ static int ip6gre_rcv(struct sk_buff *skb) skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN); } + __skb_tunnel_rx(skb, tunnel->dev); + + skb_reset_network_header(skb); + + err = IP6_ECN_decapsulate(ipv6h, skb); + if (unlikely(err)) { + if (log_ecn_error) + net_info_ratelimited("non-ECT from %pI6 with dsfield=%#x\n", + &ipv6h->saddr, + ipv6_get_dsfield(ipv6h)); + if (err > 1) { + ++tunnel->dev->stats.rx_frame_errors; + ++tunnel->dev->stats.rx_errors; + goto drop; + } + } + tstats = this_cpu_ptr(tunnel->dev->tstats); u64_stats_update_begin(&tstats->syncp); tstats->rx_packets++; tstats->rx_bytes += skb->len; u64_stats_update_end(&tstats->syncp); - __skb_tunnel_rx(skb, tunnel->dev); - - skb_reset_network_header(skb); - if (skb->protocol == htons(ETH_P_IP)) - ip6gre_ecn_decapsulate_ipv4(tunnel, ipv6h, skb); - else if (skb->protocol == htons(ETH_P_IPV6)) - ip6gre_ecn_decapsulate_ipv6(tunnel, ipv6h, skb); - netif_rx(skb); return 0; -- cgit v1.2.3 From 4b7cc7fc26598914ba202d04d4196efb26b22e69 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 25 Sep 2012 06:07:50 +0000 Subject: nf_defrag_ipv6: fix oops on module unloading fix copy-paste error introduced in linux-next commit "ipv6: add a new namespace for nf_conntrack_reasm" Signed-off-by: Konstantin Khlebnikov Cc: Amerigo Wang Cc: David S. Miller Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/ipv6/netfilter/nf_conntrack_reasm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net/ipv6') diff --git a/net/ipv6/netfilter/nf_conntrack_reasm.c b/net/ipv6/netfilter/nf_conntrack_reasm.c index 1af12fde08df..18bd9bbbd1c6 100644 --- a/net/ipv6/netfilter/nf_conntrack_reasm.c +++ b/net/ipv6/netfilter/nf_conntrack_reasm.c @@ -106,7 +106,7 @@ static int __net_init nf_ct_frag6_sysctl_register(struct net *net) if (hdr == NULL) goto err_reg; - net->ipv6.sysctl.frags_hdr = hdr; + net->nf_frag.sysctl.frags_hdr = hdr; return 0; err_reg: -- cgit v1.2.3 From bcc452935da124f26885e969ec5a78a8735d1452 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 26 Sep 2012 01:39:20 +0000 Subject: ipv6: gre: remove ip6gre_header_parse() dev_parse_header() callers provide 8 bytes of storage, so it's not possible to store an IPv6 address. Signed-off-by: Eric Dumazet Reported-by: Dan Carpenter Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 613a16647741..0185679c5f53 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1279,16 +1279,8 @@ static int ip6gre_header(struct sk_buff *skb, struct net_device *dev, return -t->hlen; } -static int ip6gre_header_parse(const struct sk_buff *skb, unsigned char *haddr) -{ - const struct ipv6hdr *ipv6h = (const struct ipv6hdr *)skb_mac_header(skb); - memcpy(haddr, &ipv6h->saddr, sizeof(struct in6_addr)); - return sizeof(struct in6_addr); -} - static const struct header_ops ip6gre_header_ops = { .create = ip6gre_header, - .parse = ip6gre_header_parse, }; static const struct net_device_ops ip6gre_netdev_ops = { -- cgit v1.2.3 From 188c517a050ec5b123e72cab76ea213721e5bd9d Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 25 Sep 2012 15:17:07 +0000 Subject: ipv6: return errno pointers consistently for fib6_add_1() fib6_add_1() should consistently return errno pointers, rather than a mixture of NULL and errno pointers. Signed-off-by: Lin Ming Signed-off-by: David S. Miller --- net/ipv6/ip6_fib.c | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c index 286acfc21250..24995a93ef8c 100644 --- a/net/ipv6/ip6_fib.c +++ b/net/ipv6/ip6_fib.c @@ -514,7 +514,7 @@ static struct fib6_node * fib6_add_1(struct fib6_node *root, void *addr, ln = node_alloc(); if (!ln) - return NULL; + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; ln->parent = pn; @@ -561,7 +561,7 @@ insert_above: node_free(in); if (ln) node_free(ln); - return NULL; + return ERR_PTR(-ENOMEM); } /* @@ -611,7 +611,7 @@ insert_above: ln = node_alloc(); if (!ln) - return NULL; + return ERR_PTR(-ENOMEM); ln->fn_bit = plen; @@ -777,11 +777,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (IS_ERR(fn)) { err = PTR_ERR(fn); - fn = NULL; - } - - if (!fn) goto out; + } pn = fn; @@ -820,15 +817,12 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) allow_create, replace_required); if (IS_ERR(sn)) { - err = PTR_ERR(sn); - sn = NULL; - } - if (!sn) { /* If it is failed, discard just allocated root, and then (in st_failure) stale node in main tree. */ node_free(sfn); + err = PTR_ERR(sn); goto st_failure; } @@ -843,10 +837,8 @@ int fib6_add(struct fib6_node *root, struct rt6_info *rt, struct nl_info *info) if (IS_ERR(sn)) { err = PTR_ERR(sn); - sn = NULL; - } - if (!sn) goto st_failure; + } } if (!fn->leaf) { -- cgit v1.2.3 From 64c6d08e6490fb18cea09bb03686c149946bd818 Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Wed, 26 Sep 2012 00:04:55 +0000 Subject: ipv6: del unreachable route when an addr is deleted on lo When an address is added on loopback (ip -6 a a 2002::1/128 dev lo), two routes are added: - one in the local table: local 2002::1 via :: dev lo proto none metric 0 - one the in main table (for the prefix): unreachable 2002::1 dev lo proto kernel metric 256 error -101 When the address is deleted, the route inserted in the main table remains because we use rt6_lookup(), which returns NULL when dst->error is set, which is the case here! Thus, it is better to use ip6_route_lookup() to avoid this kind of filter. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 719a828fb67f..480e68422efb 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -788,10 +788,16 @@ static void ipv6_del_addr(struct inet6_ifaddr *ifp) struct in6_addr prefix; struct rt6_info *rt; struct net *net = dev_net(ifp->idev->dev); + struct flowi6 fl6 = {}; + ipv6_addr_prefix(&prefix, &ifp->addr, ifp->prefix_len); - rt = rt6_lookup(net, &prefix, NULL, ifp->idev->dev->ifindex, 1); + fl6.flowi6_oif = ifp->idev->dev->ifindex; + fl6.daddr = prefix; + rt = (struct rt6_info *)ip6_route_lookup(net, &fl6, + RT6_LOOKUP_F_IFACE); - if (rt && addrconf_is_prefix_route(rt)) { + if (rt != net->ipv6.ip6_null_entry && + addrconf_is_prefix_route(rt)) { if (onlink == 0) { ip6_del_rt(rt); rt = NULL; -- cgit v1.2.3 From 861b650101eb0c627d171eb18de81dddb93d395e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 27 Sep 2012 02:14:53 +0000 Subject: tcp: gro: add checksuming helpers skb with CHECKSUM_NONE cant currently be handled by GRO, and we notice this deep in GRO stack in tcp[46]_gro_receive() But there are cases where GRO can be a benefit, even with a lack of checksums. This preliminary work is needed to add GRO support to tunnels. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 19 ++++++++++++++++--- net/ipv6/tcp_ipv6.c | 20 +++++++++++++++++--- 2 files changed, 33 insertions(+), 6 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 385eb79cf6aa..75735c9a6a9d 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -2803,6 +2803,8 @@ void tcp4_proc_exit(void) struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct iphdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -2811,11 +2813,22 @@ struct sk_buff **tcp4_gro_receive(struct sk_buff **head, struct sk_buff *skb) skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - - /* fall through */ - case CHECKSUM_NONE: +flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; + + case CHECKSUM_NONE: + wsum = csum_tcpudp_nofold(iph->saddr, iph->daddr, + skb_gro_len(skb), IPPROTO_TCP, 0); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; } return tcp_gro_receive(head, skb); diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index d6212d6bc8d8..49c890386ce9 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -763,6 +763,8 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, struct sk_buff *skb) { const struct ipv6hdr *iph = skb_gro_network_header(skb); + __wsum wsum; + __sum16 sum; switch (skb->ip_summed) { case CHECKSUM_COMPLETE: @@ -771,11 +773,23 @@ static struct sk_buff **tcp6_gro_receive(struct sk_buff **head, skb->ip_summed = CHECKSUM_UNNECESSARY; break; } - - /* fall through */ - case CHECKSUM_NONE: +flush: NAPI_GRO_CB(skb)->flush = 1; return NULL; + + case CHECKSUM_NONE: + wsum = ~csum_unfold(csum_ipv6_magic(&iph->saddr, &iph->daddr, + skb_gro_len(skb), + IPPROTO_TCP, 0)); + sum = csum_fold(skb_checksum(skb, + skb_gro_offset(skb), + skb_gro_len(skb), + wsum)); + if (sum) + goto flush; + + skb->ip_summed = CHECKSUM_UNNECESSARY; + break; } return tcp_gro_receive(head, skb); -- cgit v1.2.3 From 62b54dd91567686a1cb118f76a72d5f4764a86dd Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Mon, 1 Oct 2012 23:19:14 +0000 Subject: ipv6: don't add link local route when there is no link local address When an address is added on loopback (ip -6 a a 2002::1/128 dev lo), a route to fe80::/64 is added in the main table: unreachable fe80::/64 dev lo proto kernel metric 256 error -101 This route does not match any prefix (no fe80:: address on lo). In fact, addrconf_dev_config() will not add link local address because this function filters interfaces by type. If the link local address is added manually, the route to the link local prefix will be automatically added by addrconf_add_linklocal(). Note also, that this route is not deleted when the address is removed. After looking at the code, it seems that addrconf_add_lroute() is redundant with addrconf_add_linklocal(), because this function will add the link local route when the link local address is configured. Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 15 ++------------- 1 file changed, 2 insertions(+), 13 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 480e68422efb..d7c56f8a5b4e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1769,14 +1769,6 @@ static void sit_route_add(struct net_device *dev) } #endif -static void addrconf_add_lroute(struct net_device *dev) -{ - struct in6_addr addr; - - ipv6_addr_set(&addr, htonl(0xFE800000), 0, 0, 0); - addrconf_prefix_route(&addr, 64, dev, 0, 0); -} - static struct inet6_dev *addrconf_add_dev(struct net_device *dev) { struct inet6_dev *idev; @@ -1794,8 +1786,6 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) if (!(dev->flags & IFF_LOOPBACK)) addrconf_add_mroute(dev); - /* Add link local route */ - addrconf_add_lroute(dev); return idev; } @@ -2474,10 +2464,9 @@ static void addrconf_sit_config(struct net_device *dev) sit_add_v4_addrs(idev); - if (dev->flags&IFF_POINTOPOINT) { + if (dev->flags&IFF_POINTOPOINT) addrconf_add_mroute(dev); - addrconf_add_lroute(dev); - } else + else sit_route_add(dev); } #endif -- cgit v1.2.3 From 6825a26c2dc21eb4f8df9c06d3786ddec97cf53b Mon Sep 17 00:00:00 2001 From: Gao feng Date: Wed, 19 Sep 2012 19:25:34 +0000 Subject: ipv6: release reference of ip6_null_entry's dst entry in __ip6_del_rt as we hold dst_entry before we call __ip6_del_rt, so we should alse call dst_release not only return -ENOENT when the rt6_info is ip6_null_entry. and we already hold the dst entry, so I think it's safe to call dst_release out of the write-read lock. Signed-off-by: Gao feng Signed-off-by: David S. Miller --- net/ipv6/route.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'net/ipv6') diff --git a/net/ipv6/route.c b/net/ipv6/route.c index d1ddbc6ddac5..7c7e963260e1 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -1593,17 +1593,18 @@ static int __ip6_del_rt(struct rt6_info *rt, struct nl_info *info) struct fib6_table *table; struct net *net = dev_net(rt->dst.dev); - if (rt == net->ipv6.ip6_null_entry) - return -ENOENT; + if (rt == net->ipv6.ip6_null_entry) { + err = -ENOENT; + goto out; + } table = rt->rt6i_table; write_lock_bh(&table->tb6_lock); - err = fib6_del(rt, info); - dst_release(&rt->dst); - write_unlock_bh(&table->tb6_lock); +out: + dst_release(&rt->dst); return err; } -- cgit v1.2.3