From a7f1884554b81bd68cd435d72f09a3527629ac43 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 12 May 2016 14:43:54 +0200 Subject: netfilter: nfnetlink_queue: fix timestamp attribute Since 4.4 we erronously use timestamp of the netlink skb (which is zero). Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1066 Fixes: b28b1e826f818c30ea7 ("netfilter: nfnetlink_queue: use y2038 safe timestamp") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nfnetlink_queue.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index cb5b630a645b..e34256a0e7e9 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -499,7 +499,7 @@ nfqnl_build_packet_message(struct net *net, struct nfqnl_instance *queue, if (entskb->tstamp.tv64) { struct nfqnl_msg_packet_timestamp ts; - struct timespec64 kts = ktime_to_timespec64(skb->tstamp); + struct timespec64 kts = ktime_to_timespec64(entskb->tstamp); ts.sec = cpu_to_be64(kts.tv_sec); ts.usec = cpu_to_be64(kts.tv_nsec / NSEC_PER_USEC); -- cgit v1.2.3 From 720b287d8382616a29524dd98264ac1e3a069e9f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 13 May 2016 22:48:51 +0200 Subject: netfilter: conntrack: remove leftover binary sysctl define Users got removed in f8572d8f2a2ba ("sysctl net: Remove unused binary sysctl code"). Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_standalone.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0f1a45bcacb2..2933db30098a 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -489,8 +489,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { { } }; -#define NET_NF_CONNTRACK_MAX 2089 - static struct ctl_table nf_ct_netfilter_table[] = { { .procname = "nf_conntrack_max", -- cgit v1.2.3 From dc3ee32e96d74dd6c80eed63af5065cb75899299 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 13 May 2016 21:18:52 -0500 Subject: netfilter: nf_queue: Make the queue_handler pernet Florian Weber reported: > Under full load (unshare() in loop -> OOM conditions) we can > get kernel panic: > > BUG: unable to handle kernel NULL pointer dereference at 0000000000000008 > IP: [] nfqnl_nf_hook_drop+0x35/0x70 > [..] > task: ffff88012dfa3840 ti: ffff88012dffc000 task.ti: ffff88012dffc000 > RIP: 0010:[] [] nfqnl_nf_hook_drop+0x35/0x70 > RSP: 0000:ffff88012dfffd80 EFLAGS: 00010206 > RAX: 0000000000000008 RBX: ffffffff81add0c0 RCX: ffff88013fd80000 > [..] > Call Trace: > [] nf_queue_nf_hook_drop+0x18/0x20 > [] nf_unregister_net_hook+0xdb/0x150 > [] netfilter_net_exit+0x2f/0x60 > [] ops_exit_list.isra.4+0x38/0x60 > [] setup_net+0xc2/0x120 > [] copy_net_ns+0x79/0x120 > [] create_new_namespaces+0x11b/0x1e0 > [] unshare_nsproxy_namespaces+0x57/0xa0 > [] SyS_unshare+0x1b2/0x340 > [] entry_SYSCALL_64_fastpath+0x1e/0xa8 > Code: 65 00 48 89 e5 41 56 41 55 41 54 53 83 e8 01 48 8b 97 70 12 00 00 48 98 49 89 f4 4c 8b 74 c2 18 4d 8d 6e 08 49 81 c6 88 00 00 00 <49> 8b 5d 00 48 85 db 74 1a 48 89 df 4c 89 e2 48 c7 c6 90 68 47 > The simple fix for this requires a new pernet variable for struct nf_queue that indicates when it is safe to use the dynamically allocated nf_queue state. As we need a variable anyway make nf_register_queue_handler and nf_unregister_queue_handler pernet. This allows the existing logic of when it is safe to use the state from the nfnetlink_queue module to be reused with no changes except for making it per net. The syncrhonize_rcu from nf_unregister_queue_handler is moved to a new function nfnl_queue_net_exit_batch so that the worst case of having a syncrhonize_rcu in the pernet exit path is not experienced in batch mode. Reported-by: Florian Westphal Signed-off-by: "Eric W. Biederman" Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_queue.h | 4 ++-- include/net/netns/netfilter.h | 2 ++ net/netfilter/nf_queue.c | 17 ++++++++--------- net/netfilter/nfnetlink_queue.c | 18 ++++++++++++------ 4 files changed, 24 insertions(+), 17 deletions(-) (limited to 'net') diff --git a/include/net/netfilter/nf_queue.h b/include/net/netfilter/nf_queue.h index 9c5638ad872e..0dbce55437f2 100644 --- a/include/net/netfilter/nf_queue.h +++ b/include/net/netfilter/nf_queue.h @@ -28,8 +28,8 @@ struct nf_queue_handler { struct nf_hook_ops *ops); }; -void nf_register_queue_handler(const struct nf_queue_handler *qh); -void nf_unregister_queue_handler(void); +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh); +void nf_unregister_queue_handler(struct net *net); void nf_reinject(struct nf_queue_entry *entry, unsigned int verdict); void nf_queue_entry_get_refs(struct nf_queue_entry *entry); diff --git a/include/net/netns/netfilter.h b/include/net/netns/netfilter.h index 38aa4983e2a9..36d723579af2 100644 --- a/include/net/netns/netfilter.h +++ b/include/net/netns/netfilter.h @@ -5,11 +5,13 @@ struct proc_dir_entry; struct nf_logger; +struct nf_queue_handler; struct netns_nf { #if defined CONFIG_PROC_FS struct proc_dir_entry *proc_netfilter; #endif + const struct nf_queue_handler __rcu *queue_handler; const struct nf_logger __rcu *nf_loggers[NFPROTO_NUMPROTO]; #ifdef CONFIG_SYSCTL struct ctl_table_header *nf_log_dir_header; diff --git a/net/netfilter/nf_queue.c b/net/netfilter/nf_queue.c index 5baa8e24e6ac..b19ad20a705c 100644 --- a/net/netfilter/nf_queue.c +++ b/net/netfilter/nf_queue.c @@ -26,23 +26,21 @@ * Once the queue is registered it must reinject all packets it * receives, no matter what. */ -static const struct nf_queue_handler __rcu *queue_handler __read_mostly; /* return EBUSY when somebody else is registered, return EEXIST if the * same handler is registered, return 0 in case of success. */ -void nf_register_queue_handler(const struct nf_queue_handler *qh) +void nf_register_queue_handler(struct net *net, const struct nf_queue_handler *qh) { /* should never happen, we only have one queueing backend in kernel */ - WARN_ON(rcu_access_pointer(queue_handler)); - rcu_assign_pointer(queue_handler, qh); + WARN_ON(rcu_access_pointer(net->nf.queue_handler)); + rcu_assign_pointer(net->nf.queue_handler, qh); } EXPORT_SYMBOL(nf_register_queue_handler); /* The caller must flush their queue before this */ -void nf_unregister_queue_handler(void) +void nf_unregister_queue_handler(struct net *net) { - RCU_INIT_POINTER(queue_handler, NULL); - synchronize_rcu(); + RCU_INIT_POINTER(net->nf.queue_handler, NULL); } EXPORT_SYMBOL(nf_unregister_queue_handler); @@ -103,7 +101,7 @@ void nf_queue_nf_hook_drop(struct net *net, struct nf_hook_ops *ops) const struct nf_queue_handler *qh; rcu_read_lock(); - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (qh) qh->nf_hook_drop(net, ops); rcu_read_unlock(); @@ -122,9 +120,10 @@ int nf_queue(struct sk_buff *skb, struct nf_queue_entry *entry = NULL; const struct nf_afinfo *afinfo; const struct nf_queue_handler *qh; + struct net *net = state->net; /* QUEUE == DROP if no one is waiting, to be safe. */ - qh = rcu_dereference(queue_handler); + qh = rcu_dereference(net->nf.queue_handler); if (!qh) { status = -ESRCH; goto err; diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c index e34256a0e7e9..309ac026aac2 100644 --- a/net/netfilter/nfnetlink_queue.c +++ b/net/netfilter/nfnetlink_queue.c @@ -1377,21 +1377,29 @@ static int __net_init nfnl_queue_net_init(struct net *net) net->nf.proc_netfilter, &nfqnl_file_ops)) return -ENOMEM; #endif + nf_register_queue_handler(net, &nfqh); return 0; } static void __net_exit nfnl_queue_net_exit(struct net *net) { + nf_unregister_queue_handler(net); #ifdef CONFIG_PROC_FS remove_proc_entry("nfnetlink_queue", net->nf.proc_netfilter); #endif } +static void nfnl_queue_net_exit_batch(struct list_head *net_exit_list) +{ + synchronize_rcu(); +} + static struct pernet_operations nfnl_queue_net_ops = { - .init = nfnl_queue_net_init, - .exit = nfnl_queue_net_exit, - .id = &nfnl_queue_net_id, - .size = sizeof(struct nfnl_queue_net), + .init = nfnl_queue_net_init, + .exit = nfnl_queue_net_exit, + .exit_batch = nfnl_queue_net_exit_batch, + .id = &nfnl_queue_net_id, + .size = sizeof(struct nfnl_queue_net), }; static int __init nfnetlink_queue_init(void) @@ -1412,7 +1420,6 @@ static int __init nfnetlink_queue_init(void) } register_netdevice_notifier(&nfqnl_dev_notifier); - nf_register_queue_handler(&nfqh); return status; cleanup_netlink_notifier: @@ -1424,7 +1431,6 @@ out: static void __exit nfnetlink_queue_fini(void) { - nf_unregister_queue_handler(); unregister_netdevice_notifier(&nfqnl_dev_notifier); nfnetlink_subsys_unregister(&nfqnl_subsys); netlink_unregister_notifier(&nfqnl_rtnl_notifier); -- cgit v1.2.3 From b7a8daa9f3d1688e994f5557577d3252c94ec282 Mon Sep 17 00:00:00 2001 From: Taehee Yoo Date: Sat, 14 May 2016 22:19:53 +0900 Subject: netfilter: nf_ct_helper: Fix helper unregister count. helpers should unregister the only registered ports. but, helper cannot have correct registered ports value when failed to register. Signed-off-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_ftp.c | 1 + net/netfilter/nf_conntrack_irc.c | 1 + net/netfilter/nf_conntrack_sane.c | 1 + net/netfilter/nf_conntrack_sip.c | 1 + net/netfilter/nf_conntrack_tftp.c | 1 + 5 files changed, 5 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_ftp.c b/net/netfilter/nf_conntrack_ftp.c index 883c691ec8d0..19efeba02abb 100644 --- a/net/netfilter/nf_conntrack_ftp.c +++ b/net/netfilter/nf_conntrack_ftp.c @@ -632,6 +632,7 @@ static int __init nf_conntrack_ftp_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", ftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_ftp_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_irc.c b/net/netfilter/nf_conntrack_irc.c index 8b6da2719600..f97ac61d2536 100644 --- a/net/netfilter/nf_conntrack_irc.c +++ b/net/netfilter/nf_conntrack_irc.c @@ -271,6 +271,7 @@ static int __init nf_conntrack_irc_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", irc[i].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_irc_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sane.c b/net/netfilter/nf_conntrack_sane.c index 7523a575f6d1..3fcbaab83b3d 100644 --- a/net/netfilter/nf_conntrack_sane.c +++ b/net/netfilter/nf_conntrack_sane.c @@ -223,6 +223,7 @@ static int __init nf_conntrack_sane_init(void) if (ret) { pr_err("failed to register helper for pf: %d port: %d\n", sane[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sane_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c index 3e06402739e0..f72ba5587588 100644 --- a/net/netfilter/nf_conntrack_sip.c +++ b/net/netfilter/nf_conntrack_sip.c @@ -1669,6 +1669,7 @@ static int __init nf_conntrack_sip_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", sip[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_sip_fini(); return ret; } diff --git a/net/netfilter/nf_conntrack_tftp.c b/net/netfilter/nf_conntrack_tftp.c index 36f964066461..2e65b5430fba 100644 --- a/net/netfilter/nf_conntrack_tftp.c +++ b/net/netfilter/nf_conntrack_tftp.c @@ -142,6 +142,7 @@ static int __init nf_conntrack_tftp_init(void) if (ret) { pr_err("failed to register helper for pf: %u port: %u\n", tftp[i][j].tuple.src.l3num, ports[i]); + ports_c = i; nf_conntrack_tftp_fini(); return ret; } -- cgit v1.2.3 From 83170f3beccccd7ceb4f9a0ac0c4dc736afde90c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 26 May 2016 19:08:10 +0200 Subject: netfilter: nf_dup_ipv6: set again FLOWI_FLAG_KNOWN_NH at flowi6_flags With the commit 48e8aa6e3137 ("ipv6: Set FLOWI_FLAG_KNOWN_NH at flowi6_flags") ip6_pol_route() callers were asked to to set the FLOWI_FLAG_KNOWN_NH properly and xt_TEE was updated accordingly, but with the later refactor in commit bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") the flowi6_flags update was lost. This commit re-add it just before the routing decision. Fixes: bbde9fc1824a ("netfilter: factor out packet duplication for IPv4/IPv6") Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv6/netfilter/nf_dup_ipv6.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/ipv6/netfilter/nf_dup_ipv6.c b/net/ipv6/netfilter/nf_dup_ipv6.c index 6989c70ae29f..4a84b5ad9ecb 100644 --- a/net/ipv6/netfilter/nf_dup_ipv6.c +++ b/net/ipv6/netfilter/nf_dup_ipv6.c @@ -33,6 +33,7 @@ static bool nf_dup_ipv6_route(struct net *net, struct sk_buff *skb, fl6.daddr = *gw; fl6.flowlabel = (__force __be32)(((iph->flow_lbl[0] & 0xF) << 16) | (iph->flow_lbl[1] << 8) | iph->flow_lbl[2]); + fl6.flowi6_flags = FLOWI_FLAG_KNOWN_NH; dst = ip6_route_output(net, NULL, &fl6); if (dst->error) { dst_release(dst); -- cgit v1.2.3 From eaa2bcd6d1d410a52df8c7b05e76d86c0319b7b0 Mon Sep 17 00:00:00 2001 From: Phil Turnbull Date: Fri, 27 May 2016 13:34:04 -0400 Subject: netfilter: nf_tables: validate NFTA_SET_TABLE parameter If the NFTA_SET_TABLE parameter is missing and the NLM_F_DUMP flag is not set, then a NULL pointer dereference is triggered in nf_tables_set_lookup because ctx.table is NULL. Signed-off-by: Phil Turnbull Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 2011977cd79d..6947e255cdd8 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2641,6 +2641,8 @@ static int nf_tables_getset(struct net *net, struct sock *nlsk, /* Only accept unspec with dump */ if (nfmsg->nfgen_family == NFPROTO_UNSPEC) return -EAFNOSUPPORT; + if (!nla[NFTA_SET_TABLE]) + return -EINVAL; set = nf_tables_set_lookup(ctx.table, nla[NFTA_SET_NAME]); if (IS_ERR(set)) -- cgit v1.2.3 From 893e093c786c4256d52809eed697e9d70a6f6643 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 24 May 2016 11:23:51 +0200 Subject: netfilter: nf_ct_helper: bail out on duplicated helpers Don't allow registration of helpers using the same tuple: { l3proto, l4proto, src-port } We lookup for the helper from the packet path using this tuple through __nf_ct_helper_find(). Therefore, we have to avoid having two helpers with the same tuple to ensure predictible behaviour. Don't compare the helper string names anymore since it is valid to register two helpers with the same name, but using different tuples. This is also implicitly fixing up duplicated helper registration via ports= modparam since the name comparison was defeating the tuple duplication validation. Reported-by: Feng Gao Reported-by: Taehee Yoo Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_conntrack_helper.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c index 3b40ec575cd5..48de9be45a9a 100644 --- a/net/netfilter/nf_conntrack_helper.c +++ b/net/netfilter/nf_conntrack_helper.c @@ -361,9 +361,10 @@ EXPORT_SYMBOL_GPL(nf_ct_helper_log); int nf_conntrack_helper_register(struct nf_conntrack_helper *me) { - int ret = 0; - struct nf_conntrack_helper *cur; + struct nf_conntrack_tuple_mask mask = { .src.u.all = htons(0xFFFF) }; unsigned int h = helper_hash(&me->tuple); + struct nf_conntrack_helper *cur; + int ret = 0; BUG_ON(me->expect_policy == NULL); BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES); @@ -371,9 +372,7 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me) mutex_lock(&nf_ct_helper_mutex); hlist_for_each_entry(cur, &nf_ct_helper_hash[h], hnode) { - if (strncmp(cur->name, me->name, NF_CT_HELPER_NAME_LEN) == 0 && - cur->tuple.src.l3num == me->tuple.src.l3num && - cur->tuple.dst.protonum == me->tuple.dst.protonum) { + if (nf_ct_tuple_src_mask_cmp(&cur->tuple, &me->tuple, &mask)) { ret = -EEXIST; goto out; } -- cgit v1.2.3 From fe7a7c57629e8dcbc0e297363a9b2366d67a6dc5 Mon Sep 17 00:00:00 2001 From: Bob Copeland Date: Sun, 15 May 2016 13:19:16 -0400 Subject: mac80211: mesh: flush mesh paths unconditionally Currently, the mesh paths associated with a nexthop station are cleaned up in the following code path: __sta_info_destroy_part1 synchronize_net() __sta_info_destroy_part2 -> cleanup_single_sta -> mesh_sta_cleanup -> mesh_plink_deactivate -> mesh_path_flush_by_nexthop However, there are a couple of problems here: 1) the paths aren't flushed at all if the MPM is running in userspace (e.g. when using wpa_supplicant or authsae) 2) there is no synchronize_rcu between removing the path and readers accessing the nexthop, which means the following race is possible: CPU0 CPU1 ~~~~ ~~~~ sta_info_destroy_part1() synchronize_net() rcu_read_lock() mesh_nexthop_resolve() mpath = mesh_path_lookup() [...] -> mesh_path_flush_by_nexthop() sta = rcu_dereference( mpath->next_hop) kfree(sta) access sta <-- CRASH Fix both of these by unconditionally flushing paths before destroying the sta, and by adding a synchronize_net() after path flush to ensure no active readers can still dereference the sta. Fixes this crash: [ 348.529295] BUG: unable to handle kernel paging request at 00020040 [ 348.530014] IP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] *pde = 00000000 [ 348.530014] Oops: 0000 [#1] PREEMPT [ 348.530014] Modules linked in: drbg ansi_cprng ctr ccm ppp_generic slhc ipt_MASQUERADE nf_nat_masquerade_ipv4 8021q ] [ 348.530014] CPU: 0 PID: 20597 Comm: wget Tainted: G O 4.6.0-rc5-wt=V1 #1 [ 348.530014] Hardware name: To Be Filled By O.E.M./To be filled by O.E.M., BIOS 080016 11/07/2014 [ 348.530014] task: f64fa280 ti: f4f9c000 task.ti: f4f9c000 [ 348.530014] EIP: 0060:[] EFLAGS: 00010246 CPU: 0 [ 348.530014] EIP is at ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] [ 348.530014] EAX: f4ce63e0 EBX: 00000088 ECX: f3788416 EDX: 00020008 [ 348.530014] ESI: 00000000 EDI: 00000088 EBP: f6409a4c ESP: f6409a40 [ 348.530014] DS: 007b ES: 007b FS: 0000 GS: 0033 SS: 0068 [ 348.530014] CR0: 80050033 CR2: 00020040 CR3: 33190000 CR4: 00000690 [ 348.530014] Stack: [ 348.530014] 00000000 f4ce63e0 f5f9bd80 f6409a64 f9291d80 0000ce67 f5d51e00 f4ce63e0 [ 348.530014] f3788416 f6409a80 f9291dc1 f4ce8320 f4ce63e0 f5d51e00 f4ce63e0 f4ce8320 [ 348.530014] f6409a98 f9277f6f 00000000 00000000 0000007c 00000000 f6409b2c f9278dd1 [ 348.530014] Call Trace: [ 348.530014] [] mesh_nexthop_lookup+0xbb/0xc8 [mac80211] [ 348.530014] [] mesh_nexthop_resolve+0x34/0xd8 [mac80211] [ 348.530014] [] ieee80211_xmit+0x92/0xc1 [mac80211] [ 348.530014] [] __ieee80211_subif_start_xmit+0x807/0x83c [mac80211] [ 348.530014] [] ? sch_direct_xmit+0xd7/0x1b3 [ 348.530014] [] ? __local_bh_enable_ip+0x5d/0x7b [ 348.530014] [] ? nf_nat_ipv4_out+0x4c/0xd0 [nf_nat_ipv4] [ 348.530014] [] ? iptable_nat_ipv4_fn+0xf/0xf [iptable_nat] [ 348.530014] [] ? netif_skb_features+0x14d/0x30a [ 348.530014] [] ieee80211_subif_start_xmit+0xa/0xe [mac80211] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] batadv_send_skb_packet+0xd6/0xec [batman_adv] [ 348.530014] [] batadv_send_unicast_skb+0x15/0x4a [batman_adv] [ 348.530014] [] batadv_dat_send_data+0x27e/0x310 [batman_adv] [ 348.530014] [] ? batadv_tt_global_hash_find.isra.11+0x8/0xa [batman_adv] [ 348.530014] [] batadv_dat_snoop_outgoing_arp_request+0x208/0x23d [batman_adv] [ 348.530014] [] batadv_interface_tx+0x206/0x385 [batman_adv] [ 348.530014] [] dev_hard_start_xmit+0x1f8/0x267 [ 348.530014] [] ? validate_xmit_skb.isra.120.part.121+0x10/0x253 [ 348.530014] [] sch_direct_xmit+0x8b/0x1b3 [ 348.530014] [] __dev_queue_xmit+0x2c8/0x513 [ 348.530014] [] ? igb_xmit_frame+0x57/0x72 [igb] [ 348.530014] [] dev_queue_xmit+0xa/0xc [ 348.530014] [] br_dev_queue_push_xmit+0xeb/0xfb [bridge] [ 348.530014] [] br_forward_finish+0x29/0x74 [bridge] [ 348.530014] [] ? deliver_clone+0x3b/0x3b [bridge] [ 348.530014] [] __br_forward+0x89/0xe7 [bridge] [ 348.530014] [] ? br_dev_queue_push_xmit+0xfb/0xfb [bridge] [ 348.530014] [] deliver_clone+0x34/0x3b [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_flood+0x77/0x95 [bridge] [ 348.530014] [] br_flood_forward+0x13/0x1a [bridge] [ 348.530014] [] ? br_flood+0x95/0x95 [bridge] [ 348.530014] [] br_handle_frame_finish+0x392/0x3db [bridge] [ 348.530014] [] ? nf_iterate+0x2b/0x6b [ 348.530014] [] br_handle_frame+0x1e6/0x240 [bridge] [ 348.530014] [] ? br_handle_local_finish+0x6a/0x6a [bridge] [ 348.530014] [] __netif_receive_skb_core+0x43a/0x66b [ 348.530014] [] ? br_handle_frame_finish+0x3db/0x3db [bridge] [ 348.530014] [] ? resched_curr+0x19/0x37 [ 348.530014] [] ? check_preempt_wakeup+0xbf/0xfe [ 348.530014] [] ? ktime_get_with_offset+0x5c/0xfc [ 348.530014] [] __netif_receive_skb+0x47/0x55 [ 348.530014] [] netif_receive_skb_internal+0x40/0x5a [ 348.530014] [] napi_gro_receive+0x3a/0x94 [ 348.530014] [] igb_poll+0x6fd/0x9ad [igb] [ 348.530014] [] ? swake_up_locked+0x14/0x26 [ 348.530014] [] net_rx_action+0xde/0x250 [ 348.530014] [] __do_softirq+0x8a/0x163 [ 348.530014] [] ? __hrtimer_tasklet_trampoline+0x19/0x19 [ 348.530014] [] do_softirq_own_stack+0x26/0x2c [ 348.530014] [ 348.530014] [] irq_exit+0x31/0x6f [ 348.530014] [] do_IRQ+0x8d/0xa0 [ 348.530014] [] common_interrupt+0x2c/0x40 [ 348.530014] Code: e7 8c 00 66 81 ff 88 00 75 12 85 d2 75 0e b2 c3 b8 83 e9 29 f9 e8 a7 5f f9 c6 eb 74 66 81 e3 8c 005 [ 348.530014] EIP: [] ieee80211_mps_set_frame_flags+0x40/0xaa [mac80211] SS:ESP 0068:f6409a40 [ 348.530014] CR2: 0000000000020040 [ 348.530014] ---[ end trace 48556ac26779732e ]--- [ 348.530014] Kernel panic - not syncing: Fatal exception in interrupt [ 348.530014] Kernel Offset: disabled Cc: stable@vger.kernel.org Reported-by: Fred Veldini Tested-by: Fred Veldini Signed-off-by: Bob Copeland Signed-off-by: Johannes Berg --- net/mac80211/mesh.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 4c6404e1ad6e..21b1fdf5d01d 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -161,6 +161,10 @@ void mesh_sta_cleanup(struct sta_info *sta) del_timer_sync(&sta->mesh->plink_timer); } + /* make sure no readers can access nexthop sta from here on */ + mesh_path_flush_by_nexthop(sta); + synchronize_net(); + if (changed) ieee80211_mbss_info_change_notify(sdata, changed); } -- cgit v1.2.3 From 6fe04128f158c5ad27e7504bfdf1b12e63331bc9 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Thu, 19 May 2016 17:34:38 +0200 Subject: mac80211: fix fast_tx header alignment The header field is defined as u8[] but also accessed as struct ieee80211_hdr. Enforce an alignment of 2 to prevent unnecessary unaligned accesses, which can be very harmful for performance on many platforms. Fixes: e495c24731a2 ("mac80211: extend fast-xmit for more ciphers") Cc: stable@vger.kernel.org Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- net/mac80211/sta_info.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h index c8b8ccc370eb..78b0ef32dddd 100644 --- a/net/mac80211/sta_info.h +++ b/net/mac80211/sta_info.h @@ -280,7 +280,7 @@ struct ieee80211_fast_tx { u8 sa_offs, da_offs, pn_offs; u8 band; u8 hdr[30 + 2 + IEEE80211_FAST_XMIT_MAX_IV + - sizeof(rfc1042_header)]; + sizeof(rfc1042_header)] __aligned(2); struct rcu_head rcu_head; }; -- cgit v1.2.3 From 7b7eba0f3515fca3296b8881d583f7c1042f5226 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 1 Jun 2016 02:04:44 +0200 Subject: netfilter: x_tables: don't reject valid target size on some architectures Quoting John Stultz: In updating a 32bit arm device from 4.6 to Linus' current HEAD, I noticed I was having some trouble with networking, and realized that /proc/net/ip_tables_names was suddenly empty. Digging through the registration process, it seems we're catching on the: if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && target_offset + sizeof(struct xt_standard_target) != next_offset) return -EINVAL; Where next_offset seems to be 4 bytes larger then the offset + standard_target struct size. next_offset needs to be aligned via XT_ALIGN (so we can access all members of ip(6)t_entry struct). This problem didn't show up on i686 as it only needs 4-byte alignment for u64, but iptables userspace on other 32bit arches does insert extra padding. Reported-by: John Stultz Tested-by: John Stultz Fixes: 7ed2abddd20cf ("netfilter: x_tables: check standard target size too") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index c69c892231d7..2675d580c490 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -612,7 +612,7 @@ int xt_compat_check_entry_offsets(const void *base, const char *elems, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct compat_xt_standard_target) != next_offset) + COMPAT_XT_ALIGN(target_offset + sizeof(struct compat_xt_standard_target)) != next_offset) return -EINVAL; /* compat_xt_entry match has less strict aligment requirements, @@ -694,7 +694,7 @@ int xt_check_entry_offsets(const void *base, return -EINVAL; if (strcmp(t->u.user.name, XT_STANDARD_TARGET) == 0 && - target_offset + sizeof(struct xt_standard_target) != next_offset) + XT_ALIGN(target_offset + sizeof(struct xt_standard_target)) != next_offset) return -EINVAL; return xt_check_entry_match(elems, base + target_offset, -- cgit v1.2.3 From ce25d66ad5f8d921bac5fe2d32d62fa30c0f9a70 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 2 Jun 2016 14:52:43 -0700 Subject: Possible problem with e6afc8ac ("udp: remove headers from UDP packets before queueing") Paul Moore tracked a regression caused by a recent commit, which mistakenly assumed that sk_filter() could be avoided if socket had no current BPF filter. The intent was to avoid udp_lib_checksum_complete() overhead. But sk_filter() also checks skb_pfmemalloc() and security_sock_rcv_skb(), so better call it. Fixes: e6afc8ace6dd ("udp: remove headers from UDP packets before queueing") Signed-off-by: Eric Dumazet Reported-by: Paul Moore Tested-by: Paul Moore Tested-by: Stephen Smalley Cc: samanthakumar Signed-off-by: David S. Miller --- net/ipv4/udp.c | 10 +++++----- net/ipv6/udp.c | 12 ++++++------ 2 files changed, 11 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d56c0559b477..0ff31d97d485 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1618,12 +1618,12 @@ int udp_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 2da1896af934..f421c9f23c5b 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -653,12 +653,12 @@ int udpv6_queue_rcv_skb(struct sock *sk, struct sk_buff *skb) } } - if (rcu_access_pointer(sk->sk_filter)) { - if (udp_lib_checksum_complete(skb)) - goto csum_error; - if (sk_filter(sk, skb)) - goto drop; - } + if (rcu_access_pointer(sk->sk_filter) && + udp_lib_checksum_complete(skb)) + goto csum_error; + + if (sk_filter(sk, skb)) + goto drop; udp_csum_pull_header(skb); if (sk_rcvqueues_full(sk, sk->sk_rcvbuf)) { -- cgit v1.2.3 From 5d2be1422e02ccd697ccfcd45c85b4a26e6178e2 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:04:56 -0400 Subject: tipc: fix an infoleak in tipc_nl_compat_link_dump link_info.str is a char array of size 60. Memory after the NULL byte is not initialized. Sending the whole object out can cause a leak. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- net/tipc/netlink_compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index f795b1dd0ccd..3ad9fab1985f 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -604,7 +604,8 @@ static int tipc_nl_compat_link_dump(struct tipc_nl_compat_msg *msg, link_info.dest = nla_get_flag(link[TIPC_NLA_LINK_DEST]); link_info.up = htonl(nla_get_flag(link[TIPC_NLA_LINK_UP])); - strcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME])); + nla_strlcpy(link_info.str, nla_data(link[TIPC_NLA_LINK_NAME]), + TIPC_MAX_LINK_NAME); return tipc_add_tlv(msg->rep, TIPC_TLV_LINK_INFO, &link_info, sizeof(link_info)); -- cgit v1.2.3 From 4116def2337991b39919f3b448326e21c40e0dbb Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Thu, 2 Jun 2016 04:11:20 -0400 Subject: rds: fix an infoleak in rds_inc_info_copy The last field "flags" of object "minfo" is not initialized. Copying this object out may leak kernel stack data. Assign 0 to it to avoid leak. Signed-off-by: Kangjie Lu Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/recv.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/rds/recv.c b/net/rds/recv.c index c0be1ecd11c9..8413f6c99e13 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -561,5 +561,7 @@ void rds_inc_info_copy(struct rds_incoming *inc, minfo.fport = inc->i_hdr.h_dport; } + minfo.flags = 0; + rds_info_copy(iter, &minfo, sizeof(minfo)); } -- cgit v1.2.3 From 357cc9b4a8a7a0cd0e662537b76e6fa4670b6798 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:15 -0700 Subject: sch_hfsc: always keep backlog updated hfsc updates backlog lazily, that is only when we dump the stats. This is problematic after we begin to update backlog in qdisc_tree_reduce_backlog(). Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_hfsc.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/sched/sch_hfsc.c b/net/sched/sch_hfsc.c index d783d7cc3348..1ac9f9f03fe3 100644 --- a/net/sched/sch_hfsc.c +++ b/net/sched/sch_hfsc.c @@ -1529,6 +1529,7 @@ hfsc_reset_qdisc(struct Qdisc *sch) q->eligible = RB_ROOT; INIT_LIST_HEAD(&q->droplist); qdisc_watchdog_cancel(&q->watchdog); + sch->qstats.backlog = 0; sch->q.qlen = 0; } @@ -1559,14 +1560,6 @@ hfsc_dump_qdisc(struct Qdisc *sch, struct sk_buff *skb) struct hfsc_sched *q = qdisc_priv(sch); unsigned char *b = skb_tail_pointer(skb); struct tc_hfsc_qopt qopt; - struct hfsc_class *cl; - unsigned int i; - - sch->qstats.backlog = 0; - for (i = 0; i < q->clhash.hashsize; i++) { - hlist_for_each_entry(cl, &q->clhash.hash[i], cl_common.hnode) - sch->qstats.backlog += cl->qdisc->qstats.backlog; - } qopt.defcls = q->defcls; if (nla_put(skb, TCA_OPTIONS, sizeof(qopt), &qopt)) @@ -1604,6 +1597,7 @@ hfsc_enqueue(struct sk_buff *skb, struct Qdisc *sch) if (cl->qdisc->q.qlen == 1) set_active(cl, qdisc_pkt_len(skb)); + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; @@ -1672,6 +1666,7 @@ hfsc_dequeue(struct Qdisc *sch) qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; @@ -1695,6 +1690,7 @@ hfsc_drop(struct Qdisc *sch) } cl->qstats.drops++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } -- cgit v1.2.3 From 6529d75ad9228f4d8a8f6c5c5244ceb945ac9bc2 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:16 -0700 Subject: sch_prio: update backlog as well We need to update backlog too when we update qlen. Joint work with Stas. Reported-by: Stas Nichiporovich Tested-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_prio.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/sch_prio.c b/net/sched/sch_prio.c index fee1b15506b2..4b0a82191bc4 100644 --- a/net/sched/sch_prio.c +++ b/net/sched/sch_prio.c @@ -85,6 +85,7 @@ prio_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, qdisc); if (ret == NET_XMIT_SUCCESS) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -117,6 +118,7 @@ static struct sk_buff *prio_dequeue(struct Qdisc *sch) struct sk_buff *skb = qdisc_dequeue_peeked(qdisc); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -135,6 +137,7 @@ static unsigned int prio_drop(struct Qdisc *sch) for (prio = q->bands-1; prio >= 0; prio--) { qdisc = q->queues[prio]; if (qdisc->ops->drop && (len = qdisc->ops->drop(qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -151,6 +154,7 @@ prio_reset(struct Qdisc *sch) for (prio = 0; prio < q->bands; prio++) qdisc_reset(q->queues[prio]); + sch->qstats.backlog = 0; sch->q.qlen = 0; } -- cgit v1.2.3 From 6a73b571b63075ef408c83f07c2565b5652f93cc Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:17 -0700 Subject: sch_drr: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_drr.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/sch_drr.c b/net/sched/sch_drr.c index a63e879e8975..bf8af2c43c2c 100644 --- a/net/sched/sch_drr.c +++ b/net/sched/sch_drr.c @@ -375,6 +375,7 @@ static int drr_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->deficit = cl->quantum; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return err; } @@ -407,6 +408,7 @@ static struct sk_buff *drr_dequeue(struct Qdisc *sch) bstats_update(&cl->bstats, skb); qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; return skb; } @@ -428,6 +430,7 @@ static unsigned int drr_drop(struct Qdisc *sch) if (cl->qdisc->ops->drop) { len = cl->qdisc->ops->drop(cl->qdisc); if (len > 0) { + sch->qstats.backlog -= len; sch->q.qlen--; if (cl->qdisc->q.qlen == 0) list_del(&cl->alist); @@ -463,6 +466,7 @@ static void drr_reset_qdisc(struct Qdisc *sch) qdisc_reset(cl->qdisc); } } + sch->qstats.backlog = 0; sch->q.qlen = 0; } -- cgit v1.2.3 From d7f4f332f082c4d4ba53582f902ed6b44fd6f45e Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:18 -0700 Subject: sch_red: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_red.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c index 8c0508c0e287..91578bdd378c 100644 --- a/net/sched/sch_red.c +++ b/net/sched/sch_red.c @@ -97,6 +97,7 @@ static int red_enqueue(struct sk_buff *skb, struct Qdisc *sch) ret = qdisc_enqueue(skb, child); if (likely(ret == NET_XMIT_SUCCESS)) { + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; } else if (net_xmit_drop_count(ret)) { q->stats.pdrop++; @@ -118,6 +119,7 @@ static struct sk_buff *red_dequeue(struct Qdisc *sch) skb = child->dequeue(child); if (skb) { qdisc_bstats_update(sch, skb); + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { if (!red_is_idling(&q->vars)) @@ -143,6 +145,7 @@ static unsigned int red_drop(struct Qdisc *sch) if (child->ops->drop && (len = child->ops->drop(child)) > 0) { q->stats.other++; qdisc_qstats_drop(sch); + sch->qstats.backlog -= len; sch->q.qlen--; return len; } @@ -158,6 +161,7 @@ static void red_reset(struct Qdisc *sch) struct red_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; red_restart(&q->vars); } -- cgit v1.2.3 From 8d5958f424b62060a8696b12c17dad198d5d386f Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 1 Jun 2016 16:15:19 -0700 Subject: sch_tbf: update backlog as well Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_tbf.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'net') diff --git a/net/sched/sch_tbf.c b/net/sched/sch_tbf.c index 83b90b584fae..3161e491990b 100644 --- a/net/sched/sch_tbf.c +++ b/net/sched/sch_tbf.c @@ -207,6 +207,7 @@ static int tbf_enqueue(struct sk_buff *skb, struct Qdisc *sch) return ret; } + qdisc_qstats_backlog_inc(sch, skb); sch->q.qlen++; return NET_XMIT_SUCCESS; } @@ -217,6 +218,7 @@ static unsigned int tbf_drop(struct Qdisc *sch) unsigned int len = 0; if (q->qdisc->ops->drop && (len = q->qdisc->ops->drop(q->qdisc)) != 0) { + sch->qstats.backlog -= len; sch->q.qlen--; qdisc_qstats_drop(sch); } @@ -263,6 +265,7 @@ static struct sk_buff *tbf_dequeue(struct Qdisc *sch) q->t_c = now; q->tokens = toks; q->ptokens = ptoks; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; qdisc_unthrottled(sch); qdisc_bstats_update(sch, skb); @@ -294,6 +297,7 @@ static void tbf_reset(struct Qdisc *sch) struct tbf_sched_data *q = qdisc_priv(sch); qdisc_reset(q->qdisc); + sch->qstats.backlog = 0; sch->q.qlen = 0; q->t_c = ktime_get_ns(); q->tokens = q->buffer; -- cgit v1.2.3 From 3ec10d3a2ba591c87da94219c1e46b02ae97757a Mon Sep 17 00:00:00 2001 From: Marco Angaroni Date: Mon, 16 May 2016 19:18:09 +0200 Subject: ipvs: update real-server binding of outgoing connections in SIP-pe Previous patch that introduced handling of outgoing packets in SIP persistent-engine did not call ip_vs_check_template() in case packet was matching a connection template. Assumption was that real-server was healthy, since it was sending a packet just in that moment. There are however real-server fault conditions requiring that association between call-id and real-server (represented by connection template) gets updated. Here is an example of the sequence of events: 1) RS1 is a back2back user agent that handled call-id1 and call-id2 2) RS1 is down and was marked as unavailable 3) new message from outside comes to IPVS with call-id1 4) IPVS reschedules the message to RS2, which becomes new call handler 5) RS2 forwards the message outside, translating call-id1 to call-id2 6) inside pe->conn_out() IPVS matches call-id2 with existing template 7) IPVS does not change association call-id2 <-> RS1 8) new message comes from client with call-id2 9) IPVS reschedules the message to a real-server potentially different from RS2, which is now the correct destination This patch introduces ip_vs_check_template() call in the handling of outgoing packets for SIP-pe. And also introduces a second optional argument for ip_vs_check_template() that allows to check if dest associated to a connection template is the same dest that was identified as the source of the packet. This is to change the real-server bound to a particular call-id independently from its availability status: the idea is that it's more reliable, for in->out direction (where internal network can be considered trusted), to always associate a call-id with the last real-server that used it in one of its messages. Think about above sequence of events where, just after step 5, RS1 returns instead to be available. Comparison of dests is done by simply comparing pointers to struct ip_vs_dest; there should be no cases where struct ip_vs_dest keeps its memory address, but represent a different real-server in terms of ip-address / port. Fixes: 39b972231536 ("ipvs: handle connections started by real-servers") Signed-off-by: Marco Angaroni Acked-by: Julian Anastasov Signed-off-by: Simon Horman --- include/net/ip_vs.h | 2 +- net/netfilter/ipvs/ip_vs_conn.c | 5 +++-- net/netfilter/ipvs/ip_vs_core.c | 5 +++-- 3 files changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/include/net/ip_vs.h b/include/net/ip_vs.h index af4c10ebb241..cd6018a9ee24 100644 --- a/include/net/ip_vs.h +++ b/include/net/ip_vs.h @@ -1232,7 +1232,7 @@ void ip_vs_conn_expire_now(struct ip_vs_conn *cp); const char *ip_vs_state_name(__u16 proto, int state); void ip_vs_tcp_conn_listen(struct ip_vs_conn *cp); -int ip_vs_check_template(struct ip_vs_conn *ct); +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest); void ip_vs_random_dropentry(struct netns_ipvs *ipvs); int ip_vs_conn_init(void); void ip_vs_conn_cleanup(void); diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c index 2cb3c626cd43..096a45103f14 100644 --- a/net/netfilter/ipvs/ip_vs_conn.c +++ b/net/netfilter/ipvs/ip_vs_conn.c @@ -762,7 +762,7 @@ static int expire_quiescent_template(struct netns_ipvs *ipvs, * If available, return 1, otherwise invalidate this connection * template and return 0. */ -int ip_vs_check_template(struct ip_vs_conn *ct) +int ip_vs_check_template(struct ip_vs_conn *ct, struct ip_vs_dest *cdest) { struct ip_vs_dest *dest = ct->dest; struct netns_ipvs *ipvs = ct->ipvs; @@ -772,7 +772,8 @@ int ip_vs_check_template(struct ip_vs_conn *ct) */ if ((dest == NULL) || !(dest->flags & IP_VS_DEST_F_AVAILABLE) || - expire_quiescent_template(ipvs, dest)) { + expire_quiescent_template(ipvs, dest) || + (cdest && (dest != cdest))) { IP_VS_DBG_BUF(9, "check_template: dest not available for " "protocol %s s:%s:%d v:%s:%d " "-> d:%s:%d\n", diff --git a/net/netfilter/ipvs/ip_vs_core.c b/net/netfilter/ipvs/ip_vs_core.c index 1207f20d24e4..2c1b498a7a27 100644 --- a/net/netfilter/ipvs/ip_vs_core.c +++ b/net/netfilter/ipvs/ip_vs_core.c @@ -321,7 +321,7 @@ ip_vs_sched_persist(struct ip_vs_service *svc, /* Check if a template already exists */ ct = ip_vs_ct_in_get(¶m); - if (!ct || !ip_vs_check_template(ct)) { + if (!ct || !ip_vs_check_template(ct, NULL)) { struct ip_vs_scheduler *sched; /* @@ -1154,7 +1154,8 @@ struct ip_vs_conn *ip_vs_new_conn_out(struct ip_vs_service *svc, vport, ¶m) < 0) return NULL; ct = ip_vs_ct_in_get(¶m); - if (!ct) { + /* check if template exists and points to the same dest */ + if (!ct || !ip_vs_check_template(ct, dest)) { ct = ip_vs_conn_new(¶m, dest->af, daddr, dport, IP_VS_CONN_F_TEMPLATE, dest, 0); if (!ct) { -- cgit v1.2.3 From 1957598840f47d42bb0b7f8a871717a780708686 Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Fri, 3 Jun 2016 23:49:17 +0200 Subject: soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF Commit 538950a1b752 ("soreuseport: setsockopt SO_ATTACH_REUSEPORT_[CE]BPF") missed to add the compat case for the SO_ATTACH_REUSEPORT_CBPF option. Signed-off-by: Helge Deller Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- net/compat.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/compat.c b/net/compat.c index 5cfd26a0006f..1373947efb50 100644 --- a/net/compat.c +++ b/net/compat.c @@ -354,7 +354,8 @@ static int do_set_sock_timeout(struct socket *sock, int level, static int compat_sock_setsockopt(struct socket *sock, int level, int optname, char __user *optval, unsigned int optlen) { - if (optname == SO_ATTACH_FILTER) + if (optname == SO_ATTACH_FILTER || + optname == SO_ATTACH_REUSEPORT_CBPF) return do_set_attach_filter(sock, level, optname, optval, optlen); if (optname == SO_RCVTIMEO || optname == SO_SNDTIMEO) -- cgit v1.2.3 From a27758ffaf96f89002129eedb2cc172d254099f8 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Fri, 3 Jun 2016 15:05:57 -0700 Subject: net_sched: keep backlog updated with qlen For gso_skb we only update qlen, backlog should be updated too. Note, it is correct to just update these stats at one layer, because the gso_skb is cached there. Reported-by: Stas Nichiporovich Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/sch_generic.h | 5 ++++- net/sched/sch_generic.c | 2 ++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index a1fd76c22a59..6803af17dfcf 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -691,9 +691,11 @@ static inline struct sk_buff *qdisc_peek_dequeued(struct Qdisc *sch) /* we can reuse ->gso_skb because peek isn't called for root qdiscs */ if (!sch->gso_skb) { sch->gso_skb = sch->dequeue(sch); - if (sch->gso_skb) + if (sch->gso_skb) { /* it's still part of the queue */ + qdisc_qstats_backlog_inc(sch, sch->gso_skb); sch->q.qlen++; + } } return sch->gso_skb; @@ -706,6 +708,7 @@ static inline struct sk_buff *qdisc_dequeue_peeked(struct Qdisc *sch) if (skb) { sch->gso_skb = NULL; + qdisc_qstats_backlog_dec(sch, skb); sch->q.qlen--; } else { skb = sch->dequeue(sch); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 269dd71b3828..f9e0e9c03d0a 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -49,6 +49,7 @@ static inline int dev_requeue_skb(struct sk_buff *skb, struct Qdisc *q) { q->gso_skb = skb; q->qstats.requeues++; + qdisc_qstats_backlog_inc(q, skb); q->q.qlen++; /* it's still part of the queue */ __netif_schedule(q); @@ -92,6 +93,7 @@ static struct sk_buff *dequeue_skb(struct Qdisc *q, bool *validate, txq = skb_get_tx_queue(txq->dev, skb); if (!netif_xmit_frozen_or_stopped(txq)) { q->gso_skb = NULL; + qdisc_qstats_backlog_dec(q, skb); q->q.qlen--; } else skb = NULL; -- cgit v1.2.3 From 80e509db54c81247b32fcb75bb1730fc789b893d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sat, 4 Jun 2016 12:55:13 -0700 Subject: fq_codel: fix NET_XMIT_CN behavior My prior attempt to fix the backlogs of parents failed. If we return NET_XMIT_CN, our parents wont increase their backlog, so our qdisc_tree_reduce_backlog() should take this into account. v2: Florian Westphal pointed out that we could drop the packet, so we need to save qdisc_pkt_len(skb) in a temp variable before calling fq_codel_drop() Fixes: 9d18562a2278 ("fq_codel: add batch ability to fq_codel_drop()") Fixes: 2ccccf5fb43f ("net_sched: update hierarchical backlog too") Reported-by: Stas Nichiporovich Signed-off-by: Eric Dumazet Cc: WANG Cong Cc: Jamal Hadi Salim Acked-by: Jamal Hadi Salim Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) (limited to 'net') diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index 6883a8971562..fff7867f4a4f 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -199,6 +199,7 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) unsigned int idx, prev_backlog, prev_qlen; struct fq_codel_flow *flow; int uninitialized_var(ret); + unsigned int pkt_len; bool memory_limited; idx = fq_codel_classify(skb, sch, &ret); @@ -230,6 +231,8 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) prev_backlog = sch->qstats.backlog; prev_qlen = sch->q.qlen; + /* save this packet length as it might be dropped by fq_codel_drop() */ + pkt_len = qdisc_pkt_len(skb); /* fq_codel_drop() is quite expensive, as it performs a linear search * in q->backlogs[] to find a fat flow. * So instead of dropping a single packet, drop half of its backlog @@ -237,14 +240,23 @@ static int fq_codel_enqueue(struct sk_buff *skb, struct Qdisc *sch) */ ret = fq_codel_drop(sch, q->drop_batch_size); - q->drop_overlimit += prev_qlen - sch->q.qlen; + prev_qlen -= sch->q.qlen; + prev_backlog -= sch->qstats.backlog; + q->drop_overlimit += prev_qlen; if (memory_limited) - q->drop_overmemory += prev_qlen - sch->q.qlen; - /* As we dropped packet(s), better let upper stack know this */ - qdisc_tree_reduce_backlog(sch, prev_qlen - sch->q.qlen, - prev_backlog - sch->qstats.backlog); + q->drop_overmemory += prev_qlen; - return ret == idx ? NET_XMIT_CN : NET_XMIT_SUCCESS; + /* As we dropped packet(s), better let upper stack know this. + * If we dropped a packet for this flow, return NET_XMIT_CN, + * but in this case, our parents wont increase their backlogs. + */ + if (ret == idx) { + qdisc_tree_reduce_backlog(sch, prev_qlen - 1, + prev_backlog - pkt_len); + return NET_XMIT_CN; + } + qdisc_tree_reduce_backlog(sch, prev_qlen, prev_backlog); + return NET_XMIT_SUCCESS; } /* This is the specific function called from codel_dequeue() -- cgit v1.2.3 From 335b48d980f631fbc5b233cbb3625ac0c86d67cb Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:58 -0700 Subject: RDS: TCP: Add/use rds_tcp_reset_callbacks to reset tcp socket safely When rds_tcp_accept_one() has to replace the existing tcp socket with a newer tcp socket (duelling-syn resolution), it must lock_sock() to suppress the rds_tcp_data_recv() path while callbacks are being changed. Also, existing RDS datagram reassembly state must be reset, so that the next datagram on the new socket does not have corrupted state. Similarly when resetting the newly accepted socket, appropriate locks and synchronization is needed. This commit ensures correct synchronization by invoking kernel_sock_shutdown to reset a newly accepted sock, and by taking appropriate lock_sock()s (for old and new sockets) when resetting existing callbacks. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/tcp.c | 65 +++++++++++++++++++++++++++++++++++++++++++++++++--- net/rds/tcp.h | 1 + net/rds/tcp_listen.c | 13 ++++------- 3 files changed, 67 insertions(+), 12 deletions(-) (limited to 'net') diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 86187dad1440..8faa0b1ae39d 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -126,9 +126,68 @@ void rds_tcp_restore_callbacks(struct socket *sock, } /* - * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_UP bit protects those paths from being - * called while it isn't set. + * rds_tcp_reset_callbacks() switches the to the new sock and + * returns the existing tc->t_sock. + * + * The only functions that set tc->t_sock are rds_tcp_set_callbacks + * and rds_tcp_reset_callbacks. Send and receive trust that + * it is set. The absence of RDS_CONN_UP bit protects those paths + * from being called while it isn't set. + */ +void rds_tcp_reset_callbacks(struct socket *sock, + struct rds_connection *conn) +{ + struct rds_tcp_connection *tc = conn->c_transport_data; + struct socket *osock = tc->t_sock; + + if (!osock) + goto newsock; + + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * c_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then + * waiting for any existing threads in rds_send_xmit to + * complete release_in_xmit(). (Subsequent threads entering + * rds_send_xmit() will bail on !rds_conn_up(). + */ + lock_sock(osock->sk); + /* reset receive side state for rds_tcp_data_recv() for osock */ + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + tc->t_sock = NULL; + + write_lock_bh(&osock->sk->sk_callback_lock); + + osock->sk->sk_user_data = NULL; + osock->sk->sk_data_ready = tc->t_orig_data_ready; + osock->sk->sk_write_space = tc->t_orig_write_space; + osock->sk->sk_state_change = tc->t_orig_state_change; + write_unlock_bh(&osock->sk->sk_callback_lock); + release_sock(osock->sk); + sock_release(osock); +newsock: + lock_sock(sock->sk); + write_lock_bh(&sock->sk->sk_callback_lock); + tc->t_sock = sock; + sock->sk->sk_user_data = conn; + sock->sk->sk_data_ready = rds_tcp_data_ready; + sock->sk->sk_write_space = rds_tcp_write_space; + sock->sk->sk_state_change = rds_tcp_state_change; + + write_unlock_bh(&sock->sk->sk_callback_lock); + release_sock(sock->sk); +} + +/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments + * above rds_tcp_reset_callbacks for notes about synchronization + * with data path */ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) { diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 41c228300525..ec0602b0dc24 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -50,6 +50,7 @@ struct rds_tcp_statistics { void rds_tcp_tune(struct socket *sock); void rds_tcp_nonagle(struct socket *sock); void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn); +void rds_tcp_reset_callbacks(struct socket *sock, struct rds_connection *conn); void rds_tcp_restore_callbacks(struct socket *sock, struct rds_tcp_connection *tc); u32 rds_tcp_snd_nxt(struct rds_tcp_connection *tc); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 4bf4befe5066..d9fe53675d95 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -78,7 +78,6 @@ int rds_tcp_accept_one(struct socket *sock) struct inet_sock *inet; struct rds_tcp_connection *rs_tcp = NULL; int conn_state; - struct sock *nsk; if (!sock) /* module unload or netns delete in progress */ return -ENETUNREACH; @@ -139,23 +138,19 @@ int rds_tcp_accept_one(struct socket *sock) atomic_set(&conn->c_state, RDS_CONN_CONNECTING); wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); - rds_tcp_restore_callbacks(rs_tcp->t_sock, rs_tcp); + rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; } + } else { + rds_tcp_set_callbacks(new_sock, conn); } - rds_tcp_set_callbacks(new_sock, conn); rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; rst_nsk: /* reset the newly returned accept sock and bail */ - nsk = new_sock->sk; - rds_tcp_stats_inc(s_tcp_listen_closed_stale); - nsk->sk_user_data = NULL; - nsk->sk_prot->disconnect(nsk, 0); - tcp_done(nsk); - new_sock = NULL; + kernel_sock_shutdown(new_sock, SHUT_RDWR); ret = 0; out: if (rs_tcp) -- cgit v1.2.3 From 0b6f760cff04a7cdfafc3ec6915e91fed0533d8d Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 13:59:59 -0700 Subject: RDS: TCP: Retransmit half-sent datagrams when switching sockets in rds_tcp_reset_callbacks When we switch a connection's sockets in rds_tcp_rest_callbacks, any partially sent datagram must be retransmitted on the new socket so that the receiver can correctly reassmble the RDS datagram. Use rds_send_reset() which is designed for this purpose. Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/send.c | 1 + net/rds/tcp.c | 1 + 2 files changed, 2 insertions(+) (limited to 'net') diff --git a/net/rds/send.c b/net/rds/send.c index c9cdb358ea88..b1962f8e30f7 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -99,6 +99,7 @@ void rds_send_reset(struct rds_connection *conn) list_splice_init(&conn->c_retrans, &conn->c_send_queue); spin_unlock_irqrestore(&conn->c_lock, flags); } +EXPORT_SYMBOL_GPL(rds_send_reset); static int acquire_in_xmit(struct rds_connection *conn) { diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 8faa0b1ae39d..7ab1b41ffc88 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -173,6 +173,7 @@ void rds_tcp_reset_callbacks(struct socket *sock, release_sock(osock->sk); sock_release(osock); newsock: + rds_send_reset(conn); lock_sock(sock->sk); write_lock_bh(&sock->sk->sk_callback_lock); tc->t_sock = sock; -- cgit v1.2.3 From 9c79440e2c5e2518879f1599270f64c3ddda3baf Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Sat, 4 Jun 2016 14:00:00 -0700 Subject: RDS: TCP: fix race windows in send-path quiescence by rds_tcp_accept_one() The send path needs to be quiesced before resetting callbacks from rds_tcp_accept_one(), and commit eb192840266f ("RDS:TCP: Synchronize rds_tcp_accept_one with rds_send_xmit when resetting t_sock") achieves this using the c_state and RDS_IN_XMIT bit following the pattern used by rds_conn_shutdown(). However this leaves the possibility of a race window as shown in the sequence below take t_conn_lock in rds_tcp_conn_connect send outgoing syn to peer drop t_conn_lock in rds_tcp_conn_connect incoming from peer triggers rds_tcp_accept_one, conn is marked CONNECTING wait for RDS_IN_XMIT to quiesce any rds_send_xmit threads call rds_tcp_reset_callbacks [.. race-window where incoming syn-ack can cause the conn to be marked UP from rds_tcp_state_change ..] lock_sock called from rds_tcp_reset_callbacks, and we set t_sock to null As soon as the conn is marked UP in the race-window above, rds_send_xmit() threads will proceed to rds_tcp_xmit and may encounter a null-pointer deref on the t_sock. Given that rds_tcp_state_change() is invoked in softirq context, whereas rds_tcp_reset_callbacks() is in workq context, and testing for RDS_IN_XMIT after lock_sock could result in a deadlock with tcp_sendmsg, this commit fixes the race by using a new c_state, RDS_TCP_RESETTING, which will prevent a transition to RDS_CONN_UP from rds_tcp_state_change(). Signed-off-by: Sowmini Varadhan Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- net/rds/rds.h | 2 ++ net/rds/tcp.c | 14 +++++++++++++- net/rds/tcp_connect.c | 2 +- net/rds/tcp_listen.c | 7 +++---- net/rds/threads.c | 10 ++++++++-- 5 files changed, 27 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/rds/rds.h b/net/rds/rds.h index 80256b08eac0..387df5f32e49 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -74,6 +74,7 @@ enum { RDS_CONN_CONNECTING, RDS_CONN_DISCONNECTING, RDS_CONN_UP, + RDS_CONN_RESETTING, RDS_CONN_ERROR, }; @@ -813,6 +814,7 @@ void rds_connect_worker(struct work_struct *); void rds_shutdown_worker(struct work_struct *); void rds_send_worker(struct work_struct *); void rds_recv_worker(struct work_struct *); +void rds_connect_path_complete(struct rds_connection *conn, int curr); void rds_connect_complete(struct rds_connection *conn); /* transport.c */ diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 7ab1b41ffc88..74ee126a6fe6 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -148,11 +148,23 @@ void rds_tcp_reset_callbacks(struct socket *sock, * potentially have transitioned to the RDS_CONN_UP state, * so we must quiesce any send threads before resetting * c_transport_data. We quiesce these threads by setting - * cp_state to something other than RDS_CONN_UP, and then + * c_state to something other than RDS_CONN_UP, and then * waiting for any existing threads in rds_send_xmit to * complete release_in_xmit(). (Subsequent threads entering * rds_send_xmit() will bail on !rds_conn_up(). + * + * However an incoming syn-ack at this point would end up + * marking the conn as RDS_CONN_UP, and would again permit + * rds_send_xmi() threads through, so ideally we would + * synchronize on RDS_CONN_UP after lock_sock(), but cannot + * do that: waiting on !RDS_IN_XMIT after lock_sock() may + * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT + * would not get set. As a result, we set c_state to + * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change + * cannot mark rds_conn_path_up() in the window before lock_sock() */ + atomic_set(&conn->c_state, RDS_CONN_RESETTING); + wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); lock_sock(osock->sk); /* reset receive side state for rds_tcp_data_recv() for osock */ if (tc->t_tinc) { diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index fb82e0a0bf89..fba13d0305fb 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -60,7 +60,7 @@ void rds_tcp_state_change(struct sock *sk) case TCP_SYN_RECV: break; case TCP_ESTABLISHED: - rds_connect_complete(conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); break; case TCP_CLOSE_WAIT: case TCP_CLOSE: diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index d9fe53675d95..686b1d03a558 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -135,16 +135,15 @@ int rds_tcp_accept_one(struct socket *sock) !conn->c_outgoing) { goto rst_nsk; } else { - atomic_set(&conn->c_state, RDS_CONN_CONNECTING); - wait_event(conn->c_waitq, - !test_bit(RDS_IN_XMIT, &conn->c_flags)); rds_tcp_reset_callbacks(new_sock, conn); conn->c_outgoing = 0; + /* rds_connect_path_complete() marks RDS_CONN_UP */ + rds_connect_path_complete(conn, RDS_CONN_DISCONNECTING); } } else { rds_tcp_set_callbacks(new_sock, conn); + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); } - rds_connect_complete(conn); /* marks RDS_CONN_UP */ new_sock = NULL; ret = 0; goto out; diff --git a/net/rds/threads.c b/net/rds/threads.c index 454aa6d23327..4a323045719b 100644 --- a/net/rds/threads.c +++ b/net/rds/threads.c @@ -71,9 +71,9 @@ struct workqueue_struct *rds_wq; EXPORT_SYMBOL_GPL(rds_wq); -void rds_connect_complete(struct rds_connection *conn) +void rds_connect_path_complete(struct rds_connection *conn, int curr) { - if (!rds_conn_transition(conn, RDS_CONN_CONNECTING, RDS_CONN_UP)) { + if (!rds_conn_transition(conn, curr, RDS_CONN_UP)) { printk(KERN_WARNING "%s: Cannot transition to state UP, " "current state is %d\n", __func__, @@ -90,6 +90,12 @@ void rds_connect_complete(struct rds_connection *conn) queue_delayed_work(rds_wq, &conn->c_send_w, 0); queue_delayed_work(rds_wq, &conn->c_recv_w, 0); } +EXPORT_SYMBOL_GPL(rds_connect_path_complete); + +void rds_connect_complete(struct rds_connection *conn) +{ + rds_connect_path_complete(conn, RDS_CONN_CONNECTING); +} EXPORT_SYMBOL_GPL(rds_connect_complete); /* -- cgit v1.2.3 From fa54cc70ed2eddd96773ffdf4d96d23ea7483e56 Mon Sep 17 00:00:00 2001 From: Wu Fengguang Date: Sun, 5 Jun 2016 07:17:19 +0800 Subject: rxrpc: fix ptr_ret.cocci warnings net/rxrpc/rxkad.c:1165:1-3: WARNING: PTR_ERR_OR_ZERO can be used Use PTR_ERR_OR_ZERO rather than if(IS_ERR(...)) + PTR_ERR Generated by: scripts/coccinelle/api/ptr_ret.cocci CC: David Howells Signed-off-by: Fengguang Wu Signed-off-by: David S. Miller --- net/rxrpc/rxkad.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'net') diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c index 6b726a046a7d..bab56ed649ba 100644 --- a/net/rxrpc/rxkad.c +++ b/net/rxrpc/rxkad.c @@ -1162,9 +1162,7 @@ static int rxkad_init(void) /* pin the cipher we need so that the crypto layer doesn't invoke * keventd to go get it */ rxkad_ci = crypto_alloc_skcipher("pcbc(fcrypt)", 0, CRYPTO_ALG_ASYNC); - if (IS_ERR(rxkad_ci)) - return PTR_ERR(rxkad_ci); - return 0; + return PTR_ERR_OR_ZERO(rxkad_ci); } /* -- cgit v1.2.3 From 1a0f7d2984f3864e64a43714b4a0999b5a27cff5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:47 +0100 Subject: net: cls_u32: fix error code for invalid flags 'err' variable is not set in this test, we would return whatever previous test set 'err' to. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 079b43b3c5d2..b17e090f2fe1 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -863,7 +863,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, if (tb[TCA_U32_FLAGS]) { flags = nla_get_u32(tb[TCA_U32_FLAGS]); if (!tc_flags_valid(flags)) - return err; + return -EINVAL; } n = (struct tc_u_knode *)*arg; -- cgit v1.2.3 From d47a0f387fe907bdb0430a398850c1cb80eb7def Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 6 Jun 2016 16:16:48 +0100 Subject: net: cls_u32: be more strict about skip-sw flag Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index b17e090f2fe1..fe05449537a3 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -457,20 +457,21 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; + if (!tc_should_offload(dev, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { - offload.cls_u32->command = TC_CLSU32_NEW_HNODE; - offload.cls_u32->hnode.divisor = h->divisor; - offload.cls_u32->hnode.handle = h->handle; - offload.cls_u32->hnode.prio = h->prio; + offload.cls_u32->command = TC_CLSU32_NEW_HNODE; + offload.cls_u32->hnode.divisor = h->divisor; + offload.cls_u32->hnode.handle = h->handle; + offload.cls_u32->hnode.prio = h->prio; - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } -- cgit v1.2.3 From aafddbf0cffeb790f919436285328c762279b5d4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 6 Jun 2016 09:12:39 -0700 Subject: fq_codel: return non zero qlen in class dumps We properly scan the flow list to count number of packets, but John passed 0 to gnet_stats_copy_queue() so we report a zero value to user space instead of the result. Fixes: 640158536632 ("net: sched: restrict use of qstats qlen") Signed-off-by: Eric Dumazet Cc: John Fastabend Acked-by: John Fastabend Signed-off-by: David S. Miller --- net/sched/sch_fq_codel.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_fq_codel.c b/net/sched/sch_fq_codel.c index fff7867f4a4f..da250b2e06ae 100644 --- a/net/sched/sch_fq_codel.c +++ b/net/sched/sch_fq_codel.c @@ -661,7 +661,7 @@ static int fq_codel_dump_class_stats(struct Qdisc *sch, unsigned long cl, qs.backlog = q->backlogs[idx]; qs.drops = flow->dropped; } - if (gnet_stats_copy_queue(d, NULL, &qs, 0) < 0) + if (gnet_stats_copy_queue(d, NULL, &qs, qs.qlen) < 0) return -1; if (idx < q->flows_cnt) return gnet_stats_copy_app(d, &xstats, sizeof(xstats)); -- cgit v1.2.3 From a03e6fe569713fb3ff0714f8fd7c8785c0ca9e22 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 6 Jun 2016 09:54:30 -0700 Subject: act_police: fix a crash during removal The police action is using its own code to initialize tcf hash info, which makes us to forgot to initialize a->hinfo correctly. Fix this by calling the helper function tcf_hash_create() directly. This patch fixed the following crash: BUG: unable to handle kernel NULL pointer dereference at 0000000000000028 IP: [] __lock_acquire+0xd3/0xf91 PGD d3c34067 PUD d3e18067 PMD 0 Oops: 0000 [#1] SMP CPU: 2 PID: 853 Comm: tc Not tainted 4.6.0+ #87 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 task: ffff8800d3e28040 ti: ffff8800d3f6c000 task.ti: ffff8800d3f6c000 RIP: 0010:[] [] __lock_acquire+0xd3/0xf91 RSP: 0000:ffff88011b203c80 EFLAGS: 00010002 RAX: 0000000000000046 RBX: 0000000000000000 RCX: 0000000000000000 RDX: 0000000000000000 RSI: 0000000000000000 RDI: 0000000000000028 RBP: ffff88011b203d40 R08: 0000000000000001 R09: 0000000000000000 R10: ffff88011b203d58 R11: ffff88011b208000 R12: 0000000000000001 R13: ffff8800d3e28040 R14: 0000000000000028 R15: 0000000000000000 FS: 0000000000000000(0000) GS:ffff88011b200000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000028 CR3: 00000000d4be1000 CR4: 00000000000006e0 Stack: ffff8800d3e289c0 0000000000000046 000000001b203d60 ffffffff00000000 0000000000000000 ffff880000000000 0000000000000000 ffffffff00000000 ffffffff8187142c ffff88011b203ce8 ffff88011b203ce8 ffffffff8101dbfc Call Trace: [] ? __tcf_hash_release+0x77/0xd1 [] ? native_sched_clock+0x1a/0x35 [] ? native_sched_clock+0x1a/0x35 [] ? sched_clock_local+0x11/0x78 [] ? mark_lock+0x24/0x201 [] lock_acquire+0x120/0x1b4 [] ? lock_acquire+0x120/0x1b4 [] ? __tcf_hash_release+0x77/0xd1 [] _raw_spin_lock_bh+0x3c/0x72 [] ? __tcf_hash_release+0x77/0xd1 [] __tcf_hash_release+0x77/0xd1 [] tcf_action_destroy+0x49/0x7c [] tcf_exts_destroy+0x20/0x2d [] u32_destroy_key+0x1b/0x4d [] u32_delete_key_freepf_rcu+0x1b/0x1d [] rcu_process_callbacks+0x610/0x82e [] ? u32_destroy_key+0x4d/0x4d [] __do_softirq+0x191/0x3f4 Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/act_police.c | 33 +++++++++++---------------------- 1 file changed, 11 insertions(+), 22 deletions(-) (limited to 'net') diff --git a/net/sched/act_police.c b/net/sched/act_police.c index b884dae692a1..c557789765dc 100644 --- a/net/sched/act_police.c +++ b/net/sched/act_police.c @@ -38,7 +38,7 @@ struct tcf_police { bool peak_present; }; #define to_police(pc) \ - container_of(pc, struct tcf_police, common) + container_of(pc->priv, struct tcf_police, common) #define POL_TAB_MASK 15 @@ -119,14 +119,12 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, struct nlattr *est, struct tc_action *a, int ovr, int bind) { - unsigned int h; int ret = 0, err; struct nlattr *tb[TCA_POLICE_MAX + 1]; struct tc_police *parm; struct tcf_police *police; struct qdisc_rate_table *R_tab = NULL, *P_tab = NULL; struct tc_action_net *tn = net_generic(net, police_net_id); - struct tcf_hashinfo *hinfo = tn->hinfo; int size; if (nla == NULL) @@ -145,7 +143,7 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, if (parm->index) { if (tcf_hash_search(tn, a, parm->index)) { - police = to_police(a->priv); + police = to_police(a); if (bind) { police->tcf_bindcnt += 1; police->tcf_refcnt += 1; @@ -156,16 +154,15 @@ static int tcf_act_police_locate(struct net *net, struct nlattr *nla, /* not replacing */ return -EEXIST; } + } else { + ret = tcf_hash_create(tn, parm->index, NULL, a, + sizeof(*police), bind, false); + if (ret) + return ret; + ret = ACT_P_CREATED; } - police = kzalloc(sizeof(*police), GFP_KERNEL); - if (police == NULL) - return -ENOMEM; - ret = ACT_P_CREATED; - police->tcf_refcnt = 1; - spin_lock_init(&police->tcf_lock); - if (bind) - police->tcf_bindcnt = 1; + police = to_police(a); override: if (parm->rate.rate) { err = -ENOMEM; @@ -237,16 +234,8 @@ override: return ret; police->tcfp_t_c = ktime_get_ns(); - police->tcf_index = parm->index ? parm->index : - tcf_hash_new_index(tn); - police->tcf_tm.install = jiffies; - police->tcf_tm.lastuse = jiffies; - h = tcf_hash(police->tcf_index, POL_TAB_MASK); - spin_lock_bh(&hinfo->lock); - hlist_add_head(&police->tcf_head, &hinfo->htab[h]); - spin_unlock_bh(&hinfo->lock); + tcf_hash_insert(tn, a); - a->priv = police; return ret; failure_unlock: @@ -255,7 +244,7 @@ failure: qdisc_put_rtab(P_tab); qdisc_put_rtab(R_tab); if (ret == ACT_P_CREATED) - kfree(police); + tcf_hash_cleanup(a, est); return err; } -- cgit v1.2.3 From 92c075dbdeed02bdf293cb0f513bad70aa714b8d Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 6 Jun 2016 22:50:39 +0200 Subject: net: sched: fix tc_should_offload for specific clsact classes When offloading classifiers such as u32 or flower to hardware, and the qdisc is clsact (TC_H_CLSACT), then we need to differentiate its classes, since not all of them handle ingress, therefore we must leave those in software path. Add a .tcf_cl_offload() callback, so we can generically handle them, tested on ixgbe. Fixes: 10cbc6843446 ("net/sched: cls_flower: Hardware offloaded filters statistics support") Fixes: 5b33f48842fa ("net/flower: Introduce hardware offload support") Fixes: a1b7c5fd7fe9 ("net: sched: add cls_u32 offload hooks for netdevs") Signed-off-by: Daniel Borkmann Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/net/pkt_cls.h | 10 +++++++--- include/net/sch_generic.h | 1 + net/sched/cls_flower.c | 6 +++--- net/sched/cls_u32.c | 8 ++++---- net/sched/sch_ingress.c | 12 ++++++++++++ 5 files changed, 27 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 0f7efa88f210..3722dda0199d 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -392,16 +392,20 @@ struct tc_cls_u32_offload { }; }; -static inline bool tc_should_offload(struct net_device *dev, u32 flags) +static inline bool tc_should_offload(const struct net_device *dev, + const struct tcf_proto *tp, u32 flags) { + const struct Qdisc *sch = tp->q; + const struct Qdisc_class_ops *cops = sch->ops->cl_ops; + if (!(dev->features & NETIF_F_HW_TC)) return false; - if (flags & TCA_CLS_FLAGS_SKIP_HW) return false; - if (!dev->netdev_ops->ndo_setup_tc) return false; + if (cops && cops->tcf_cl_offload) + return cops->tcf_cl_offload(tp->classid); return true; } diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index 6803af17dfcf..62d553184e91 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -168,6 +168,7 @@ struct Qdisc_class_ops { /* Filter manipulation */ struct tcf_proto __rcu ** (*tcf_chain)(struct Qdisc *, unsigned long); + bool (*tcf_cl_offload)(u32 classid); unsigned long (*bind_tcf)(struct Qdisc *, unsigned long, u32 classid); void (*unbind_tcf)(struct Qdisc *, unsigned long); diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c index 730aacafc22d..b3b7978f4182 100644 --- a/net/sched/cls_flower.c +++ b/net/sched/cls_flower.c @@ -171,7 +171,7 @@ static void fl_hw_destroy_filter(struct tcf_proto *tp, unsigned long cookie) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_DESTROY; @@ -194,7 +194,7 @@ static void fl_hw_replace_filter(struct tcf_proto *tp, struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return; offload.command = TC_CLSFLOWER_REPLACE; @@ -216,7 +216,7 @@ static void fl_hw_update_stats(struct tcf_proto *tp, struct cls_fl_filter *f) struct tc_cls_flower_offload offload = {0}; struct tc_to_netdev tc; - if (!tc_should_offload(dev, 0)) + if (!tc_should_offload(dev, tp, 0)) return; offload.command = TC_CLSFLOWER_STATS; diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index fe05449537a3..27b99fd774d7 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -440,7 +440,7 @@ static void u32_remove_hw_knode(struct tcf_proto *tp, u32 handle) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_KNODE; offload.cls_u32->knode.handle = handle; dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, @@ -457,7 +457,7 @@ static int u32_replace_hw_hnode(struct tcf_proto *tp, struct tc_to_netdev offload; int err; - if (!tc_should_offload(dev, flags)) + if (!tc_should_offload(dev, tp, flags)) return tc_skip_sw(flags) ? -EINVAL : 0; offload.type = TC_SETUP_CLSU32; @@ -485,7 +485,7 @@ static void u32_clear_hw_hnode(struct tcf_proto *tp, struct tc_u_hnode *h) offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, 0)) { + if (tc_should_offload(dev, tp, 0)) { offload.cls_u32->command = TC_CLSU32_DELETE_HNODE; offload.cls_u32->hnode.divisor = h->divisor; offload.cls_u32->hnode.handle = h->handle; @@ -508,7 +508,7 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, flags)) { + if (tc_should_offload(dev, tp, flags)) { offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; offload.cls_u32->knode.handle = n->handle; offload.cls_u32->knode.fshift = n->fshift; diff --git a/net/sched/sch_ingress.c b/net/sched/sch_ingress.c index 10adbc617905..8fe6999b642a 100644 --- a/net/sched/sch_ingress.c +++ b/net/sched/sch_ingress.c @@ -27,6 +27,11 @@ static unsigned long ingress_get(struct Qdisc *sch, u32 classid) return TC_H_MIN(classid) + 1; } +static bool ingress_cl_offload(u32 classid) +{ + return true; +} + static unsigned long ingress_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -86,6 +91,7 @@ static const struct Qdisc_class_ops ingress_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = ingress_find_tcf, + .tcf_cl_offload = ingress_cl_offload, .bind_tcf = ingress_bind_filter, .unbind_tcf = ingress_put, }; @@ -110,6 +116,11 @@ static unsigned long clsact_get(struct Qdisc *sch, u32 classid) } } +static bool clsact_cl_offload(u32 classid) +{ + return TC_H_MIN(classid) == TC_H_MIN(TC_H_MIN_INGRESS); +} + static unsigned long clsact_bind_filter(struct Qdisc *sch, unsigned long parent, u32 classid) { @@ -158,6 +169,7 @@ static const struct Qdisc_class_ops clsact_class_ops = { .put = ingress_put, .walk = ingress_walk, .tcf_chain = clsact_find_tcf, + .tcf_cl_offload = clsact_cl_offload, .bind_tcf = clsact_bind_filter, .unbind_tcf = ingress_put, }; -- cgit v1.2.3 From ce3cf4ec0305919fc69a972f6c2b2efd35d36abc Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Mon, 6 Jun 2016 15:07:18 -0700 Subject: tcp: record TLP and ER timer stats in v6 stats The v6 tcp stats scan do not provide TLP and ER timer information correctly like the v4 version . This patch fixes that. Fixes: 6ba8a3b19e76 ("tcp: Tail loss probe (TLP)") Fixes: eed530b6c676 ("tcp: early retransmit") Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- net/ipv6/tcp_ipv6.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 79e33e02f11a..f36c2d076fce 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -1721,7 +1721,9 @@ static void get_tcp6_sock(struct seq_file *seq, struct sock *sp, int i) destp = ntohs(inet->inet_dport); srcp = ntohs(inet->inet_sport); - if (icsk->icsk_pending == ICSK_TIME_RETRANS) { + if (icsk->icsk_pending == ICSK_TIME_RETRANS || + icsk->icsk_pending == ICSK_TIME_EARLY_RETRANS || + icsk->icsk_pending == ICSK_TIME_LOSS_PROBE) { timer_active = 1; timer_expires = icsk->icsk_timeout; } else if (icsk->icsk_pending == ICSK_TIME_PROBE0) { -- cgit v1.2.3 From 0b148def403153a4d1565f1640356cb78ce5109f Mon Sep 17 00:00:00 2001 From: Toshiaki Makita Date: Tue, 7 Jun 2016 19:14:17 +0900 Subject: bridge: Don't insert unnecessary local fdb entry on changing mac address The missing br_vlan_should_use() test caused creation of an unneeded local fdb entry on changing mac address of a bridge device when there is a vlan which is configured on a bridge port but not on the bridge device. Fixes: 2594e9064a57 ("bridge: vlan: add per-vlan struct and move to rhashtables") Signed-off-by: Toshiaki Makita Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- net/bridge/br_fdb.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/bridge/br_fdb.c b/net/bridge/br_fdb.c index dcea4f4c62b3..c18080ad4085 100644 --- a/net/bridge/br_fdb.c +++ b/net/bridge/br_fdb.c @@ -279,6 +279,8 @@ void br_fdb_change_mac_address(struct net_bridge *br, const u8 *newaddr) * change from under us. */ list_for_each_entry(v, &vg->vlan_list, vlist) { + if (!br_vlan_should_use(v)) + continue; f = __br_fdb_get(br, br->dev->dev_addr, v->vid); if (f && f->is_local && !f->dst) fdb_delete_local(br, NULL, f); -- cgit v1.2.3 From 88832a22d6bb50e3b5f9d5ecc6cf26707c35f322 Mon Sep 17 00:00:00 2001 From: Ben Dooks Date: Tue, 7 Jun 2016 19:27:51 +0100 Subject: net-sysfs: fix missing The of_find_net_device_by_node() function is defined in but not included in the .c file that implements it. Fix the following warning by including the header: net/core/net-sysfs.c:1494:19: warning: symbol 'of_find_net_device_by_node' was not declared. Should it be static? Signed-off-by: Ben Dooks Signed-off-by: David S. Miller --- net/core/net-sysfs.c | 1 + 1 file changed, 1 insertion(+) (limited to 'net') diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index 2b3f76fe65f4..7a0b616557ab 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "net-sysfs.h" -- cgit v1.2.3 From a5c5e2da8551eb69e5d5d09d51d526140b5db9fb Mon Sep 17 00:00:00 2001 From: Guillaume Nault Date: Wed, 8 Jun 2016 12:59:17 +0200 Subject: l2tp: fix configuration passed to setup_udp_tunnel_sock() Unused fields of udp_cfg must be all zeros. Otherwise setup_udp_tunnel_sock() fills ->gro_receive and ->gro_complete callbacks with garbage, eventually resulting in panic when used by udp_gro_receive(). [ 72.694123] BUG: unable to handle kernel paging request at ffff880033f87d78 [ 72.695518] IP: [] 0xffff880033f87d78 [ 72.696530] PGD 26e2067 PUD 26e3067 PMD 342ed063 PTE 8000000033f87163 [ 72.696530] Oops: 0011 [#1] SMP KASAN [ 72.696530] Modules linked in: l2tp_ppp l2tp_netlink l2tp_core ip6_udp_tunnel udp_tunnel pptp gre pppox ppp_generic slhc crc32c_intel ghash_clmulni_intel jitterentropy_rng sha256_generic hmac drbg ansi_cprng aesni_intel evdev aes_x86_64 ablk_helper cryptd lrw gf128mul glue_helper serio_raw acpi_cpufreq button proc\ essor ext4 crc16 jbd2 mbcache virtio_blk virtio_net virtio_pci virtio_ring virtio [ 72.696530] CPU: 3 PID: 0 Comm: swapper/3 Not tainted 4.7.0-rc1 #1 [ 72.696530] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Debian-1.8.2-1 04/01/2014 [ 72.696530] task: ffff880035b59700 ti: ffff880035b70000 task.ti: ffff880035b70000 [ 72.696530] RIP: 0010:[] [] 0xffff880033f87d78 [ 72.696530] RSP: 0018:ffff880035f87bc0 EFLAGS: 00010246 [ 72.696530] RAX: ffffed000698f996 RBX: ffff88003326b840 RCX: ffffffff814cc823 [ 72.696530] RDX: ffff88003326b840 RSI: ffff880033e48038 RDI: ffff880034c7c780 [ 72.696530] RBP: ffff880035f87c18 R08: 000000000000a506 R09: 0000000000000000 [ 72.696530] R10: ffff880035f87b38 R11: ffff880034b9344d R12: 00000000ebfea715 [ 72.696530] R13: 0000000000000000 R14: ffff880034c7c780 R15: 0000000000000000 [ 72.696530] FS: 0000000000000000(0000) GS:ffff880035f80000(0000) knlGS:0000000000000000 [ 72.696530] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 72.696530] CR2: ffff880033f87d78 CR3: 0000000033c98000 CR4: 00000000000406a0 [ 72.696530] Stack: [ 72.696530] ffffffff814cc834 ffff880034b93468 0000001481416818 ffff88003326b874 [ 72.696530] ffff880034c7ccb0 ffff880033e48038 ffff88003326b840 ffff880034b93462 [ 72.696530] ffff88003326b88a ffff88003326b88c ffff880034b93468 ffff880035f87c70 [ 72.696530] Call Trace: [ 72.696530] [ 72.696530] [] ? udp_gro_receive+0x1c6/0x1f9 [ 72.696530] [] udp4_gro_receive+0x2b5/0x310 [ 72.696530] [] inet_gro_receive+0x4a3/0x4cd [ 72.696530] [] dev_gro_receive+0x584/0x7a3 [ 72.696530] [] ? __lock_is_held+0x29/0x64 [ 72.696530] [] napi_gro_receive+0x124/0x21d [ 72.696530] [] virtnet_receive+0x8df/0x8f6 [virtio_net] [ 72.696530] [] virtnet_poll+0x1d/0x8d [virtio_net] [ 72.696530] [] net_rx_action+0x15b/0x3b9 [ 72.696530] [] __do_softirq+0x216/0x546 [ 72.696530] [] irq_exit+0x49/0xb6 [ 72.696530] [] do_IRQ+0xe2/0xfa [ 72.696530] [] common_interrupt+0x89/0x89 [ 72.696530] [ 72.696530] [] ? trace_hardirqs_on_caller+0x229/0x270 [ 72.696530] [] ? default_idle+0x1c/0x2d [ 72.696530] [] ? default_idle+0x1a/0x2d [ 72.696530] [] arch_cpu_idle+0xa/0xc [ 72.696530] [] default_idle_call+0x1a/0x1c [ 72.696530] [] cpu_startup_entry+0x15b/0x20f [ 72.696530] [] start_secondary+0x12c/0x133 [ 72.696530] Code: ff ff ff ff ff ff ff ff ff ff 7f ff ff ff ff ff ff ff 7f 00 7e f8 33 00 88 ff ff 6d 61 58 81 ff ff ff ff 5e de 0a 81 ff ff ff ff <00> 5c e2 34 00 88 ff ff 00 00 00 00 00 00 00 00 00 00 00 00 00 [ 72.696530] RIP [] 0xffff880033f87d78 [ 72.696530] RSP [ 72.696530] CR2: ffff880033f87d78 [ 72.696530] ---[ end trace ad7758b9a1dccf99 ]--- [ 72.696530] Kernel panic - not syncing: Fatal exception in interrupt [ 72.696530] Kernel Offset: disabled [ 72.696530] ---[ end Kernel panic - not syncing: Fatal exception in interrupt v2: use empty initialiser instead of "{ NULL }" to avoid relying on first field's type. Fixes: 38fd2af24fcf ("udp: Add socket based GRO and config") Signed-off-by: Guillaume Nault Signed-off-by: David S. Miller --- net/l2tp/l2tp_core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/l2tp/l2tp_core.c b/net/l2tp/l2tp_core.c index 6edfa9980314..1e40dacaa137 100644 --- a/net/l2tp/l2tp_core.c +++ b/net/l2tp/l2tp_core.c @@ -1581,7 +1581,7 @@ int l2tp_tunnel_create(struct net *net, int fd, int version, u32 tunnel_id, u32 /* Mark socket as an encapsulation socket. See net/ipv4/udp.c */ tunnel->encap = encap; if (encap == L2TP_ENCAPTYPE_UDP) { - struct udp_tunnel_sock_cfg udp_cfg; + struct udp_tunnel_sock_cfg udp_cfg = { }; udp_cfg.sk_user_data = tunnel; udp_cfg.encap_type = UDP_ENCAP_L2TPINUDP; -- cgit v1.2.3 From 00bc0ef5880dc7b82f9c320dead4afaad48e47be Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 8 Jun 2016 15:13:34 +0200 Subject: ipv6: Skip XFRM lookup if dst_entry in socket cache is valid At present we perform an xfrm_lookup() for each UDPv6 message we send. The lookup involves querying the flow cache (flow_cache_lookup) and, in case of a cache miss, creating an XFRM bundle. If we miss the flow cache, we can end up creating a new bundle and deriving the path MTU (xfrm_init_pmtu) from on an already transformed dst_entry, which we pass from the socket cache (sk->sk_dst_cache) down to xfrm_lookup(). This can happen only if we're caching the dst_entry in the socket, that is when we're using a connected UDP socket. To put it another way, the path MTU shrinks each time we miss the flow cache, which later on leads to incorrectly fragmented payload. It can be observed with ESPv6 in transport mode: 1) Set up a transformation and lower the MTU to trigger fragmentation # ip xfrm policy add dir out src ::1 dst ::1 \ tmpl src ::1 dst ::1 proto esp spi 1 # ip xfrm state add src ::1 dst ::1 \ proto esp spi 1 enc 'aes' 0x0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b0b # ip link set dev lo mtu 1500 2) Monitor the packet flow and set up an UDP sink # tcpdump -ni lo -ttt & # socat udp6-listen:12345,fork /dev/null & 3) Send a datagram that needs fragmentation with a connected socket # perl -e 'print "@" x 1470 | socat - udp6:[::1]:12345 2016/06/07 18:52:52 socat[724] E read(3, 0x555bb3d5ba00, 8192): Protocol error 00:00:00.000000 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x2), length 1448 00:00:00.000014 IP6 ::1 > ::1: frag (1448|32) 00:00:00.000050 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x3), length 1272 (^ ICMPv6 Parameter Problem) 00:00:00.000022 IP6 ::1 > ::1: ESP(spi=0x00000001,seq=0x5), length 136 4) Compare it to a non-connected socket # perl -e 'print "@" x 1500' | socat - udp6-sendto:[::1]:12345 00:00:40.535488 IP6 ::1 > ::1: frag (0|1448) ESP(spi=0x00000001,seq=0x6), length 1448 00:00:00.000010 IP6 ::1 > ::1: frag (1448|64) What happens in step (3) is: 1) when connecting the socket in __ip6_datagram_connect(), we perform an XFRM lookup, miss the flow cache, create an XFRM bundle, and cache the destination, 2) afterwards, when sending the datagram, we perform an XFRM lookup, again, miss the flow cache (due to mismatch of flowi6_iif and flowi6_oif, which is an issue of its own), and recreate an XFRM bundle based on the cached (and already transformed) destination. To prevent the recreation of an XFRM bundle, avoid an XFRM lookup altogether whenever we already have a destination entry cached in the socket. This prevents the path MTU shrinkage and brings us on par with UDPv4. The fix also benefits connected PINGv6 sockets, another user of ip6_sk_dst_lookup_flow(), who also suffer messages being transformed twice. Joint work with Hannes Frederic Sowa. Reported-by: Jan Tluka Signed-off-by: Jakub Sitnicki Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- net/ipv6/ip6_output.c | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index cbf127ae7c67..635b8d340cdb 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -1071,17 +1071,12 @@ struct dst_entry *ip6_sk_dst_lookup_flow(struct sock *sk, struct flowi6 *fl6, const struct in6_addr *final_dst) { struct dst_entry *dst = sk_dst_check(sk, inet6_sk(sk)->dst_cookie); - int err; dst = ip6_sk_dst_check(sk, dst, fl6); + if (!dst) + dst = ip6_dst_lookup_flow(sk, fl6, final_dst); - err = ip6_dst_lookup_tail(sock_net(sk), sk, &dst, fl6); - if (err) - return ERR_PTR(err); - if (final_dst) - fl6->daddr = *final_dst; - - return xfrm_lookup_route(sock_net(sk), dst, flowi6_to_flowi(fl6), sk, 0); + return dst; } EXPORT_SYMBOL_GPL(ip6_sk_dst_lookup_flow); -- cgit v1.2.3 From e0d194adfa9f5f473068cc546bee60fb84ab77ba Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 8 Jun 2016 06:19:45 -0700 Subject: net_sched: add missing paddattr description "make htmldocs" complains otherwise: .//net/core/gen_stats.c:65: warning: No description found for parameter 'padattr' .//net/core/gen_stats.c:101: warning: No description found for parameter 'padattr' Fixes: 9854518ea04d ("sched: align nlattr properly when needed") Signed-off-by: Eric Dumazet Reported-by: kbuild test robot Acked-by: Nicolas Dichtel Signed-off-by: David S. Miller --- net/core/gen_stats.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c index f96ee8b9478d..be873e4e3125 100644 --- a/net/core/gen_stats.c +++ b/net/core/gen_stats.c @@ -47,6 +47,7 @@ nla_put_failure: * @xstats_type: TLV type for backward compatibility xstats TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all @@ -87,6 +88,7 @@ EXPORT_SYMBOL(gnet_stats_start_copy_compat); * @type: TLV type for top level statistic TLV * @lock: statistics lock * @d: dumping handle + * @padattr: padding attribute * * Initializes the dumping handle, grabs the statistic lock and appends * an empty TLV header to the socket buffer for use a container for all -- cgit v1.2.3 From 6eef3801e719e4ea9c15c01b1d77706f47331166 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:03 +0100 Subject: net: cls_u32: catch all hardware offload errors Errors reported by u32_replace_hw_hnode() were not propagated. Signed-off-by: Jakub Kicinski Acked-by: Sridhar Samudrala Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 27b99fd774d7..54ab32a8ff4c 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -922,11 +922,17 @@ static int u32_change(struct net *net, struct sk_buff *in_skb, ht->divisor = divisor; ht->handle = handle; ht->prio = tp->prio; + + err = u32_replace_hw_hnode(tp, ht, flags); + if (err) { + kfree(ht); + return err; + } + RCU_INIT_POINTER(ht->next, tp_c->hlist); rcu_assign_pointer(tp_c->hlist, ht); *arg = (unsigned long)ht; - u32_replace_hw_hnode(tp, ht, flags); return 0; } -- cgit v1.2.3 From 201c44bd8ffa899f07b7b322a73e19baf0ada1e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Jun 2016 20:11:04 +0100 Subject: net: cls_u32: be more strict about skip-sw flag for knodes Return an error if user requested skip-sw and the underlaying hardware cannot handle tc offloads (or offloads are disabled). This patch fixes the knode handling. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- net/sched/cls_u32.c | 37 +++++++++++++++++++------------------ 1 file changed, 19 insertions(+), 18 deletions(-) (limited to 'net') diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c index 54ab32a8ff4c..ffe593efe930 100644 --- a/net/sched/cls_u32.c +++ b/net/sched/cls_u32.c @@ -508,27 +508,28 @@ static int u32_replace_hw_knode(struct tcf_proto *tp, offload.type = TC_SETUP_CLSU32; offload.cls_u32 = &u32_offload; - if (tc_should_offload(dev, tp, flags)) { - offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; - offload.cls_u32->knode.handle = n->handle; - offload.cls_u32->knode.fshift = n->fshift; + if (!tc_should_offload(dev, tp, flags)) + return tc_skip_sw(flags) ? -EINVAL : 0; + + offload.cls_u32->command = TC_CLSU32_REPLACE_KNODE; + offload.cls_u32->knode.handle = n->handle; + offload.cls_u32->knode.fshift = n->fshift; #ifdef CONFIG_CLS_U32_MARK - offload.cls_u32->knode.val = n->val; - offload.cls_u32->knode.mask = n->mask; + offload.cls_u32->knode.val = n->val; + offload.cls_u32->knode.mask = n->mask; #else - offload.cls_u32->knode.val = 0; - offload.cls_u32->knode.mask = 0; + offload.cls_u32->knode.val = 0; + offload.cls_u32->knode.mask = 0; #endif - offload.cls_u32->knode.sel = &n->sel; - offload.cls_u32->knode.exts = &n->exts; - if (n->ht_down) - offload.cls_u32->knode.link_handle = n->ht_down->handle; - - err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, - tp->protocol, &offload); - if (tc_skip_sw(flags)) - return err; - } + offload.cls_u32->knode.sel = &n->sel; + offload.cls_u32->knode.exts = &n->exts; + if (n->ht_down) + offload.cls_u32->knode.link_handle = n->ht_down->handle; + + err = dev->netdev_ops->ndo_setup_tc(dev, tp->q->handle, + tp->protocol, &offload); + if (tc_skip_sw(flags)) + return err; return 0; } -- cgit v1.2.3 From 0a46baaf634663d28038fc137239b71bf5385e5a Mon Sep 17 00:00:00 2001 From: Shweta Choudaha Date: Wed, 8 Jun 2016 20:15:43 +0100 Subject: ip6gre: Allow live link address change The ip6 GRE tap device should not be forced to down state to change the mac address and should allow live address change for tap device similar to ipv4 gre. Signed-off-by: Shweta Choudaha Signed-off-by: David S. Miller --- net/ipv6/ip6_gre.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'net') diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index f4ac2842d4d9..fdc9de276ab1 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1256,6 +1256,8 @@ static int ip6gre_tap_init(struct net_device *dev) if (ret) return ret; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; + tunnel = netdev_priv(dev); ip6gre_tnl_link_config(tunnel, 1); @@ -1289,6 +1291,7 @@ static void ip6gre_tap_setup(struct net_device *dev) dev->features |= NETIF_F_NETNS_LOCAL; dev->priv_flags &= ~IFF_TX_SKB_SHARING; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE; } static bool ip6gre_netlink_encap_parms(struct nlattr *data[], -- cgit v1.2.3 From 9b15350f0d5c401c02eca15c4e6ca0603cff1a41 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 8 Jun 2016 23:23:01 +0200 Subject: qfq: don't leak skb if kzalloc fails When we need to create a new aggregate to enqueue the skb we call kzalloc. If that fails we returned ENOBUFS without freeing the skb. Spotted during code review. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- net/sched/sch_qfq.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/sched/sch_qfq.c b/net/sched/sch_qfq.c index 8d2d8d953432..f18857febdad 100644 --- a/net/sched/sch_qfq.c +++ b/net/sched/sch_qfq.c @@ -1235,8 +1235,10 @@ static int qfq_enqueue(struct sk_buff *skb, struct Qdisc *sch) cl->agg->lmax, qdisc_pkt_len(skb), cl->common.classid); err = qfq_change_agg(sch, cl, cl->agg->class_weight, qdisc_pkt_len(skb)); - if (err) - return err; + if (err) { + cl->qstats.drops++; + return qdisc_drop(skb, sch); + } } err = qdisc_enqueue(skb, cl->qdisc); -- cgit v1.2.3 From 6cbf6236d54c24b9a29e6892549c25b6902b44ce Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 9 Jun 2016 09:40:55 +0200 Subject: cfg80211: remove get/set antenna and tx power warnings Since set_tx_power and set_antenna are frequently implemented without the matching get_tx_power/get_antenna, we shouldn't have added warnings for those. Remove them. The remaining ones are correct and need to be implemented symmetrically for correct operation. Cc: stable@vger.kernel.org Fixes: de3bb771f471 ("cfg80211: add more warnings for inconsistent ops") Signed-off-by: Johannes Berg --- net/wireless/core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'net') diff --git a/net/wireless/core.c b/net/wireless/core.c index d25c82bc1bbe..ecca3896b9f7 100644 --- a/net/wireless/core.c +++ b/net/wireless/core.c @@ -363,8 +363,6 @@ struct wiphy *wiphy_new_nm(const struct cfg80211_ops *ops, int sizeof_priv, WARN_ON(ops->remain_on_channel && !ops->cancel_remain_on_channel); WARN_ON(ops->tdls_channel_switch && !ops->tdls_cancel_channel_switch); WARN_ON(ops->add_tx_ts && !ops->del_tx_ts); - WARN_ON(ops->set_tx_power && !ops->get_tx_power); - WARN_ON(ops->set_antenna && !ops->get_antenna); alloc_size = sizeof(*rdev) + sizeof_priv; -- cgit v1.2.3 From 3d5fdff46c4b2b9534fa2f9fc78e90a48e0ff724 Mon Sep 17 00:00:00 2001 From: Prasun Maiti Date: Mon, 6 Jun 2016 20:04:19 +0530 Subject: wext: Fix 32 bit iwpriv compatibility issue with 64 bit Kernel iwpriv app uses iw_point structure to send data to Kernel. The iw_point structure holds a pointer. For compatibility Kernel converts the pointer as required for WEXT IOCTLs (SIOCIWFIRST to SIOCIWLAST). Some drivers may use iw_handler_def.private_args to populate iwpriv commands instead of iw_handler_def.private. For those case, the IOCTLs from SIOCIWFIRSTPRIV to SIOCIWLASTPRIV will follow the path ndo_do_ioctl(). Accordingly when the filled up iw_point structure comes from 32 bit iwpriv to 64 bit Kernel, Kernel will not convert the pointer and sends it to driver. So, the driver may get the invalid data. The pointer conversion for the IOCTLs (SIOCIWFIRSTPRIV to SIOCIWLASTPRIV), which follow the path ndo_do_ioctl(), is mandatory. This patch adds pointer conversion from 32 bit to 64 bit and vice versa, if the ioctl comes from 32 bit iwpriv to 64 bit Kernel. Cc: stable@vger.kernel.org Signed-off-by: Prasun Maiti Signed-off-by: Ujjal Roy Tested-by: Dibyajyoti Ghosh Signed-off-by: Johannes Berg --- net/wireless/wext-core.c | 25 +++++++++++++++++++++++-- 1 file changed, 23 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/wireless/wext-core.c b/net/wireless/wext-core.c index 6250b1cfcde5..dbb2738e356a 100644 --- a/net/wireless/wext-core.c +++ b/net/wireless/wext-core.c @@ -958,8 +958,29 @@ static int wireless_process_ioctl(struct net *net, struct ifreq *ifr, return private(dev, iwr, cmd, info, handler); } /* Old driver API : call driver ioctl handler */ - if (dev->netdev_ops->ndo_do_ioctl) - return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + if (dev->netdev_ops->ndo_do_ioctl) { +#ifdef CONFIG_COMPAT + if (info->flags & IW_REQUEST_FLAG_COMPAT) { + int ret = 0; + struct iwreq iwr_lcl; + struct compat_iw_point *iwp_compat = (void *) &iwr->u.data; + + memcpy(&iwr_lcl, iwr, sizeof(struct iwreq)); + iwr_lcl.u.data.pointer = compat_ptr(iwp_compat->pointer); + iwr_lcl.u.data.length = iwp_compat->length; + iwr_lcl.u.data.flags = iwp_compat->flags; + + ret = dev->netdev_ops->ndo_do_ioctl(dev, (void *) &iwr_lcl, cmd); + + iwp_compat->pointer = ptr_to_compat(iwr_lcl.u.data.pointer); + iwp_compat->length = iwr_lcl.u.data.length; + iwp_compat->flags = iwr_lcl.u.data.flags; + + return ret; + } else +#endif + return dev->netdev_ops->ndo_do_ioctl(dev, ifr, cmd); + } return -EOPNOTSUPP; } -- cgit v1.2.3 From 719c44d340beeecd22cbda91b00ef55585b3c1a0 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 7 Jun 2016 12:06:34 -0400 Subject: packet: compat support for sock_fprog Socket option PACKET_FANOUT_DATA takes a struct sock_fprog as argument if PACKET_FANOUT has mode PACKET_FANOUT_CBPF. This structure contains a pointer into user memory. If userland is 32-bit and kernel is 64-bit the two disagree about the layout of struct sock_fprog. Add compat setsockopt support to convert a 32-bit compat_sock_fprog to a 64-bit sock_fprog. This is analogous to compat_sock_fprog support for SO_REUSEPORT added in commit 1957598840f4 ("soreuseport: add compat case for setsockopt SO_ATTACH_REUSEPORT_CBPF"). Reported-by: Daniel Borkmann Signed-off-by: Willem de Bruijn Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/net/compat.h | 1 + net/compat.c | 17 +++++++++++++++-- net/packet/af_packet.c | 25 +++++++++++++++++++++++++ 3 files changed, 41 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/net/compat.h b/include/net/compat.h index 48103cf94e97..13de0ccaa059 100644 --- a/include/net/compat.h +++ b/include/net/compat.h @@ -42,6 +42,7 @@ int compat_sock_get_timestampns(struct sock *, struct timespec __user *); int get_compat_msghdr(struct msghdr *, struct compat_msghdr __user *, struct sockaddr __user **, struct iovec **); +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval); asmlinkage long compat_sys_sendmsg(int, struct compat_msghdr __user *, unsigned int); asmlinkage long compat_sys_sendmmsg(int, struct compat_mmsghdr __user *, diff --git a/net/compat.c b/net/compat.c index 1373947efb50..1cd2ec046164 100644 --- a/net/compat.c +++ b/net/compat.c @@ -309,8 +309,8 @@ void scm_detach_fds_compat(struct msghdr *kmsg, struct scm_cookie *scm) __scm_destroy(scm); } -static int do_set_attach_filter(struct socket *sock, int level, int optname, - char __user *optval, unsigned int optlen) +/* allocate a 64-bit sock_fprog on the user stack for duration of syscall. */ +struct sock_fprog __user *get_compat_bpf_fprog(char __user *optval) { struct compat_sock_fprog __user *fprog32 = (struct compat_sock_fprog __user *)optval; struct sock_fprog __user *kfprog = compat_alloc_user_space(sizeof(struct sock_fprog)); @@ -323,6 +323,19 @@ static int do_set_attach_filter(struct socket *sock, int level, int optname, __get_user(ptr, &fprog32->filter) || __put_user(len, &kfprog->len) || __put_user(compat_ptr(ptr), &kfprog->filter)) + return NULL; + + return kfprog; +} +EXPORT_SYMBOL_GPL(get_compat_bpf_fprog); + +static int do_set_attach_filter(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct sock_fprog __user *kfprog; + + kfprog = get_compat_bpf_fprog(optval); + if (!kfprog) return -EFAULT; return sock_setsockopt(sock, level, optname, (char __user *)kfprog, diff --git a/net/packet/af_packet.c b/net/packet/af_packet.c index 4040eb92d9c9..9bff6ef16fa7 100644 --- a/net/packet/af_packet.c +++ b/net/packet/af_packet.c @@ -93,6 +93,7 @@ #include #endif #include +#include #include "internal.h" @@ -3940,6 +3941,27 @@ static int packet_getsockopt(struct socket *sock, int level, int optname, } +#ifdef CONFIG_COMPAT +static int compat_packet_setsockopt(struct socket *sock, int level, int optname, + char __user *optval, unsigned int optlen) +{ + struct packet_sock *po = pkt_sk(sock->sk); + + if (level != SOL_PACKET) + return -ENOPROTOOPT; + + if (optname == PACKET_FANOUT_DATA && + po->fanout && po->fanout->type == PACKET_FANOUT_CBPF) { + optval = (char __user *)get_compat_bpf_fprog(optval); + if (!optval) + return -EFAULT; + optlen = sizeof(struct sock_fprog); + } + + return packet_setsockopt(sock, level, optname, optval, optlen); +} +#endif + static int packet_notifier(struct notifier_block *this, unsigned long msg, void *ptr) { @@ -4416,6 +4438,9 @@ static const struct proto_ops packet_ops = { .shutdown = sock_no_shutdown, .setsockopt = packet_setsockopt, .getsockopt = packet_getsockopt, +#ifdef CONFIG_COMPAT + .compat_setsockopt = compat_packet_setsockopt, +#endif .sendmsg = packet_sendmsg, .recvmsg = packet_recvmsg, .mmap = packet_mmap, -- cgit v1.2.3