From c028c6309a9f9b385ba8c0c984eb2b6c3f368650 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 30 Jan 2018 13:17:38 +0100 Subject: cfg80211: use only 1Mbps for basic rates in mesh Mesh used to use the mandatory rates as basic rates, but we got the calculation of mandatory rates wrong until some time ago. Fix this this broke interoperability with older versions since now more basic rates are required, and thus the MBSS isn't the same and the network stops working. Fix this by simply using only 1Mbps as the basic rate in 2.4GHz. Since the changed mandatory rates only affected 2.4GHz, this is all we need to make it work again. Reported-and-tested-by: Matthias Schiffer Fixes: 1bd773c077de ("wireless: set correct mandatory rate flags") Signed-off-by: Johannes Berg --- net/wireless/mesh.c | 25 ++++++++++++++++++++++--- 1 file changed, 22 insertions(+), 3 deletions(-) diff --git a/net/wireless/mesh.c b/net/wireless/mesh.c index 51aa55618ef7..b12da6ef3c12 100644 --- a/net/wireless/mesh.c +++ b/net/wireless/mesh.c @@ -170,9 +170,28 @@ int __cfg80211_join_mesh(struct cfg80211_registered_device *rdev, enum nl80211_bss_scan_width scan_width; struct ieee80211_supported_band *sband = rdev->wiphy.bands[setup->chandef.chan->band]; - scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); - setup->basic_rates = ieee80211_mandatory_rates(sband, - scan_width); + + if (setup->chandef.chan->band == NL80211_BAND_2GHZ) { + int i; + + /* + * Older versions selected the mandatory rates for + * 2.4 GHz as well, but were broken in that only + * 1 Mbps was regarded as a mandatory rate. Keep + * using just 1 Mbps as the default basic rate for + * mesh to be interoperable with older versions. + */ + for (i = 0; i < sband->n_bitrates; i++) { + if (sband->bitrates[i].bitrate == 10) { + setup->basic_rates = BIT(i); + break; + } + } + } else { + scan_width = cfg80211_chandef_to_scan_width(&setup->chandef); + setup->basic_rates = ieee80211_mandatory_rates(sband, + scan_width); + } } err = cfg80211_chandef_dfs_required(&rdev->wiphy, -- cgit v1.2.3 From c4de37ee2b55deac7d6aeac33e02e3d6be243898 Mon Sep 17 00:00:00 2001 From: Peter Oh Date: Fri, 26 Jan 2018 14:02:37 -0800 Subject: mac80211: mesh: fix wrong mesh TTL offset calculation mesh TTL offset in Mesh Channel Switch Parameters element depends on not only Secondary Channel Offset element, but also affected by HT Control field and Wide Bandwidth Channel Switch element. So use element structure to manipulate mesh channel swich param IE after removing its constant attribution to correct the miscalculation. Signed-off-by: Peter Oh Signed-off-by: Johannes Berg --- net/mac80211/ieee80211_i.h | 2 +- net/mac80211/mesh.c | 17 ++++++----------- 2 files changed, 7 insertions(+), 12 deletions(-) diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h index 885d00b41911..61db1fb156ed 100644 --- a/net/mac80211/ieee80211_i.h +++ b/net/mac80211/ieee80211_i.h @@ -1467,7 +1467,7 @@ struct ieee802_11_elems { const struct ieee80211_timeout_interval_ie *timeout_int; const u8 *opmode_notif; const struct ieee80211_sec_chan_offs_ie *sec_chan_offs; - const struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; + struct ieee80211_mesh_chansw_params_ie *mesh_chansw_params_ie; const struct ieee80211_bss_max_idle_period_ie *max_idle_period_ie; /* length of them, respectively */ diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c index 5e27364e10ac..23555536bad5 100644 --- a/net/mac80211/mesh.c +++ b/net/mac80211/mesh.c @@ -1253,13 +1253,12 @@ int ieee80211_mesh_csa_beacon(struct ieee80211_sub_if_data *sdata, } static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, - struct ieee80211_mgmt *mgmt, size_t len) + struct ieee80211_mgmt *mgmt, size_t len, + struct ieee802_11_elems *elems) { struct ieee80211_mgmt *mgmt_fwd; struct sk_buff *skb; struct ieee80211_local *local = sdata->local; - u8 *pos = mgmt->u.action.u.chan_switch.variable; - size_t offset_ttl; skb = dev_alloc_skb(local->tx_headroom + len); if (!skb) @@ -1267,13 +1266,9 @@ static int mesh_fwd_csa_frame(struct ieee80211_sub_if_data *sdata, skb_reserve(skb, local->tx_headroom); mgmt_fwd = skb_put(skb, len); - /* offset_ttl is based on whether the secondary channel - * offset is available or not. Subtract 1 from the mesh TTL - * and disable the initiator flag before forwarding. - */ - offset_ttl = (len < 42) ? 7 : 10; - *(pos + offset_ttl) -= 1; - *(pos + offset_ttl + 1) &= ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; + elems->mesh_chansw_params_ie->mesh_ttl--; + elems->mesh_chansw_params_ie->mesh_flags &= + ~WLAN_EID_CHAN_SWITCH_PARAM_INITIATOR; memcpy(mgmt_fwd, mgmt, len); eth_broadcast_addr(mgmt_fwd->da); @@ -1321,7 +1316,7 @@ static void mesh_rx_csa_frame(struct ieee80211_sub_if_data *sdata, /* forward or re-broadcast the CSA frame */ if (fwd_csa) { - if (mesh_fwd_csa_frame(sdata, mgmt, len) < 0) + if (mesh_fwd_csa_frame(sdata, mgmt, len, &elems) < 0) mcsa_dbg(sdata, "Failed to forward the CSA frame"); } } -- cgit v1.2.3 From fd2c19b2a28bb574b3914466a68ef830212d5cf7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 31 Jan 2018 09:16:56 +0100 Subject: netfilter: x_tables: remove size check Back in 2002 vmalloc used to BUG on too large sizes. We are much better behaved these days and vmalloc simply returns NULL for those. Remove the check as it simply not needed and the comment is even misleading. Link: http://lkml.kernel.org/r/20180131081916.GO21609@dhcp22.suse.cz Suggested-by: Andrew Morton Signed-off-by: Michal Hocko Reviewed-by: Andrew Morton Cc: Florian Westphal Cc: David S. Miller Signed-off-by: Andrew Morton Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 4 ---- 1 file changed, 4 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 2f685ee1f9c8..97e06a04c0d4 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -1004,10 +1004,6 @@ struct xt_table_info *xt_alloc_table_info(unsigned int size) if (sz < sizeof(*info)) return NULL; - /* Pedantry: prevent them from hitting BUG() in vmalloc.c --RR */ - if ((size >> PAGE_SHIFT) + 2 > totalram_pages) - return NULL; - /* __GFP_NORETRY is not fully supported by kvmalloc but it should * work reasonably well if sz is too large and bail out rather * than shoot all processes down before realizing there is nothing -- cgit v1.2.3 From b3e456fce9f51d6276e576d00271e2813c1b8b67 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Wed, 7 Feb 2018 21:59:17 -0800 Subject: netfilter: ipt_CLUSTERIP: fix a race condition of proc file creation There is a race condition between clusterip_config_entry_put() and clusterip_config_init(), after we release the spinlock in clusterip_config_entry_put(), a new proc file with a same IP could be created immediately since it is already removed from the configs list, therefore it triggers this warning: ------------[ cut here ]------------ proc_dir_entry 'ipt_CLUSTERIP/172.20.0.170' already registered WARNING: CPU: 1 PID: 4152 at fs/proc/generic.c:330 proc_register+0x2a4/0x370 fs/proc/generic.c:329 Kernel panic - not syncing: panic_on_warn set ... As a quick fix, just move the proc_remove() inside the spinlock. Reported-by: Fixes: 6c5d5cfbe3c5 ("netfilter: ipt_CLUSTERIP: check duplicate config when initializing") Tested-by: Paolo Abeni Cc: Xin Long Cc: Pablo Neira Ayuso Signed-off-by: Cong Wang Reviewed-by: Xin Long Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 3a84a60f6b39..1ff72b87a066 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -107,12 +107,6 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) local_bh_disable(); if (refcount_dec_and_lock(&c->entries, &cn->lock)) { - list_del_rcu(&c->list); - spin_unlock(&cn->lock); - local_bh_enable(); - - unregister_netdevice_notifier(&c->notifier); - /* In case anyone still accesses the file, the open/close * functions are also incrementing the refcount on their own, * so it's safe to remove the entry even if it's in use. */ @@ -120,6 +114,12 @@ clusterip_config_entry_put(struct net *net, struct clusterip_config *c) if (cn->procdir) proc_remove(c->pde); #endif + list_del_rcu(&c->list); + spin_unlock(&cn->lock); + local_bh_enable(); + + unregister_netdevice_notifier(&c->notifier); + return; } local_bh_enable(); -- cgit v1.2.3 From 9a3efb6b661f71d5675369ace9257833f0e78ef3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Tue, 13 Feb 2018 19:00:21 -0800 Subject: bpf: fix memory leak in lpm_trie map_free callback function There is a memory leak happening in lpm_trie map_free callback function trie_free. The trie structure itself does not get freed. Also, trie_free function did not do synchronize_rcu before freeing various data structures. This is incorrect as some rcu_read_lock region(s) for lookup, update, delete or get_next_key may not complete yet. The fix is to add synchronize_rcu in the beginning of trie_free. The useless spin_lock is removed from this function as well. Fixes: b95a5c4db09b ("bpf: add a longest prefix match trie map implementation") Reported-by: Mathieu Malaterre Reported-by: Alexei Starovoitov Tested-by: Mathieu Malaterre Signed-off-by: Yonghong Song Signed-off-by: Alexei Starovoitov --- kernel/bpf/lpm_trie.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index 7b469d10d0e9..a75e02c961b5 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -555,7 +555,10 @@ static void trie_free(struct bpf_map *map) struct lpm_trie_node __rcu **slot; struct lpm_trie_node *node; - raw_spin_lock(&trie->lock); + /* Wait for outstanding programs to complete + * update/lookup/delete/get_next_key and free the trie. + */ + synchronize_rcu(); /* Always start at the root and walk down to a node that has no * children. Then free that node, nullify its reference in the parent @@ -569,7 +572,7 @@ static void trie_free(struct bpf_map *map) node = rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)); if (!node) - goto unlock; + goto out; if (rcu_access_pointer(node->child[0])) { slot = &node->child[0]; @@ -587,8 +590,8 @@ static void trie_free(struct bpf_map *map) } } -unlock: - raw_spin_unlock(&trie->lock); +out: + kfree(trie); } static int trie_get_next_key(struct bpf_map *map, void *_key, void *_next_key) -- cgit v1.2.3 From 952fad8e323975c4e826b659087d2648777594a6 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 13 Feb 2018 15:33:52 -0800 Subject: bpf: fix sock_map_alloc() error path In case user program provides silly parameters, we want a map_alloc() handler to return an error, not a NULL pointer, otherwise we crash later in find_and_alloc_map() Fixes: 1aa12bdf1bfb ("bpf: sockmap, add sock close() hook to remove socks") Signed-off-by: Eric Dumazet Reported-by: syzbot Acked-by: John Fastabend Signed-off-by: Alexei Starovoitov --- kernel/bpf/sockmap.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/sockmap.c b/kernel/bpf/sockmap.c index 48c33417d13c..a927e89dad6e 100644 --- a/kernel/bpf/sockmap.c +++ b/kernel/bpf/sockmap.c @@ -521,8 +521,8 @@ static struct smap_psock *smap_init_psock(struct sock *sock, static struct bpf_map *sock_map_alloc(union bpf_attr *attr) { struct bpf_stab *stab; - int err = -EINVAL; u64 cost; + int err; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -547,6 +547,7 @@ static struct bpf_map *sock_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ cost = (u64) stab->map.max_entries * sizeof(struct sock *); + err = -EINVAL; if (cost >= U32_MAX - PAGE_SIZE) goto free_stab; -- cgit v1.2.3 From 7fc17e909edfb9bf421ee04e981d3d474175c7c7 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 14 Feb 2018 22:17:34 +0800 Subject: bpf: cpumap: use GFP_KERNEL instead of GFP_ATOMIC in __cpu_map_entry_alloc() There're several implications after commit 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") with the using of vmalloc() since can't allow GFP_ATOMIC but mandate GFP_KERNEL. This will lead a WARN since cpumap try to call with GFP_ATOMIC. Fortunately, entry allocation of cpumap can only be done through syscall path which means GFP_ATOMIC is not necessary, so fixing this by replacing GFP_ATOMIC with GFP_KERNEL. Reported-by: syzbot+1a240cdb1f4cc88819df@syzkaller.appspotmail.com Fixes: 0bf7800f1799 ("ptr_ring: try vmalloc() when kmalloc() fails") Cc: Michal Hocko Cc: Daniel Borkmann Cc: Matthew Wilcox Cc: Jesper Dangaard Brouer Cc: akpm@linux-foundation.org Cc: dhowells@redhat.com Cc: hannes@cmpxchg.org Signed-off-by: Jason Wang Acked-by: Jesper Dangaard Brouer Signed-off-by: Daniel Borkmann --- kernel/bpf/cpumap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index fbfdada6caee..a4bb0b34375a 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -334,7 +334,7 @@ static int cpu_map_kthread_run(void *data) static struct bpf_cpu_map_entry *__cpu_map_entry_alloc(u32 qsize, u32 cpu, int map_id) { - gfp_t gfp = GFP_ATOMIC|__GFP_NOWARN; + gfp_t gfp = GFP_KERNEL | __GFP_NOWARN; struct bpf_cpu_map_entry *rcpu; int numa, err; -- cgit v1.2.3 From 9c481b908b011398b1491752271cd1e2c9ad5758 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 14 Feb 2018 15:31:00 +0100 Subject: bpf: fix bpf_prog_array_copy_to_user warning from perf event prog query syzkaller tried to perform a prog query in perf_event_query_prog_array() where struct perf_event_query_bpf had an ids_len of 1,073,741,353 and thus causing a warning due to failed kcalloc() allocation out of the bpf_prog_array_copy_to_user() helper. Given we cannot attach more than 64 programs to a perf event, there's no point in allowing huge ids_len. Therefore, allow a buffer that would fix the maximum number of ids and also add a __GFP_NOWARN to the temporary ids buffer. Fixes: f371b304f12e ("bpf/tracing: allow user space to query prog array on the same tp") Fixes: 0911287ce32b ("bpf: fix bpf_prog_array_copy_to_user() issues") Reported-by: syzbot+cab5816b0edbabf598b3@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- kernel/bpf/core.c | 2 +- kernel/trace/bpf_trace.c | 2 ++ 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 29ca9208dcfa..d315b393abdd 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1590,7 +1590,7 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, * so always copy 'cnt' prog_ids to the user. * In a rare race the user will see zero prog_ids */ - ids = kcalloc(cnt, sizeof(u32), GFP_USER); + ids = kcalloc(cnt, sizeof(u32), GFP_USER | __GFP_NOWARN); if (!ids) return -ENOMEM; rcu_read_lock(); diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index fc2838ac8b78..c0a9e310d715 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -872,6 +872,8 @@ int perf_event_query_prog_array(struct perf_event *event, void __user *info) return -EINVAL; if (copy_from_user(&query, uquery, sizeof(query))) return -EFAULT; + if (query.ids_len > BPF_TRACE_MAX_PROGS) + return -E2BIG; mutex_lock(&bpf_event_mutex); ret = bpf_prog_array_copy_info(event->tp_event->prog_array, -- cgit v1.2.3 From 01ea306f2ac2baff98d472da719193e738759d93 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Thu, 8 Feb 2018 12:19:00 +0100 Subject: netfilter: drop outermost socket lock in getsockopt() The Syzbot reported a possible deadlock in the netfilter area caused by rtnl lock, xt lock and socket lock being acquired with a different order on different code paths, leading to the following backtrace: Reviewed-by: Xin Long ====================================================== WARNING: possible circular locking dependency detected 4.15.0+ #301 Not tainted ------------------------------------------------------ syzkaller233489/4179 is trying to acquire lock: (rtnl_mutex){+.+.}, at: [<0000000048e996fd>] rtnl_lock+0x17/0x20 net/core/rtnetlink.c:74 but task is already holding lock: (&xt[i].mutex){+.+.}, at: [<00000000328553a2>] xt_find_table_lock+0x3e/0x3e0 net/netfilter/x_tables.c:1041 which lock already depends on the new lock. === Since commit 3f34cfae1230 ("netfilter: on sockopt() acquire sock lock only in the required scope"), we already acquire the socket lock in the innermost scope, where needed. In such commit I forgot to remove the outer-most socket lock from the getsockopt() path, this commit addresses the issues dropping it now. v1 -> v2: fix bad subj, added relavant 'fixes' tag Fixes: 22265a5c3c10 ("netfilter: xt_TEE: resolve oif using netdevice notifiers") Fixes: 202f59afd441 ("netfilter: ipt_CLUSTERIP: do not hold dev") Fixes: 3f34cfae1230 ("netfilter: on sockopt() acquire sock lock only in the required scope") Reported-by: syzbot+ddde1c7b7ff7442d7f2d@syzkaller.appspotmail.com Suggested-by: Florian Westphal Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/ipv4/ip_sockglue.c | 7 +------ net/ipv6/ipv6_sockglue.c | 10 ++-------- 2 files changed, 3 insertions(+), 14 deletions(-) diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c index 008be04ac1cc..9c41a0cef1a5 100644 --- a/net/ipv4/ip_sockglue.c +++ b/net/ipv4/ip_sockglue.c @@ -1567,10 +1567,7 @@ int ip_getsockopt(struct sock *sk, int level, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = nf_getsockopt(sk, PF_INET, optname, optval, - &len); - release_sock(sk); + err = nf_getsockopt(sk, PF_INET, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); return err; @@ -1602,9 +1599,7 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); err = compat_nf_getsockopt(sk, PF_INET, optname, optval, &len); - release_sock(sk); if (err >= 0) err = put_user(len, optlen); return err; diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index d78d41fc4b1a..24535169663d 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -1367,10 +1367,7 @@ int ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = nf_getsockopt(sk, PF_INET6, optname, optval, - &len); - release_sock(sk); + err = nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } @@ -1409,10 +1406,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, if (get_user(len, optlen)) return -EFAULT; - lock_sock(sk); - err = compat_nf_getsockopt(sk, PF_INET6, - optname, optval, &len); - release_sock(sk); + err = compat_nf_getsockopt(sk, PF_INET6, optname, optval, &len); if (err >= 0) err = put_user(len, optlen); } -- cgit v1.2.3 From 57ebd808a97d7c5b1e1afb937c2db22beba3c1f8 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 7 Feb 2018 13:46:25 +0100 Subject: netfilter: add back stackpointer size checks The rationale for removing the check is only correct for rulesets generated by ip(6)tables. In iptables, a jump can only occur to a user-defined chain, i.e. because we size the stack based on number of user-defined chains we cannot exceed stack size. However, the underlying binary format has no such restriction, and the validation step only ensures that the jump target is a valid rule start point. IOW, its possible to build a rule blob that has no user-defined chains but does contain a jump. If this happens, no jump stack gets allocated and crash occurs because no jumpstack was allocated. Fixes: 7814b6ec6d0d6 ("netfilter: xtables: don't save/restore jumpstack offset") Reported-by: syzbot+e783f671527912cd9403@syzkaller.appspotmail.com Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/arp_tables.c | 4 ++++ net/ipv4/netfilter/ip_tables.c | 7 ++++++- net/ipv6/netfilter/ip6_tables.c | 4 ++++ 3 files changed, 14 insertions(+), 1 deletion(-) diff --git a/net/ipv4/netfilter/arp_tables.c b/net/ipv4/netfilter/arp_tables.c index 4ffe302f9b82..e3e420f3ba7b 100644 --- a/net/ipv4/netfilter/arp_tables.c +++ b/net/ipv4/netfilter/arp_tables.c @@ -252,6 +252,10 @@ unsigned int arpt_do_table(struct sk_buff *skb, } if (table_base + v != arpt_next_entry(e)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; } diff --git a/net/ipv4/netfilter/ip_tables.c b/net/ipv4/netfilter/ip_tables.c index 9a71f3149507..e38395a8dcf2 100644 --- a/net/ipv4/netfilter/ip_tables.c +++ b/net/ipv4/netfilter/ip_tables.c @@ -330,8 +330,13 @@ ipt_do_table(struct sk_buff *skb, continue; } if (table_base + v != ipt_next_entry(e) && - !(e->ip.flags & IPT_F_GOTO)) + !(e->ip.flags & IPT_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; + } e = get_entry(table_base, v); continue; diff --git a/net/ipv6/netfilter/ip6_tables.c b/net/ipv6/netfilter/ip6_tables.c index af4c917e0836..62358b93bbac 100644 --- a/net/ipv6/netfilter/ip6_tables.c +++ b/net/ipv6/netfilter/ip6_tables.c @@ -352,6 +352,10 @@ ip6t_do_table(struct sk_buff *skb, } if (table_base + v != ip6t_next_entry(e) && !(e->ipv6.flags & IP6T_F_GOTO)) { + if (unlikely(stackidx >= private->stacksize)) { + verdict = NF_DROP; + break; + } jumpstack[stackidx++] = e; } -- cgit v1.2.3 From db93a3632b0f8773a3899e04a3a3e0aa7a26eb46 Mon Sep 17 00:00:00 2001 From: Cong Wang Date: Thu, 8 Feb 2018 13:53:52 -0800 Subject: netfilter: ipt_CLUSTERIP: fix a refcount bug in clusterip_config_find_get() In clusterip_config_find_get() we hold RCU read lock so it could run concurrently with clusterip_config_entry_put(), as a result, the refcnt could go back to 1 from 0, which leads to a double list_del()... Just replace refcount_inc() with refcount_inc_not_zero(), as for c->refcount. Fixes: d73f33b16883 ("netfilter: CLUSTERIP: RCU conversion") Cc: Eric Dumazet Cc: Pablo Neira Ayuso Cc: Florian Westphal Signed-off-by: Cong Wang Reviewed-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_CLUSTERIP.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/net/ipv4/netfilter/ipt_CLUSTERIP.c b/net/ipv4/netfilter/ipt_CLUSTERIP.c index 1ff72b87a066..4b02ab39ebc5 100644 --- a/net/ipv4/netfilter/ipt_CLUSTERIP.c +++ b/net/ipv4/netfilter/ipt_CLUSTERIP.c @@ -154,8 +154,12 @@ clusterip_config_find_get(struct net *net, __be32 clusterip, int entry) #endif if (unlikely(!refcount_inc_not_zero(&c->refcount))) c = NULL; - else if (entry) - refcount_inc(&c->entries); + else if (entry) { + if (unlikely(!refcount_inc_not_zero(&c->entries))) { + clusterip_config_put(c); + c = NULL; + } + } } rcu_read_unlock_bh(); -- cgit v1.2.3 From 0cc9501f94592125b2012452c57054b8215bcf33 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:51:59 +0100 Subject: netfilter: x_tables: remove pr_info where possible remove several pr_info messages that cannot be triggered with iptables, the check is only to ensure input is sane. iptables(8) already prints error messages in these cases. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ECN.c | 10 ++++------ net/netfilter/xt_CHECKSUM.c | 5 ++--- net/netfilter/xt_DSCP.c | 4 +--- net/netfilter/xt_HL.c | 13 +++---------- net/netfilter/xt_HMARK.c | 10 ++++------ net/netfilter/xt_LED.c | 4 +--- net/netfilter/xt_dscp.c | 4 +--- 7 files changed, 16 insertions(+), 34 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 270765236f5e..39ff167e6d86 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -98,14 +98,12 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) const struct ipt_ECN_info *einfo = par->targinfo; const struct ipt_entry *e = par->entryinfo; - if (einfo->operation & IPT_ECN_OP_MASK) { - pr_info("unsupported ECN operation %x\n", einfo->operation); + if (einfo->operation & IPT_ECN_OP_MASK) return -EINVAL; - } - if (einfo->ip_ect & ~IPT_ECN_IP_MASK) { - pr_info("new ECT codepoint %x out of mask\n", einfo->ip_ect); + + if (einfo->ip_ect & ~IPT_ECN_IP_MASK) return -EINVAL; - } + if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { pr_info("cannot use TCP operations on a non-tcp rule\n"); diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index 0f642ef8cd26..ea3c5701fb0f 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -39,10 +39,9 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); return -EINVAL; } - if (!einfo->operation) { - pr_info("no CHECKSUM operation enabled\n"); + if (!einfo->operation) return -EINVAL; - } + return 0; } diff --git a/net/netfilter/xt_DSCP.c b/net/netfilter/xt_DSCP.c index 3f83d38c4e5b..098ed851b7a7 100644 --- a/net/netfilter/xt_DSCP.c +++ b/net/netfilter/xt_DSCP.c @@ -66,10 +66,8 @@ static int dscp_tg_check(const struct xt_tgchk_param *par) { const struct xt_DSCP_info *info = par->targinfo; - if (info->dscp > XT_DSCP_MAX) { - pr_info("dscp %x out of range\n", info->dscp); + if (info->dscp > XT_DSCP_MAX) return -EDOM; - } return 0; } diff --git a/net/netfilter/xt_HL.c b/net/netfilter/xt_HL.c index 1535e87ed9bd..4653b071bed4 100644 --- a/net/netfilter/xt_HL.c +++ b/net/netfilter/xt_HL.c @@ -105,10 +105,8 @@ static int ttl_tg_check(const struct xt_tgchk_param *par) { const struct ipt_TTL_info *info = par->targinfo; - if (info->mode > IPT_TTL_MAXMODE) { - pr_info("TTL: invalid or unknown mode %u\n", info->mode); + if (info->mode > IPT_TTL_MAXMODE) return -EINVAL; - } if (info->mode != IPT_TTL_SET && info->ttl == 0) return -EINVAL; return 0; @@ -118,15 +116,10 @@ static int hl_tg6_check(const struct xt_tgchk_param *par) { const struct ip6t_HL_info *info = par->targinfo; - if (info->mode > IP6T_HL_MAXMODE) { - pr_info("invalid or unknown mode %u\n", info->mode); + if (info->mode > IP6T_HL_MAXMODE) return -EINVAL; - } - if (info->mode != IP6T_HL_SET && info->hop_limit == 0) { - pr_info("increment/decrement does not " - "make sense with value 0\n"); + if (info->mode != IP6T_HL_SET && info->hop_limit == 0) return -EINVAL; - } return 0; } diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index 60e6dbe12460..dd08cc1f86c7 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -313,10 +313,9 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_hmark_info *info = par->targinfo; - if (!info->hmodulus) { - pr_info("xt_HMARK: hash modulus can't be zero\n"); + if (!info->hmodulus) return -EINVAL; - } + if (info->proto_mask && (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); @@ -324,10 +323,9 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) } if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | - XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) { - pr_info("xt_HMARK: spi-mask and port-mask can't be combined\n"); + XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) return -EINVAL; - } + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 1dcad893df78..ece311c11fdc 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -111,10 +111,8 @@ static int led_tg_check(const struct xt_tgchk_param *par) struct xt_led_info_internal *ledinternal; int err; - if (ledinfo->id[0] == '\0') { - pr_info("No 'id' parameter given.\n"); + if (ledinfo->id[0] == '\0') return -EINVAL; - } mutex_lock(&xt_led_mutex); diff --git a/net/netfilter/xt_dscp.c b/net/netfilter/xt_dscp.c index 236ac8008909..a4c2b862f820 100644 --- a/net/netfilter/xt_dscp.c +++ b/net/netfilter/xt_dscp.c @@ -46,10 +46,8 @@ static int dscp_mt_check(const struct xt_mtchk_param *par) { const struct xt_dscp_info *info = par->matchinfo; - if (info->dscp > XT_DSCP_MAX) { - pr_info("dscp %x out of range\n", info->dscp); + if (info->dscp > XT_DSCP_MAX) return -EDOM; - } return 0; } -- cgit v1.2.3 From 1b6cd67191e16a66f69c9881d878204c3143f03f Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:00 +0100 Subject: netfilter: x_tables: use pr ratelimiting in xt core most messages are converted to info, since they occur in response to wrong usage. Size mismatch however is a real error (xtables ABI bug) that should not occur. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/x_tables.c | 70 +++++++++++++++++++++++------------------------- 1 file changed, 34 insertions(+), 36 deletions(-) diff --git a/net/netfilter/x_tables.c b/net/netfilter/x_tables.c index 97e06a04c0d4..fa1655aff8d3 100644 --- a/net/netfilter/x_tables.c +++ b/net/netfilter/x_tables.c @@ -434,36 +434,35 @@ int xt_check_match(struct xt_mtchk_param *par, * ebt_among is exempt from centralized matchsize checking * because it uses a dynamic-size data set. */ - pr_err("%s_tables: %s.%u match: invalid size " - "%u (kernel) != (user) %u\n", - xt_prefix[par->family], par->match->name, - par->match->revision, - XT_ALIGN(par->match->matchsize), size); + pr_err_ratelimited("%s_tables: %s.%u match: invalid size %u (kernel) != (user) %u\n", + xt_prefix[par->family], par->match->name, + par->match->revision, + XT_ALIGN(par->match->matchsize), size); return -EINVAL; } if (par->match->table != NULL && strcmp(par->match->table, par->table) != 0) { - pr_err("%s_tables: %s match: only valid in %s table, not %s\n", - xt_prefix[par->family], par->match->name, - par->match->table, par->table); + pr_info_ratelimited("%s_tables: %s match: only valid in %s table, not %s\n", + xt_prefix[par->family], par->match->name, + par->match->table, par->table); return -EINVAL; } if (par->match->hooks && (par->hook_mask & ~par->match->hooks) != 0) { char used[64], allow[64]; - pr_err("%s_tables: %s match: used from hooks %s, but only " - "valid from %s\n", - xt_prefix[par->family], par->match->name, - textify_hooks(used, sizeof(used), par->hook_mask, - par->family), - textify_hooks(allow, sizeof(allow), par->match->hooks, - par->family)); + pr_info_ratelimited("%s_tables: %s match: used from hooks %s, but only valid from %s\n", + xt_prefix[par->family], par->match->name, + textify_hooks(used, sizeof(used), + par->hook_mask, par->family), + textify_hooks(allow, sizeof(allow), + par->match->hooks, + par->family)); return -EINVAL; } if (par->match->proto && (par->match->proto != proto || inv_proto)) { - pr_err("%s_tables: %s match: only valid for protocol %u\n", - xt_prefix[par->family], par->match->name, - par->match->proto); + pr_info_ratelimited("%s_tables: %s match: only valid for protocol %u\n", + xt_prefix[par->family], par->match->name, + par->match->proto); return -EINVAL; } if (par->match->checkentry != NULL) { @@ -814,36 +813,35 @@ int xt_check_target(struct xt_tgchk_param *par, int ret; if (XT_ALIGN(par->target->targetsize) != size) { - pr_err("%s_tables: %s.%u target: invalid size " - "%u (kernel) != (user) %u\n", - xt_prefix[par->family], par->target->name, - par->target->revision, - XT_ALIGN(par->target->targetsize), size); + pr_err_ratelimited("%s_tables: %s.%u target: invalid size %u (kernel) != (user) %u\n", + xt_prefix[par->family], par->target->name, + par->target->revision, + XT_ALIGN(par->target->targetsize), size); return -EINVAL; } if (par->target->table != NULL && strcmp(par->target->table, par->table) != 0) { - pr_err("%s_tables: %s target: only valid in %s table, not %s\n", - xt_prefix[par->family], par->target->name, - par->target->table, par->table); + pr_info_ratelimited("%s_tables: %s target: only valid in %s table, not %s\n", + xt_prefix[par->family], par->target->name, + par->target->table, par->table); return -EINVAL; } if (par->target->hooks && (par->hook_mask & ~par->target->hooks) != 0) { char used[64], allow[64]; - pr_err("%s_tables: %s target: used from hooks %s, but only " - "usable from %s\n", - xt_prefix[par->family], par->target->name, - textify_hooks(used, sizeof(used), par->hook_mask, - par->family), - textify_hooks(allow, sizeof(allow), par->target->hooks, - par->family)); + pr_info_ratelimited("%s_tables: %s target: used from hooks %s, but only usable from %s\n", + xt_prefix[par->family], par->target->name, + textify_hooks(used, sizeof(used), + par->hook_mask, par->family), + textify_hooks(allow, sizeof(allow), + par->target->hooks, + par->family)); return -EINVAL; } if (par->target->proto && (par->target->proto != proto || inv_proto)) { - pr_err("%s_tables: %s target: only valid for protocol %u\n", - xt_prefix[par->family], par->target->name, - par->target->proto); + pr_info_ratelimited("%s_tables: %s target: only valid for protocol %u\n", + xt_prefix[par->family], par->target->name, + par->target->proto); return -EINVAL; } if (par->target->checkentry != NULL) { -- cgit v1.2.3 From 11f7aee2326f37f9d3abba27bb61d92ec09fbfde Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:01 +0100 Subject: netfilter: xt_CT: use pr ratelimiting checkpatch complains about line > 80 but this would require splitting "literal" over two lines which is worse. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_CT.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/net/netfilter/xt_CT.c b/net/netfilter/xt_CT.c index 5a152e2acfd5..8790190c6feb 100644 --- a/net/netfilter/xt_CT.c +++ b/net/netfilter/xt_CT.c @@ -82,15 +82,14 @@ xt_ct_set_helper(struct nf_conn *ct, const char *helper_name, proto = xt_ct_find_proto(par); if (!proto) { - pr_info("You must specify a L4 protocol, and not use " - "inversions on it.\n"); + pr_info_ratelimited("You must specify a L4 protocol and not use inversions on it\n"); return -ENOENT; } helper = nf_conntrack_helper_try_module_get(helper_name, par->family, proto); if (helper == NULL) { - pr_info("No such helper \"%s\"\n", helper_name); + pr_info_ratelimited("No such helper \"%s\"\n", helper_name); return -ENOENT; } @@ -124,6 +123,7 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, const struct nf_conntrack_l4proto *l4proto; struct ctnl_timeout *timeout; struct nf_conn_timeout *timeout_ext; + const char *errmsg = NULL; int ret = 0; u8 proto; @@ -131,29 +131,29 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, timeout_find_get = rcu_dereference(nf_ct_timeout_find_get_hook); if (timeout_find_get == NULL) { ret = -ENOENT; - pr_info("Timeout policy base is empty\n"); + errmsg = "Timeout policy base is empty"; goto out; } proto = xt_ct_find_proto(par); if (!proto) { ret = -EINVAL; - pr_info("You must specify a L4 protocol, and not use " - "inversions on it.\n"); + errmsg = "You must specify a L4 protocol and not use inversions on it"; goto out; } timeout = timeout_find_get(par->net, timeout_name); if (timeout == NULL) { ret = -ENOENT; - pr_info("No such timeout policy \"%s\"\n", timeout_name); + pr_info_ratelimited("No such timeout policy \"%s\"\n", + timeout_name); goto out; } if (timeout->l3num != par->family) { ret = -EINVAL; - pr_info("Timeout policy `%s' can only be used by L3 protocol " - "number %d\n", timeout_name, timeout->l3num); + pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", + timeout_name, 3, timeout->l3num); goto err_put_timeout; } /* Make sure the timeout policy matches any existing protocol tracker, @@ -162,9 +162,8 @@ xt_ct_set_timeout(struct nf_conn *ct, const struct xt_tgchk_param *par, l4proto = __nf_ct_l4proto_find(par->family, proto); if (timeout->l4proto->l4proto != l4proto->l4proto) { ret = -EINVAL; - pr_info("Timeout policy `%s' can only be used by L4 protocol " - "number %d\n", - timeout_name, timeout->l4proto->l4proto); + pr_info_ratelimited("Timeout policy `%s' can only be used by L%d protocol number %d\n", + timeout_name, 4, timeout->l4proto->l4proto); goto err_put_timeout; } timeout_ext = nf_ct_timeout_ext_add(ct, timeout, GFP_ATOMIC); @@ -180,6 +179,8 @@ err_put_timeout: __xt_ct_tg_timeout_put(timeout); out: rcu_read_unlock(); + if (errmsg) + pr_info_ratelimited("%s\n", errmsg); return ret; #else return -EOPNOTSUPP; -- cgit v1.2.3 From e016c5e43db51875c2b541b59bd217494d213174 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:02 +0100 Subject: netfilter: xt_NFQUEUE: use pr ratelimiting switch this to info, since these aren't really errors. We only use printk because we cannot report meaningful errors in the xtables framework. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_NFQUEUE.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_NFQUEUE.c b/net/netfilter/xt_NFQUEUE.c index a360b99a958a..a9aca80a32ae 100644 --- a/net/netfilter/xt_NFQUEUE.c +++ b/net/netfilter/xt_NFQUEUE.c @@ -8,6 +8,8 @@ * */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -67,13 +69,13 @@ static int nfqueue_tg_check(const struct xt_tgchk_param *par) init_hashrandom(&jhash_initval); if (info->queues_total == 0) { - pr_err("NFQUEUE: number of total queues is 0\n"); + pr_info_ratelimited("number of total queues is 0\n"); return -EINVAL; } maxid = info->queues_total - 1 + info->queuenum; if (maxid > 0xffff) { - pr_err("NFQUEUE: number of queues (%u) out of range (got %u)\n", - info->queues_total, maxid); + pr_info_ratelimited("number of queues (%u) out of range (got %u)\n", + info->queues_total, maxid); return -ERANGE; } if (par->target->revision == 2 && info->flags > 1) -- cgit v1.2.3 From c82b31c5f5608f7f069c584ac169f5691a92d3f5 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:03 +0100 Subject: netfilter: xt_set: use pr ratelimiting also convert this to info for consistency. These errors are informational message to user, given iptables doesn't have netlink extack equivalent. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_set.c | 50 +++++++++++++++++++++++++------------------------- 1 file changed, 25 insertions(+), 25 deletions(-) diff --git a/net/netfilter/xt_set.c b/net/netfilter/xt_set.c index 16b6b11ee83f..6f4c5217d835 100644 --- a/net/netfilter/xt_set.c +++ b/net/netfilter/xt_set.c @@ -92,12 +92,12 @@ set_match_v0_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_info_ratelimited("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.u.flags[IPSET_DIM_MAX - 1] != 0) { - pr_warn("Protocol error: set match dimension is over the limit!\n"); + pr_info_ratelimited("set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -143,12 +143,12 @@ set_match_v1_checkentry(const struct xt_mtchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->match_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find set identified by id %u to match\n", - info->match_set.index); + pr_info_ratelimited("Cannot find set identified by id %u to match\n", + info->match_set.index); return -ENOENT; } if (info->match_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: set match dimension is over the limit!\n"); + pr_info_ratelimited("set match dimension is over the limit!\n"); ip_set_nfnl_put(par->net, info->match_set.index); return -ERANGE; } @@ -241,8 +241,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -250,8 +250,8 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -259,7 +259,7 @@ set_target_v0_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.u.flags[IPSET_DIM_MAX - 1] != 0 || info->del_set.u.flags[IPSET_DIM_MAX - 1] != 0) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -316,8 +316,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->add_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -325,8 +325,8 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) if (info->del_set.index != IPSET_INVALID_ID) { index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); return -ENOENT; @@ -334,7 +334,7 @@ set_target_v1_checkentry(const struct xt_tgchk_param *par) } if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) @@ -444,8 +444,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->add_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find add_set index %u as target\n", - info->add_set.index); + pr_info_ratelimited("Cannot find add_set index %u as target\n", + info->add_set.index); return -ENOENT; } } @@ -454,8 +454,8 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) index = ip_set_nfnl_get_byindex(par->net, info->del_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find del_set index %u as target\n", - info->del_set.index); + pr_info_ratelimited("Cannot find del_set index %u as target\n", + info->del_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -465,7 +465,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->map_set.index != IPSET_INVALID_ID) { if (strncmp(par->table, "mangle", 7)) { - pr_warn("--map-set only usable from mangle table\n"); + pr_info_ratelimited("--map-set only usable from mangle table\n"); return -EINVAL; } if (((info->flags & IPSET_FLAG_MAP_SKBPRIO) | @@ -473,14 +473,14 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) !(par->hook_mask & (1 << NF_INET_FORWARD | 1 << NF_INET_LOCAL_OUT | 1 << NF_INET_POST_ROUTING))) { - pr_warn("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); + pr_info_ratelimited("mapping of prio or/and queue is allowed only from OUTPUT/FORWARD/POSTROUTING chains\n"); return -EINVAL; } index = ip_set_nfnl_get_byindex(par->net, info->map_set.index); if (index == IPSET_INVALID_ID) { - pr_warn("Cannot find map_set index %u as target\n", - info->map_set.index); + pr_info_ratelimited("Cannot find map_set index %u as target\n", + info->map_set.index); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); @@ -494,7 +494,7 @@ set_target_v3_checkentry(const struct xt_tgchk_param *par) if (info->add_set.dim > IPSET_DIM_MAX || info->del_set.dim > IPSET_DIM_MAX || info->map_set.dim > IPSET_DIM_MAX) { - pr_warn("Protocol error: SET target dimension is over the limit!\n"); + pr_info_ratelimited("SET target dimension over the limit!\n"); if (info->add_set.index != IPSET_INVALID_ID) ip_set_nfnl_put(par->net, info->add_set.index); if (info->del_set.index != IPSET_INVALID_ID) -- cgit v1.2.3 From 7ecbf1033521194e544477377ff7e837d25f1ef3 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:04 +0100 Subject: netfilter: bridge: use pr ratelimiting ebt_among still uses pr_err -- these errors indicate ebtables tool bug, not a usage error. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebt_among.c | 10 +++++----- net/bridge/netfilter/ebt_limit.c | 4 ++-- 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/net/bridge/netfilter/ebt_among.c b/net/bridge/netfilter/ebt_among.c index 279527f8b1fe..ce7152a12bd8 100644 --- a/net/bridge/netfilter/ebt_among.c +++ b/net/bridge/netfilter/ebt_among.c @@ -187,17 +187,17 @@ static int ebt_among_mt_check(const struct xt_mtchk_param *par) expected_length += ebt_mac_wormhash_size(wh_src); if (em->match_size != EBT_ALIGN(expected_length)) { - pr_info("wrong size: %d against expected %d, rounded to %zd\n", - em->match_size, expected_length, - EBT_ALIGN(expected_length)); + pr_err_ratelimited("wrong size: %d against expected %d, rounded to %zd\n", + em->match_size, expected_length, + EBT_ALIGN(expected_length)); return -EINVAL; } if (wh_dst && (err = ebt_mac_wormhash_check_integrity(wh_dst))) { - pr_info("dst integrity fail: %x\n", -err); + pr_err_ratelimited("dst integrity fail: %x\n", -err); return -EINVAL; } if (wh_src && (err = ebt_mac_wormhash_check_integrity(wh_src))) { - pr_info("src integrity fail: %x\n", -err); + pr_err_ratelimited("src integrity fail: %x\n", -err); return -EINVAL; } return 0; diff --git a/net/bridge/netfilter/ebt_limit.c b/net/bridge/netfilter/ebt_limit.c index 61a9f1be1263..165b9d678cf1 100644 --- a/net/bridge/netfilter/ebt_limit.c +++ b/net/bridge/netfilter/ebt_limit.c @@ -72,8 +72,8 @@ static int ebt_limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (info->burst == 0 || user2credits(info->avg * info->burst) < user2credits(info->avg)) { - pr_info("overflow, try lower: %u/%u\n", - info->avg, info->burst); + pr_info_ratelimited("overflow, try lower: %u/%u\n", + info->avg, info->burst); return -EINVAL; } -- cgit v1.2.3 From cc48baefdfff83e3774811f69eb181b8850bd8af Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:05 +0100 Subject: netfilter: x_tables: rate-limit table mismatch warnings Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_rpfilter.c | 4 ++-- net/ipv6/netfilter/ip6t_rpfilter.c | 4 ++-- net/netfilter/xt_CONNSECMARK.c | 4 ++-- net/netfilter/xt_SECMARK.c | 4 ++-- 4 files changed, 8 insertions(+), 8 deletions(-) diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 37fb9552e858..5d107dd9098e 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -111,8 +111,8 @@ static int rpfilter_check(const struct xt_mtchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { - pr_info("match only valid in the \'raw\' " - "or \'mangle\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index b12e61b7b16c..ddf3111f9810 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -109,8 +109,8 @@ static int rpfilter_check(const struct xt_mtchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "raw") != 0) { - pr_info("match only valid in the \'raw\' " - "or \'mangle\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'raw\' or \'mangle\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index da56c06a443c..6f30cd399e42 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -91,8 +91,8 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - pr_info("target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", + par->table); return -EINVAL; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 9faf5e050b79..5c5cd782fab5 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -86,8 +86,8 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) if (strcmp(par->table, "mangle") != 0 && strcmp(par->table, "security") != 0) { - pr_info("target only valid in the \'mangle\' " - "or \'security\' tables, not \'%s\'.\n", par->table); + pr_info_ratelimited("only valid in \'mangle\' or \'security\' table, not \'%s\'\n", + par->table); return -EINVAL; } -- cgit v1.2.3 From c08e5e1ee6d65917af2bb12c2c568d637a682c44 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:06 +0100 Subject: netfilter: x_tables: use pr ratelimiting in matches/targets all of these print simple error message - use single pr_ratelimit call. checkpatch complains about lines > 80 but this would require splitting several "literals" over multiple lines which is worse. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_HMARK.c | 17 +++++++++++------ net/netfilter/xt_addrtype.c | 33 ++++++++++++++++----------------- net/netfilter/xt_policy.c | 23 +++++++++++++---------- 3 files changed, 40 insertions(+), 33 deletions(-) diff --git a/net/netfilter/xt_HMARK.c b/net/netfilter/xt_HMARK.c index dd08cc1f86c7..9c75f419cd80 100644 --- a/net/netfilter/xt_HMARK.c +++ b/net/netfilter/xt_HMARK.c @@ -9,6 +9,8 @@ * the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -312,15 +314,15 @@ hmark_tg_v4(struct sk_buff *skb, const struct xt_action_param *par) static int hmark_tg_check(const struct xt_tgchk_param *par) { const struct xt_hmark_info *info = par->targinfo; + const char *errmsg = "proto mask must be zero with L3 mode"; if (!info->hmodulus) return -EINVAL; if (info->proto_mask && - (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) { - pr_info("xt_HMARK: proto mask must be zero with L3 mode\n"); - return -EINVAL; - } + (info->flags & XT_HMARK_FLAG(XT_HMARK_METHOD_L3))) + goto err; + if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI_MASK) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT_MASK) | XT_HMARK_FLAG(XT_HMARK_DPORT_MASK)))) @@ -329,10 +331,13 @@ static int hmark_tg_check(const struct xt_tgchk_param *par) if (info->flags & XT_HMARK_FLAG(XT_HMARK_SPI) && (info->flags & (XT_HMARK_FLAG(XT_HMARK_SPORT) | XT_HMARK_FLAG(XT_HMARK_DPORT)))) { - pr_info("xt_HMARK: spi-set and port-set can't be combined\n"); - return -EINVAL; + errmsg = "spi-set and port-set can't be combined"; + goto err; } return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_target hmark_tg_reg[] __read_mostly = { diff --git a/net/netfilter/xt_addrtype.c b/net/netfilter/xt_addrtype.c index 911a7c0da504..89e281b3bfc2 100644 --- a/net/netfilter/xt_addrtype.c +++ b/net/netfilter/xt_addrtype.c @@ -164,48 +164,47 @@ addrtype_mt_v1(const struct sk_buff *skb, struct xt_action_param *par) static int addrtype_mt_checkentry_v1(const struct xt_mtchk_param *par) { + const char *errmsg = "both incoming and outgoing interface limitation cannot be selected"; struct xt_addrtype_info_v1 *info = par->matchinfo; if (info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN && - info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("both incoming and outgoing " - "interface limitation cannot be selected\n"); - return -EINVAL; - } + info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) + goto err; if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_OUT) { - pr_info("output interface limitation " - "not valid in PREROUTING and INPUT\n"); - return -EINVAL; + errmsg = "output interface limitation not valid in PREROUTING and INPUT"; + goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_ADDRTYPE_LIMIT_IFACE_IN) { - pr_info("input interface limitation " - "not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; + errmsg = "input interface limitation not valid in POSTROUTING and OUTPUT"; + goto err; } #if IS_ENABLED(CONFIG_IP6_NF_IPTABLES) if (par->family == NFPROTO_IPV6) { if ((info->source | info->dest) & XT_ADDRTYPE_BLACKHOLE) { - pr_err("ipv6 BLACKHOLE matching not supported\n"); - return -EINVAL; + errmsg = "ipv6 BLACKHOLE matching not supported"; + goto err; } if ((info->source | info->dest) >= XT_ADDRTYPE_PROHIBIT) { - pr_err("ipv6 PROHIBIT (THROW, NAT ..) matching not supported\n"); - return -EINVAL; + errmsg = "ipv6 PROHIBIT (THROW, NAT ..) matching not supported"; + goto err; } if ((info->source | info->dest) & XT_ADDRTYPE_BROADCAST) { - pr_err("ipv6 does not support BROADCAST matching\n"); - return -EINVAL; + errmsg = "ipv6 does not support BROADCAST matching"; + goto err; } } #endif return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_match addrtype_mt_reg[] __read_mostly = { diff --git a/net/netfilter/xt_policy.c b/net/netfilter/xt_policy.c index 5639fb03bdd9..13f8ccf946d6 100644 --- a/net/netfilter/xt_policy.c +++ b/net/netfilter/xt_policy.c @@ -132,26 +132,29 @@ policy_mt(const struct sk_buff *skb, struct xt_action_param *par) static int policy_mt_check(const struct xt_mtchk_param *par) { const struct xt_policy_info *info = par->matchinfo; + const char *errmsg = "neither incoming nor outgoing policy selected"; + + if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) + goto err; - if (!(info->flags & (XT_POLICY_MATCH_IN|XT_POLICY_MATCH_OUT))) { - pr_info("neither incoming nor outgoing policy selected\n"); - return -EINVAL; - } if (par->hook_mask & ((1 << NF_INET_PRE_ROUTING) | (1 << NF_INET_LOCAL_IN)) && info->flags & XT_POLICY_MATCH_OUT) { - pr_info("output policy not valid in PREROUTING and INPUT\n"); - return -EINVAL; + errmsg = "output policy not valid in PREROUTING and INPUT"; + goto err; } if (par->hook_mask & ((1 << NF_INET_POST_ROUTING) | (1 << NF_INET_LOCAL_OUT)) && info->flags & XT_POLICY_MATCH_IN) { - pr_info("input policy not valid in POSTROUTING and OUTPUT\n"); - return -EINVAL; + errmsg = "input policy not valid in POSTROUTING and OUTPUT"; + goto err; } if (info->len > XT_POLICY_MAX_ELEM) { - pr_info("too many policy elements\n"); - return -EINVAL; + errmsg = "too many policy elements"; + goto err; } return 0; +err: + pr_info_ratelimited("%s\n", errmsg); + return -EINVAL; } static struct xt_match policy_mt_reg[] __read_mostly = { -- cgit v1.2.3 From b26066447bb8599b393b2dd2bbeb68767e09ba07 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 9 Feb 2018 15:52:07 +0100 Subject: netfilter: x_tables: use pr ratelimiting in all remaining spots Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/ipv4/netfilter/ipt_ECN.c | 2 +- net/ipv4/netfilter/ipt_REJECT.c | 4 ++-- net/ipv4/netfilter/ipt_rpfilter.c | 2 +- net/ipv6/netfilter/ip6t_REJECT.c | 4 ++-- net/ipv6/netfilter/ip6t_rpfilter.c | 2 +- net/ipv6/netfilter/ip6t_srh.c | 6 ++++-- net/netfilter/xt_AUDIT.c | 4 ++-- net/netfilter/xt_CHECKSUM.c | 3 ++- net/netfilter/xt_CONNSECMARK.c | 6 +++--- net/netfilter/xt_LED.c | 2 +- net/netfilter/xt_SECMARK.c | 14 ++++++++------ net/netfilter/xt_TCPMSS.c | 10 ++++------ net/netfilter/xt_TPROXY.c | 6 ++---- net/netfilter/xt_bpf.c | 4 +++- net/netfilter/xt_cgroup.c | 8 +++++--- net/netfilter/xt_cluster.c | 8 +++----- net/netfilter/xt_connbytes.c | 4 ++-- net/netfilter/xt_connlabel.c | 7 ++++--- net/netfilter/xt_connmark.c | 8 ++++---- net/netfilter/xt_conntrack.c | 4 ++-- net/netfilter/xt_ecn.c | 4 ++-- net/netfilter/xt_hashlimit.c | 24 +++++++++++++----------- net/netfilter/xt_helper.c | 4 ++-- net/netfilter/xt_ipcomp.c | 2 +- net/netfilter/xt_ipvs.c | 3 ++- net/netfilter/xt_l2tp.c | 22 +++++++++++++--------- net/netfilter/xt_limit.c | 4 ++-- net/netfilter/xt_nat.c | 5 +++-- net/netfilter/xt_nfacct.c | 6 ++++-- net/netfilter/xt_physdev.c | 4 +--- net/netfilter/xt_recent.c | 14 ++++++-------- net/netfilter/xt_socket.c | 10 ++++++---- net/netfilter/xt_state.c | 4 ++-- net/netfilter/xt_time.c | 6 +++--- 34 files changed, 116 insertions(+), 104 deletions(-) diff --git a/net/ipv4/netfilter/ipt_ECN.c b/net/ipv4/netfilter/ipt_ECN.c index 39ff167e6d86..aaaf9a81fbc9 100644 --- a/net/ipv4/netfilter/ipt_ECN.c +++ b/net/ipv4/netfilter/ipt_ECN.c @@ -106,7 +106,7 @@ static int ecn_tg_check(const struct xt_tgchk_param *par) if ((einfo->operation & (IPT_ECN_OP_SET_ECE|IPT_ECN_OP_SET_CWR)) && (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO))) { - pr_info("cannot use TCP operations on a non-tcp rule\n"); + pr_info_ratelimited("cannot use operation on non-tcp rule\n"); return -EINVAL; } return 0; diff --git a/net/ipv4/netfilter/ipt_REJECT.c b/net/ipv4/netfilter/ipt_REJECT.c index 8bd0d7b26632..e8bed3390e58 100644 --- a/net/ipv4/netfilter/ipt_REJECT.c +++ b/net/ipv4/netfilter/ipt_REJECT.c @@ -74,13 +74,13 @@ static int reject_tg_check(const struct xt_tgchk_param *par) const struct ipt_entry *e = par->entryinfo; if (rejinfo->with == IPT_ICMP_ECHOREPLY) { - pr_info("ECHOREPLY no longer supported.\n"); + pr_info_ratelimited("ECHOREPLY no longer supported.\n"); return -EINVAL; } else if (rejinfo->with == IPT_TCP_RESET) { /* Must specify that it's a TCP packet */ if (e->ip.proto != IPPROTO_TCP || (e->ip.invflags & XT_INV_PROTO)) { - pr_info("TCP_RESET invalid for non-tcp\n"); + pr_info_ratelimited("TCP_RESET invalid for non-tcp\n"); return -EINVAL; } } diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 5d107dd9098e..fd01f13c896a 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -105,7 +105,7 @@ static int rpfilter_check(const struct xt_mtchk_param *par) const struct xt_rpfilter_info *info = par->matchinfo; unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { - pr_info("unknown options encountered"); + pr_info_ratelimited("unknown options\n"); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_REJECT.c b/net/ipv6/netfilter/ip6t_REJECT.c index fa51a205918d..38dea8ff680f 100644 --- a/net/ipv6/netfilter/ip6t_REJECT.c +++ b/net/ipv6/netfilter/ip6t_REJECT.c @@ -85,14 +85,14 @@ static int reject_tg6_check(const struct xt_tgchk_param *par) const struct ip6t_entry *e = par->entryinfo; if (rejinfo->with == IP6T_ICMP6_ECHOREPLY) { - pr_info("ECHOREPLY is not supported.\n"); + pr_info_ratelimited("ECHOREPLY is not supported\n"); return -EINVAL; } else if (rejinfo->with == IP6T_TCP_RESET) { /* Must specify that it's a TCP packet */ if (!(e->ipv6.flags & IP6T_F_PROTO) || e->ipv6.proto != IPPROTO_TCP || (e->ipv6.invflags & XT_INV_PROTO)) { - pr_info("TCP_RESET illegal for non-tcp\n"); + pr_info_ratelimited("TCP_RESET illegal for non-tcp\n"); return -EINVAL; } } diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index ddf3111f9810..94deb69bbbda 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -103,7 +103,7 @@ static int rpfilter_check(const struct xt_mtchk_param *par) unsigned int options = ~XT_RPFILTER_OPTION_MASK; if (info->flags & options) { - pr_info("unknown options encountered"); + pr_info_ratelimited("unknown options\n"); return -EINVAL; } diff --git a/net/ipv6/netfilter/ip6t_srh.c b/net/ipv6/netfilter/ip6t_srh.c index 9642164107ce..33719d5560c8 100644 --- a/net/ipv6/netfilter/ip6t_srh.c +++ b/net/ipv6/netfilter/ip6t_srh.c @@ -122,12 +122,14 @@ static int srh_mt6_check(const struct xt_mtchk_param *par) const struct ip6t_srh *srhinfo = par->matchinfo; if (srhinfo->mt_flags & ~IP6T_SRH_MASK) { - pr_err("unknown srh match flags %X\n", srhinfo->mt_flags); + pr_info_ratelimited("unknown srh match flags %X\n", + srhinfo->mt_flags); return -EINVAL; } if (srhinfo->mt_invflags & ~IP6T_SRH_INV_MASK) { - pr_err("unknown srh invflags %X\n", srhinfo->mt_invflags); + pr_info_ratelimited("unknown srh invflags %X\n", + srhinfo->mt_invflags); return -EINVAL; } diff --git a/net/netfilter/xt_AUDIT.c b/net/netfilter/xt_AUDIT.c index c502419d6306..f368ee6741db 100644 --- a/net/netfilter/xt_AUDIT.c +++ b/net/netfilter/xt_AUDIT.c @@ -120,8 +120,8 @@ static int audit_tg_check(const struct xt_tgchk_param *par) const struct xt_audit_info *info = par->targinfo; if (info->type > XT_AUDIT_TYPE_MAX) { - pr_info("Audit type out of range (valid range: 0..%hhu)\n", - XT_AUDIT_TYPE_MAX); + pr_info_ratelimited("Audit type out of range (valid range: 0..%hhu)\n", + XT_AUDIT_TYPE_MAX); return -ERANGE; } diff --git a/net/netfilter/xt_CHECKSUM.c b/net/netfilter/xt_CHECKSUM.c index ea3c5701fb0f..9f4151ec3e06 100644 --- a/net/netfilter/xt_CHECKSUM.c +++ b/net/netfilter/xt_CHECKSUM.c @@ -36,7 +36,8 @@ static int checksum_tg_check(const struct xt_tgchk_param *par) const struct xt_CHECKSUM_info *einfo = par->targinfo; if (einfo->operation & ~XT_CHECKSUM_OP_FILL) { - pr_info("unsupported CHECKSUM operation %x\n", einfo->operation); + pr_info_ratelimited("unsupported CHECKSUM operation %x\n", + einfo->operation); return -EINVAL; } if (!einfo->operation) diff --git a/net/netfilter/xt_CONNSECMARK.c b/net/netfilter/xt_CONNSECMARK.c index 6f30cd399e42..f3f1caac949b 100644 --- a/net/netfilter/xt_CONNSECMARK.c +++ b/net/netfilter/xt_CONNSECMARK.c @@ -102,14 +102,14 @@ static int connsecmark_tg_check(const struct xt_tgchk_param *par) break; default: - pr_info("invalid mode: %hu\n", info->mode); + pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index ece311c11fdc..4472424e7ead 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -136,7 +136,7 @@ static int led_tg_check(const struct xt_tgchk_param *par) err = led_trigger_register(&ledinternal->netfilter_led_trigger); if (err) { - pr_err("Trigger name is already in use.\n"); + pr_info_ratelimited("Trigger name is already in use.\n"); goto exit_alloc; } diff --git a/net/netfilter/xt_SECMARK.c b/net/netfilter/xt_SECMARK.c index 5c5cd782fab5..4ad5fe27e08b 100644 --- a/net/netfilter/xt_SECMARK.c +++ b/net/netfilter/xt_SECMARK.c @@ -60,18 +60,20 @@ static int checkentry_lsm(struct xt_secmark_target_info *info) &info->secid); if (err) { if (err == -EINVAL) - pr_info("invalid security context \'%s\'\n", info->secctx); + pr_info_ratelimited("invalid security context \'%s\'\n", + info->secctx); return err; } if (!info->secid) { - pr_info("unable to map security context \'%s\'\n", info->secctx); + pr_info_ratelimited("unable to map security context \'%s\'\n", + info->secctx); return -ENOENT; } err = security_secmark_relabel_packet(info->secid); if (err) { - pr_info("unable to obtain relabeling permission\n"); + pr_info_ratelimited("unable to obtain relabeling permission\n"); return err; } @@ -92,8 +94,8 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) } if (mode && mode != info->mode) { - pr_info("mode already set to %hu cannot mix with " - "rules for mode %hu\n", mode, info->mode); + pr_info_ratelimited("mode already set to %hu cannot mix with rules for mode %hu\n", + mode, info->mode); return -EINVAL; } @@ -101,7 +103,7 @@ static int secmark_tg_check(const struct xt_tgchk_param *par) case SECMARK_MODE_SEL: break; default: - pr_info("invalid mode: %hu\n", info->mode); + pr_info_ratelimited("invalid mode: %hu\n", info->mode); return -EINVAL; } diff --git a/net/netfilter/xt_TCPMSS.c b/net/netfilter/xt_TCPMSS.c index 99bb8e410f22..98efb202f8b4 100644 --- a/net/netfilter/xt_TCPMSS.c +++ b/net/netfilter/xt_TCPMSS.c @@ -273,8 +273,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info("path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } if (par->nft_compat) @@ -283,7 +282,7 @@ static int tcpmss_tg4_check(const struct xt_tgchk_param *par) xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; - pr_info("Only works on TCP SYN packets\n"); + pr_info_ratelimited("Only works on TCP SYN packets\n"); return -EINVAL; } @@ -298,8 +297,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) (par->hook_mask & ~((1 << NF_INET_FORWARD) | (1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_POST_ROUTING))) != 0) { - pr_info("path-MTU clamping only supported in " - "FORWARD, OUTPUT and POSTROUTING hooks\n"); + pr_info_ratelimited("path-MTU clamping only supported in FORWARD, OUTPUT and POSTROUTING hooks\n"); return -EINVAL; } if (par->nft_compat) @@ -308,7 +306,7 @@ static int tcpmss_tg6_check(const struct xt_tgchk_param *par) xt_ematch_foreach(ematch, e) if (find_syn_match(ematch)) return 0; - pr_info("Only works on TCP SYN packets\n"); + pr_info_ratelimited("Only works on TCP SYN packets\n"); return -EINVAL; } #endif diff --git a/net/netfilter/xt_TPROXY.c b/net/netfilter/xt_TPROXY.c index 17d7705e3bd4..8c89323c06af 100644 --- a/net/netfilter/xt_TPROXY.c +++ b/net/netfilter/xt_TPROXY.c @@ -540,8 +540,7 @@ static int tproxy_tg6_check(const struct xt_tgchk_param *par) !(i->invflags & IP6T_INV_PROTO)) return 0; - pr_info("Can be used only in combination with " - "either -p tcp or -p udp\n"); + pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } #endif @@ -559,8 +558,7 @@ static int tproxy_tg4_check(const struct xt_tgchk_param *par) && !(i->invflags & IPT_INV_PROTO)) return 0; - pr_info("Can be used only in combination with " - "either -p tcp or -p udp\n"); + pr_info_ratelimited("Can be used only with -p tcp or -p udp\n"); return -EINVAL; } diff --git a/net/netfilter/xt_bpf.c b/net/netfilter/xt_bpf.c index 06b090d8e901..a2cf8a6236d6 100644 --- a/net/netfilter/xt_bpf.c +++ b/net/netfilter/xt_bpf.c @@ -7,6 +7,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -34,7 +36,7 @@ static int __bpf_mt_check_bytecode(struct sock_filter *insns, __u16 len, program.filter = insns; if (bpf_prog_create(ret, &program)) { - pr_info("bpf: check failed: parse error\n"); + pr_info_ratelimited("check failed: parse error\n"); return -EINVAL; } diff --git a/net/netfilter/xt_cgroup.c b/net/netfilter/xt_cgroup.c index 891f4e7e8ea7..7df2dece57d3 100644 --- a/net/netfilter/xt_cgroup.c +++ b/net/netfilter/xt_cgroup.c @@ -12,6 +12,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -48,7 +50,7 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) } if (info->has_path && info->has_classid) { - pr_info("xt_cgroup: both path and classid specified\n"); + pr_info_ratelimited("path and classid specified\n"); return -EINVAL; } @@ -56,8 +58,8 @@ static int cgroup_mt_check_v1(const struct xt_mtchk_param *par) if (info->has_path) { cgrp = cgroup_get_from_path(info->path); if (IS_ERR(cgrp)) { - pr_info("xt_cgroup: invalid path, errno=%ld\n", - PTR_ERR(cgrp)); + pr_info_ratelimited("invalid path, errno=%ld\n", + PTR_ERR(cgrp)); return -EINVAL; } info->priv = cgrp; diff --git a/net/netfilter/xt_cluster.c b/net/netfilter/xt_cluster.c index 57ef175dfbfa..0068688995c8 100644 --- a/net/netfilter/xt_cluster.c +++ b/net/netfilter/xt_cluster.c @@ -135,14 +135,12 @@ static int xt_cluster_mt_checkentry(const struct xt_mtchk_param *par) struct xt_cluster_match_info *info = par->matchinfo; if (info->total_nodes > XT_CLUSTER_NODES_MAX) { - pr_info("you have exceeded the maximum " - "number of cluster nodes (%u > %u)\n", - info->total_nodes, XT_CLUSTER_NODES_MAX); + pr_info_ratelimited("you have exceeded the maximum number of cluster nodes (%u > %u)\n", + info->total_nodes, XT_CLUSTER_NODES_MAX); return -EINVAL; } if (info->node_mask >= (1ULL << info->total_nodes)) { - pr_info("this node mask cannot be " - "higher than the total number of nodes\n"); + pr_info_ratelimited("node mask cannot exceed total number of nodes\n"); return -EDOM; } return 0; diff --git a/net/netfilter/xt_connbytes.c b/net/netfilter/xt_connbytes.c index cad0b7b5eb35..93cb018c3055 100644 --- a/net/netfilter/xt_connbytes.c +++ b/net/netfilter/xt_connbytes.c @@ -112,8 +112,8 @@ static int connbytes_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); /* * This filter cannot function correctly unless connection tracking diff --git a/net/netfilter/xt_connlabel.c b/net/netfilter/xt_connlabel.c index 23372879e6e3..4fa4efd24353 100644 --- a/net/netfilter/xt_connlabel.c +++ b/net/netfilter/xt_connlabel.c @@ -57,14 +57,15 @@ static int connlabel_mt_check(const struct xt_mtchk_param *par) int ret; if (info->options & ~options) { - pr_err("Unknown options in mask %x\n", info->options); + pr_info_ratelimited("Unknown options in mask %x\n", + info->options); return -EINVAL; } ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_connmark.c b/net/netfilter/xt_connmark.c index ec377cc6a369..809639ce6f5a 100644 --- a/net/netfilter/xt_connmark.c +++ b/net/netfilter/xt_connmark.c @@ -79,8 +79,8 @@ static int connmark_tg_check(const struct xt_tgchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } @@ -109,8 +109,8 @@ static int connmark_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_conntrack.c b/net/netfilter/xt_conntrack.c index 39cf1d019240..df80fe7d391c 100644 --- a/net/netfilter/xt_conntrack.c +++ b/net/netfilter/xt_conntrack.c @@ -272,8 +272,8 @@ static int conntrack_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_ecn.c b/net/netfilter/xt_ecn.c index 3c831a8efebc..c7ad4afa5fb8 100644 --- a/net/netfilter/xt_ecn.c +++ b/net/netfilter/xt_ecn.c @@ -97,7 +97,7 @@ static int ecn_mt_check4(const struct xt_mtchk_param *par) if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IPT_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } @@ -139,7 +139,7 @@ static int ecn_mt_check6(const struct xt_mtchk_param *par) if (info->operation & (XT_ECN_OP_MATCH_ECE | XT_ECN_OP_MATCH_CWR) && (ip->proto != IPPROTO_TCP || ip->invflags & IP6T_INV_PROTO)) { - pr_info("cannot match TCP bits in rule for non-tcp packets\n"); + pr_info_ratelimited("cannot match TCP bits for non-tcp packets\n"); return -EINVAL; } diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index ca6847403ca2..aa96027f4418 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -523,7 +523,8 @@ static u64 user2rate(u64 user) if (user != 0) { return div64_u64(XT_HASHLIMIT_SCALE_v2, user); } else { - pr_warn("invalid rate from userspace: %llu\n", user); + pr_info_ratelimited("invalid rate from userspace: %llu\n", + user); return 0; } } @@ -865,33 +866,34 @@ static int hashlimit_mt_check_common(const struct xt_mtchk_param *par, } if (cfg->mode & ~XT_HASHLIMIT_ALL) { - pr_info("Unknown mode mask %X, kernel too old?\n", - cfg->mode); + pr_info_ratelimited("Unknown mode mask %X, kernel too old?\n", + cfg->mode); return -EINVAL; } /* Check for overflow. */ if (revision >= 3 && cfg->mode & XT_HASHLIMIT_RATE_MATCH) { if (cfg->avg == 0 || cfg->avg > U32_MAX) { - pr_info("hashlimit invalid rate\n"); + pr_info_ratelimited("invalid rate\n"); return -ERANGE; } if (cfg->interval == 0) { - pr_info("hashlimit invalid interval\n"); + pr_info_ratelimited("invalid interval\n"); return -EINVAL; } } else if (cfg->mode & XT_HASHLIMIT_BYTES) { if (user2credits_byte(cfg->avg) == 0) { - pr_info("overflow, rate too high: %llu\n", cfg->avg); + pr_info_ratelimited("overflow, rate too high: %llu\n", + cfg->avg); return -EINVAL; } } else if (cfg->burst == 0 || - user2credits(cfg->avg * cfg->burst, revision) < - user2credits(cfg->avg, revision)) { - pr_info("overflow, try lower: %llu/%llu\n", - cfg->avg, cfg->burst); - return -ERANGE; + user2credits(cfg->avg * cfg->burst, revision) < + user2credits(cfg->avg, revision)) { + pr_info_ratelimited("overflow, try lower: %llu/%llu\n", + cfg->avg, cfg->burst); + return -ERANGE; } mutex_lock(&hashlimit_mutex); diff --git a/net/netfilter/xt_helper.c b/net/netfilter/xt_helper.c index 38a78151c0e9..fd077aeaaed9 100644 --- a/net/netfilter/xt_helper.c +++ b/net/netfilter/xt_helper.c @@ -61,8 +61,8 @@ static int helper_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) { - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } info->name[sizeof(info->name) - 1] = '\0'; diff --git a/net/netfilter/xt_ipcomp.c b/net/netfilter/xt_ipcomp.c index 7ca64a50db04..57f1df575701 100644 --- a/net/netfilter/xt_ipcomp.c +++ b/net/netfilter/xt_ipcomp.c @@ -72,7 +72,7 @@ static int comp_mt_check(const struct xt_mtchk_param *par) /* Must specify no unknown invflags */ if (compinfo->invflags & ~XT_IPCOMP_INV_MASK) { - pr_err("unknown flags %X\n", compinfo->invflags); + pr_info_ratelimited("unknown flags %X\n", compinfo->invflags); return -EINVAL; } return 0; diff --git a/net/netfilter/xt_ipvs.c b/net/netfilter/xt_ipvs.c index 42540d26c2b8..1d950a6100af 100644 --- a/net/netfilter/xt_ipvs.c +++ b/net/netfilter/xt_ipvs.c @@ -158,7 +158,8 @@ static int ipvs_mt_check(const struct xt_mtchk_param *par) && par->family != NFPROTO_IPV6 #endif ) { - pr_info("protocol family %u not supported\n", par->family); + pr_info_ratelimited("protocol family %u not supported\n", + par->family); return -EINVAL; } diff --git a/net/netfilter/xt_l2tp.c b/net/netfilter/xt_l2tp.c index 8aee572771f2..c43482bf48e6 100644 --- a/net/netfilter/xt_l2tp.c +++ b/net/netfilter/xt_l2tp.c @@ -216,7 +216,7 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) /* Check for invalid flags */ if (info->flags & ~(XT_L2TP_TID | XT_L2TP_SID | XT_L2TP_VERSION | XT_L2TP_TYPE)) { - pr_info("unknown flags: %x\n", info->flags); + pr_info_ratelimited("unknown flags: %x\n", info->flags); return -EINVAL; } @@ -225,7 +225,8 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) (!(info->flags & XT_L2TP_SID)) && ((!(info->flags & XT_L2TP_TYPE)) || (info->type != XT_L2TP_TYPE_CONTROL))) { - pr_info("invalid flags combination: %x\n", info->flags); + pr_info_ratelimited("invalid flags combination: %x\n", + info->flags); return -EINVAL; } @@ -234,19 +235,22 @@ static int l2tp_mt_check(const struct xt_mtchk_param *par) */ if (info->flags & XT_L2TP_VERSION) { if ((info->version < 2) || (info->version > 3)) { - pr_info("wrong L2TP version: %u\n", info->version); + pr_info_ratelimited("wrong L2TP version: %u\n", + info->version); return -EINVAL; } if (info->version == 2) { if ((info->flags & XT_L2TP_TID) && (info->tid > 0xffff)) { - pr_info("v2 tid > 0xffff: %u\n", info->tid); + pr_info_ratelimited("v2 tid > 0xffff: %u\n", + info->tid); return -EINVAL; } if ((info->flags & XT_L2TP_SID) && (info->sid > 0xffff)) { - pr_info("v2 sid > 0xffff: %u\n", info->sid); + pr_info_ratelimited("v2 sid > 0xffff: %u\n", + info->sid); return -EINVAL; } } @@ -268,13 +272,13 @@ static int l2tp_mt_check4(const struct xt_mtchk_param *par) if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { - pr_info("missing protocol rule (udp|l2tpip)\n"); + pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { - pr_info("v2 doesn't support IP mode\n"); + pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } @@ -295,13 +299,13 @@ static int l2tp_mt_check6(const struct xt_mtchk_param *par) if ((ip->proto != IPPROTO_UDP) && (ip->proto != IPPROTO_L2TP)) { - pr_info("missing protocol rule (udp|l2tpip)\n"); + pr_info_ratelimited("missing protocol rule (udp|l2tpip)\n"); return -EINVAL; } if ((ip->proto == IPPROTO_L2TP) && (info->version == 2)) { - pr_info("v2 doesn't support IP mode\n"); + pr_info_ratelimited("v2 doesn't support IP mode\n"); return -EINVAL; } diff --git a/net/netfilter/xt_limit.c b/net/netfilter/xt_limit.c index 61403b77361c..55d18cd67635 100644 --- a/net/netfilter/xt_limit.c +++ b/net/netfilter/xt_limit.c @@ -106,8 +106,8 @@ static int limit_mt_check(const struct xt_mtchk_param *par) /* Check for overflow. */ if (r->burst == 0 || user2credits(r->avg * r->burst) < user2credits(r->avg)) { - pr_info("Overflow, try lower: %u/%u\n", - r->avg, r->burst); + pr_info_ratelimited("Overflow, try lower: %u/%u\n", + r->avg, r->burst); return -ERANGE; } diff --git a/net/netfilter/xt_nat.c b/net/netfilter/xt_nat.c index 0fd14d1eb09d..bdb689cdc829 100644 --- a/net/netfilter/xt_nat.c +++ b/net/netfilter/xt_nat.c @@ -8,6 +8,8 @@ * published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include #include @@ -19,8 +21,7 @@ static int xt_nat_checkentry_v0(const struct xt_tgchk_param *par) const struct nf_nat_ipv4_multi_range_compat *mr = par->targinfo; if (mr->rangesize != 1) { - pr_info("%s: multiple ranges no longer supported\n", - par->target->name); + pr_info_ratelimited("multiple ranges no longer supported\n"); return -EINVAL; } return nf_ct_netns_get(par->net, par->family); diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index 6f92d25590a8..c8674deed4eb 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -6,6 +6,8 @@ * it under the terms of the GNU General Public License version 2 (or any * later at your option) as published by the Free Software Foundation. */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + #include #include @@ -39,8 +41,8 @@ nfacct_mt_checkentry(const struct xt_mtchk_param *par) nfacct = nfnl_acct_find_get(par->net, info->name); if (nfacct == NULL) { - pr_info("xt_nfacct: accounting object with name `%s' " - "does not exists\n", info->name); + pr_info_ratelimited("accounting object `%s' does not exists\n", + info->name); return -ENOENT; } info->nfacct = nfacct; diff --git a/net/netfilter/xt_physdev.c b/net/netfilter/xt_physdev.c index bb33598e4530..9d6d67b953ac 100644 --- a/net/netfilter/xt_physdev.c +++ b/net/netfilter/xt_physdev.c @@ -107,9 +107,7 @@ static int physdev_mt_check(const struct xt_mtchk_param *par) info->invert & XT_PHYSDEV_OP_BRIDGED) && par->hook_mask & ((1 << NF_INET_LOCAL_OUT) | (1 << NF_INET_FORWARD) | (1 << NF_INET_POST_ROUTING))) { - pr_info("using --physdev-out and --physdev-is-out are only " - "supported in the FORWARD and POSTROUTING chains with " - "bridged traffic.\n"); + pr_info_ratelimited("--physdev-out and --physdev-is-out only supported in the FORWARD and POSTROUTING chains with bridged traffic\n"); if (par->hook_mask & (1 << NF_INET_LOCAL_OUT)) return -EINVAL; } diff --git a/net/netfilter/xt_recent.c b/net/netfilter/xt_recent.c index 245fa350a7a8..6d232d18faff 100644 --- a/net/netfilter/xt_recent.c +++ b/net/netfilter/xt_recent.c @@ -342,8 +342,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, net_get_random_once(&hash_rnd, sizeof(hash_rnd)); if (info->check_set & ~XT_RECENT_VALID_FLAGS) { - pr_info("Unsupported user space flags (%08x)\n", - info->check_set); + pr_info_ratelimited("Unsupported userspace flags (%08x)\n", + info->check_set); return -EINVAL; } if (hweight8(info->check_set & @@ -357,8 +357,8 @@ static int recent_mt_check(const struct xt_mtchk_param *par, if ((info->check_set & XT_RECENT_REAP) && !info->seconds) return -EINVAL; if (info->hit_count >= XT_RECENT_MAX_NSTAMPS) { - pr_info("hitcount (%u) is larger than allowed maximum (%u)\n", - info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); + pr_info_ratelimited("hitcount (%u) is larger than allowed maximum (%u)\n", + info->hit_count, XT_RECENT_MAX_NSTAMPS - 1); return -EINVAL; } if (info->name[0] == '\0' || @@ -587,7 +587,7 @@ recent_mt_proc_write(struct file *file, const char __user *input, add = true; break; default: - pr_info("Need \"+ip\", \"-ip\" or \"/\"\n"); + pr_info_ratelimited("Need \"+ip\", \"-ip\" or \"/\"\n"); return -EINVAL; } @@ -601,10 +601,8 @@ recent_mt_proc_write(struct file *file, const char __user *input, succ = in4_pton(c, size, (void *)&addr, '\n', NULL); } - if (!succ) { - pr_info("illegal address written to procfs\n"); + if (!succ) return -EINVAL; - } spin_lock_bh(&recent_lock); e = recent_entry_lookup(t, &addr, family, 0); diff --git a/net/netfilter/xt_socket.c b/net/netfilter/xt_socket.c index 575d2153e3b8..2ac7f674d19b 100644 --- a/net/netfilter/xt_socket.c +++ b/net/netfilter/xt_socket.c @@ -171,7 +171,8 @@ static int socket_mt_v1_check(const struct xt_mtchk_param *par) return err; if (info->flags & ~XT_SOCKET_FLAGS_V1) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V1); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V1); return -EINVAL; } return 0; @@ -187,7 +188,8 @@ static int socket_mt_v2_check(const struct xt_mtchk_param *par) return err; if (info->flags & ~XT_SOCKET_FLAGS_V2) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_SOCKET_FLAGS_V2); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V2); return -EINVAL; } return 0; @@ -203,8 +205,8 @@ static int socket_mt_v3_check(const struct xt_mtchk_param *par) if (err) return err; if (info->flags & ~XT_SOCKET_FLAGS_V3) { - pr_info("unknown flags 0x%x\n", - info->flags & ~XT_SOCKET_FLAGS_V3); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_SOCKET_FLAGS_V3); return -EINVAL; } return 0; diff --git a/net/netfilter/xt_state.c b/net/netfilter/xt_state.c index 5fbd79194d21..0b41c0befe3c 100644 --- a/net/netfilter/xt_state.c +++ b/net/netfilter/xt_state.c @@ -44,8 +44,8 @@ static int state_mt_check(const struct xt_mtchk_param *par) ret = nf_ct_netns_get(par->net, par->family); if (ret < 0) - pr_info("cannot load conntrack support for proto=%u\n", - par->family); + pr_info_ratelimited("cannot load conntrack support for proto=%u\n", + par->family); return ret; } diff --git a/net/netfilter/xt_time.c b/net/netfilter/xt_time.c index 1b01eec1fbda..0160f505e337 100644 --- a/net/netfilter/xt_time.c +++ b/net/netfilter/xt_time.c @@ -235,13 +235,13 @@ static int time_mt_check(const struct xt_mtchk_param *par) if (info->daytime_start > XT_TIME_MAX_DAYTIME || info->daytime_stop > XT_TIME_MAX_DAYTIME) { - pr_info("invalid argument - start or " - "stop time greater than 23:59:59\n"); + pr_info_ratelimited("invalid argument - start or stop time greater than 23:59:59\n"); return -EDOM; } if (info->flags & ~XT_TIME_ALL_FLAGS) { - pr_info("unknown flags 0x%x\n", info->flags & ~XT_TIME_ALL_FLAGS); + pr_info_ratelimited("unknown flags 0x%x\n", + info->flags & ~XT_TIME_ALL_FLAGS); return -EINVAL; } -- cgit v1.2.3 From d682026dd3c548a408415cd75882e5d081147f5b Mon Sep 17 00:00:00 2001 From: Zhu Lingshan Date: Mon, 12 Feb 2018 21:45:42 +0800 Subject: .gitignore: ignore ASN.1 auto generated files when build kernel with default configure, files: generatenet/ipv4/netfilter/nf_nat_snmp_basic-asn1.c net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h will be automatically generated by ASN.1 compiler, so No need to track them in git, it's better to ignore them. Signed-off-by: Zhu Lingshan Signed-off-by: Pablo Neira Ayuso --- .gitignore | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.gitignore b/.gitignore index 705e09913dc2..1be78fd8163b 100644 --- a/.gitignore +++ b/.gitignore @@ -127,3 +127,7 @@ all.config # Kdevelop4 *.kdev4 + +#Automatically generated by ASN.1 compiler +net/ipv4/netfilter/nf_nat_snmp_basic-asn1.c +net/ipv4/netfilter/nf_nat_snmp_basic-asn1.h -- cgit v1.2.3 From 10414014bc085aac9f787a5890b33b5605fbcfc4 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Mon, 12 Feb 2018 18:49:39 +0100 Subject: netfilter: x_tables: fix missing timer initialization in xt_LED syzbot reported that xt_LED may try to use the ledinternal->timer without previously initializing it: ------------[ cut here ]------------ kernel BUG at kernel/time/timer.c:958! invalid opcode: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 1826 Comm: kworker/1:2 Not tainted 4.15.0+ #306 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Workqueue: ipv6_addrconf addrconf_dad_work RIP: 0010:__mod_timer kernel/time/timer.c:958 [inline] RIP: 0010:mod_timer+0x7d6/0x13c0 kernel/time/timer.c:1102 RSP: 0018:ffff8801d24fe9f8 EFLAGS: 00010293 RAX: ffff8801d25246c0 RBX: ffff8801aec6cb50 RCX: ffffffff816052c6 RDX: 0000000000000000 RSI: 00000000fffbd14b RDI: ffff8801aec6cb68 RBP: ffff8801d24fec98 R08: 0000000000000000 R09: 1ffff1003a49fd6c R10: ffff8801d24feb28 R11: 0000000000000005 R12: dffffc0000000000 R13: ffff8801d24fec70 R14: 00000000fffbd14b R15: ffff8801af608f90 FS: 0000000000000000(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000206d6fd0 CR3: 0000000006a22001 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: led_tg+0x1db/0x2e0 net/netfilter/xt_LED.c:75 ip6t_do_table+0xc2a/0x1a30 net/ipv6/netfilter/ip6_tables.c:365 ip6table_raw_hook+0x65/0x80 net/ipv6/netfilter/ip6table_raw.c:42 nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline] nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483 nf_hook.constprop.27+0x3f6/0x830 include/linux/netfilter.h:243 NF_HOOK include/linux/netfilter.h:286 [inline] ndisc_send_skb+0xa51/0x1370 net/ipv6/ndisc.c:491 ndisc_send_ns+0x38a/0x870 net/ipv6/ndisc.c:633 addrconf_dad_work+0xb9e/0x1320 net/ipv6/addrconf.c:4008 process_one_work+0xbbf/0x1af0 kernel/workqueue.c:2113 worker_thread+0x223/0x1990 kernel/workqueue.c:2247 kthread+0x33c/0x400 kernel/kthread.c:238 ret_from_fork+0x3a/0x50 arch/x86/entry/entry_64.S:429 Code: 85 2a 0b 00 00 4d 8b 3c 24 4d 85 ff 75 9f 4c 8b bd 60 fd ff ff e8 bb 57 10 00 65 ff 0d 94 9a a1 7e e9 d9 fc ff ff e8 aa 57 10 00 <0f> 0b e8 a3 57 10 00 e9 14 fb ff ff e8 99 57 10 00 4c 89 bd 70 RIP: __mod_timer kernel/time/timer.c:958 [inline] RSP: ffff8801d24fe9f8 RIP: mod_timer+0x7d6/0x13c0 kernel/time/timer.c:1102 RSP: ffff8801d24fe9f8 ---[ end trace f661ab06f5dd8b3d ]--- The ledinternal struct can be shared between several different xt_LED targets, but the related timer is currently initialized only if the first target requires it. Fix it by unconditionally initializing the timer struct. v1 -> v2: call del_timer_sync() unconditionally, too. Fixes: 268cb38e1802 ("netfilter: x_tables: add LED trigger target") Reported-by: syzbot+10c98dc5725c6c8fc7fb@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_LED.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c index 4472424e7ead..19846445504d 100644 --- a/net/netfilter/xt_LED.c +++ b/net/netfilter/xt_LED.c @@ -140,9 +140,10 @@ static int led_tg_check(const struct xt_tgchk_param *par) goto exit_alloc; } - /* See if we need to set up a timer */ - if (ledinfo->delay > 0) - timer_setup(&ledinternal->timer, led_timeout_callback, 0); + /* Since the letinternal timer can be shared between multiple targets, + * always set it up, even if the current target does not need it + */ + timer_setup(&ledinternal->timer, led_timeout_callback, 0); list_add_tail(&ledinternal->list, &xt_led_triggers); @@ -179,8 +180,7 @@ static void led_tg_destroy(const struct xt_tgdtor_param *par) list_del(&ledinternal->list); - if (ledinfo->delay > 0) - del_timer_sync(&ledinternal->timer); + del_timer_sync(&ledinternal->timer); led_trigger_unregister(&ledinternal->netfilter_led_trigger); -- cgit v1.2.3 From db57ccf0f2f4624b4c4758379f8165277504fbd7 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 14 Feb 2018 17:21:19 +0100 Subject: netfilter: nat: cope with negative port range syzbot reported a division by 0 bug in the netfilter nat code: divide error: 0000 [#1] SMP KASAN Dumping ftrace buffer: (ftrace buffer empty) Modules linked in: CPU: 1 PID: 4168 Comm: syzkaller034710 Not tainted 4.16.0-rc1+ #309 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:nf_nat_l4proto_unique_tuple+0x291/0x530 net/netfilter/nf_nat_proto_common.c:88 RSP: 0018:ffff8801b2466778 EFLAGS: 00010246 RAX: 000000000000f153 RBX: ffff8801b2466dd8 RCX: ffff8801b2466c7c RDX: 0000000000000000 RSI: ffff8801b2466c58 RDI: ffff8801db5293ac RBP: ffff8801b24667d8 R08: ffff8801b8ba6dc0 R09: ffffffff88af5900 R10: ffff8801b24666f0 R11: 0000000000000000 R12: 000000002990f153 R13: 0000000000000001 R14: 0000000000000000 R15: ffff8801b2466c7c FS: 00000000017e3880(0000) GS:ffff8801db500000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00000000208fdfe4 CR3: 00000001b5340002 CR4: 00000000001606e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: dccp_unique_tuple+0x40/0x50 net/netfilter/nf_nat_proto_dccp.c:30 get_unique_tuple+0xc28/0x1c10 net/netfilter/nf_nat_core.c:362 nf_nat_setup_info+0x1c2/0xe00 net/netfilter/nf_nat_core.c:406 nf_nat_redirect_ipv6+0x306/0x730 net/netfilter/nf_nat_redirect.c:124 redirect_tg6+0x7f/0xb0 net/netfilter/xt_REDIRECT.c:34 ip6t_do_table+0xc2a/0x1a30 net/ipv6/netfilter/ip6_tables.c:365 ip6table_nat_do_chain+0x65/0x80 net/ipv6/netfilter/ip6table_nat.c:41 nf_nat_ipv6_fn+0x594/0xa80 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:302 nf_nat_ipv6_local_fn+0x33/0x5d0 net/ipv6/netfilter/nf_nat_l3proto_ipv6.c:407 ip6table_nat_local_fn+0x2c/0x40 net/ipv6/netfilter/ip6table_nat.c:69 nf_hook_entry_hookfn include/linux/netfilter.h:120 [inline] nf_hook_slow+0xba/0x1a0 net/netfilter/core.c:483 nf_hook include/linux/netfilter.h:243 [inline] NF_HOOK include/linux/netfilter.h:286 [inline] ip6_xmit+0x10ec/0x2260 net/ipv6/ip6_output.c:277 inet6_csk_xmit+0x2fc/0x580 net/ipv6/inet6_connection_sock.c:139 dccp_transmit_skb+0x9ac/0x10f0 net/dccp/output.c:142 dccp_connect+0x369/0x670 net/dccp/output.c:564 dccp_v6_connect+0xe17/0x1bf0 net/dccp/ipv6.c:946 __inet_stream_connect+0x2d4/0xf00 net/ipv4/af_inet.c:620 inet_stream_connect+0x58/0xa0 net/ipv4/af_inet.c:684 SYSC_connect+0x213/0x4a0 net/socket.c:1639 SyS_connect+0x24/0x30 net/socket.c:1620 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 entry_SYSCALL_64_after_hwframe+0x26/0x9b RIP: 0033:0x441c69 RSP: 002b:00007ffe50cc0be8 EFLAGS: 00000217 ORIG_RAX: 000000000000002a RAX: ffffffffffffffda RBX: ffffffffffffffff RCX: 0000000000441c69 RDX: 000000000000001c RSI: 00000000208fdfe4 RDI: 0000000000000003 RBP: 00000000006cc018 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000538 R11: 0000000000000217 R12: 0000000000403590 R13: 0000000000403620 R14: 0000000000000000 R15: 0000000000000000 Code: 48 89 f0 83 e0 07 83 c0 01 38 d0 7c 08 84 d2 0f 85 46 02 00 00 48 8b 45 c8 44 0f b7 20 e8 88 97 04 fd 31 d2 41 0f b7 c4 4c 89 f9 <41> f7 f6 48 c1 e9 03 48 b8 00 00 00 00 00 fc ff df 0f b6 0c 01 RIP: nf_nat_l4proto_unique_tuple+0x291/0x530 net/netfilter/nf_nat_proto_common.c:88 RSP: ffff8801b2466778 The problem is that currently we don't have any check on the configured port range. A port range == -1 triggers the bug, while other negative values may require a very long time to complete the following loop. This commit addresses the issue swapping the two ends on negative ranges. The check is performed in nf_nat_l4proto_unique_tuple() since the nft nat loads the port values from nft registers at runtime. v1 -> v2: use the correct 'Fixes' tag v2 -> v3: update commit message, drop unneeded READ_ONCE() Fixes: 5b1158e909ec ("[NETFILTER]: Add NAT support for nf_conntrack") Reported-by: syzbot+8012e198bd037f4871e5@syzkaller.appspotmail.com Signed-off-by: Paolo Abeni Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_proto_common.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/net/netfilter/nf_nat_proto_common.c b/net/netfilter/nf_nat_proto_common.c index fbce552a796e..7d7466dbf663 100644 --- a/net/netfilter/nf_nat_proto_common.c +++ b/net/netfilter/nf_nat_proto_common.c @@ -41,7 +41,7 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, const struct nf_conn *ct, u16 *rover) { - unsigned int range_size, min, i; + unsigned int range_size, min, max, i; __be16 *portptr; u_int16_t off; @@ -71,7 +71,10 @@ void nf_nat_l4proto_unique_tuple(const struct nf_nat_l3proto *l3proto, } } else { min = ntohs(range->min_proto.all); - range_size = ntohs(range->max_proto.all) - min + 1; + max = ntohs(range->max_proto.all); + if (unlikely(max < min)) + swap(max, min); + range_size = max - min + 1; } if (range->flags & NF_NAT_RANGE_PROTO_RANDOM) { -- cgit v1.2.3 From 52c84d36b7e2f8197a9a6174d6f901a7c7afb850 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 14 Feb 2018 22:42:54 -0800 Subject: tools: bpftool: preserve JSON for batch mode when dumping insns to file Print a "null" JSON object to standard output when bpftool is used to print program instructions to a file, so as to avoid breaking JSON output on batch mode. This null object was added for most commands in a previous commit, but this specific case had been omitted. Fixes: 004b45c0e51a ("tools: bpftool: provide JSON output for all possible commands") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/prog.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index e8e2baaf93c2..e549e329be82 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -774,6 +774,9 @@ static int do_dump(int argc, char **argv) n < 0 ? strerror(errno) : "short write"); goto err_free; } + + if (json_output) + jsonw_null(json_wtr); } else { if (member_len == &info.jited_prog_len) { const char *name = NULL; -- cgit v1.2.3 From 9be6d411b0c473d31f756993b8b41bb16b0679c1 Mon Sep 17 00:00:00 2001 From: Quentin Monnet Date: Wed, 14 Feb 2018 22:42:55 -0800 Subject: tools: bpftool: preserve JSON output on errors on batch file parsing Before this patch, perror() function is used in some cases when bpftool fails to parse its input file in batch mode. This function does not integrate well with the rest of the output when JSON is used, so we replace it by something that is compliant. Most calls to perror() had already been replaced in a previous patch, this one is a leftover. Fixes: d319c8e101c5 ("tools: bpftool: preserve JSON output on errors on batch file parsing") Signed-off-by: Quentin Monnet Acked-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- tools/bpf/bpftool/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 3a0396d87c42..185acfa229b5 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -244,7 +244,7 @@ static int do_batch(int argc, char **argv) } if (errno && errno != ENOENT) { - perror("reading batch file failed"); + p_err("reading batch file failed: %s", strerror(errno)); err = -1; } else { p_info("processed %d lines", lines); -- cgit v1.2.3 From 9c2d63b843a5c8a8d0559cc067b5398aa5ec3ffc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 16 Feb 2018 01:10:29 +0100 Subject: bpf: fix mlock precharge on arraymaps syzkaller recently triggered OOM during percpu map allocation; while there is work in progress by Dennis Zhou to add __GFP_NORETRY semantics for percpu allocator under pressure, there seems also a missing bpf_map_precharge_memlock() check in array map allocation. Given today the actual bpf_map_charge_memlock() happens after the find_and_alloc_map() in syscall path, the bpf_map_precharge_memlock() is there to bail out early before we go and do the map setup work when we find that we hit the limits anyway. Therefore add this for array map as well. Fixes: 6c9059817432 ("bpf: pre-allocate hash map elements") Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map") Reported-by: syzbot+adb03f3f0bb57ce3acda@syzkaller.appspotmail.com Signed-off-by: Daniel Borkmann Cc: Dennis Zhou Signed-off-by: Alexei Starovoitov --- kernel/bpf/arraymap.c | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index b1f66480135b..a364c408f25a 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -73,11 +73,11 @@ static int array_map_alloc_check(union bpf_attr *attr) static struct bpf_map *array_map_alloc(union bpf_attr *attr) { bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY; - int numa_node = bpf_map_attr_numa_node(attr); + int ret, numa_node = bpf_map_attr_numa_node(attr); u32 elem_size, index_mask, max_entries; bool unpriv = !capable(CAP_SYS_ADMIN); + u64 cost, array_size, mask64; struct bpf_array *array; - u64 array_size, mask64; elem_size = round_up(attr->value_size, 8); @@ -109,8 +109,19 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) array_size += (u64) max_entries * elem_size; /* make sure there is no u32 overflow later in round_up() */ - if (array_size >= U32_MAX - PAGE_SIZE) + cost = array_size; + if (cost >= U32_MAX - PAGE_SIZE) return ERR_PTR(-ENOMEM); + if (percpu) { + cost += (u64)attr->max_entries * elem_size * num_possible_cpus(); + if (cost >= U32_MAX - PAGE_SIZE) + return ERR_PTR(-ENOMEM); + } + cost = round_up(cost, PAGE_SIZE) >> PAGE_SHIFT; + + ret = bpf_map_precharge_memlock(cost); + if (ret < 0) + return ERR_PTR(ret); /* allocate all map elements and zero-initialize them */ array = bpf_map_area_alloc(array_size, numa_node); @@ -121,20 +132,13 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr) /* copy mandatory map attributes */ bpf_map_init_from_attr(&array->map, attr); + array->map.pages = cost; array->elem_size = elem_size; - if (!percpu) - goto out; - - array_size += (u64) attr->max_entries * elem_size * num_possible_cpus(); - - if (array_size >= U32_MAX - PAGE_SIZE || - bpf_array_alloc_percpu(array)) { + if (percpu && bpf_array_alloc_percpu(array)) { bpf_map_area_free(array); return ERR_PTR(-ENOMEM); } -out: - array->map.pages = round_up(array_size, PAGE_SIZE) >> PAGE_SHIFT; return &array->map; } -- cgit v1.2.3 From de526f401284e1638d4c97cb5a4c292ac3f37655 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Feb 2018 08:11:48 -0800 Subject: netfilter: xt_hashlimit: fix lock imbalance syszkaller found that rcu was not held in hashlimit_mt_common() We only need to enable BH at this point. Fixes: bea74641e378 ("netfilter: xt_hashlimit: add rate match mode") Signed-off-by: Eric Dumazet Reported-by: syzkaller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_hashlimit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/netfilter/xt_hashlimit.c b/net/netfilter/xt_hashlimit.c index aa96027f4418..66f5aca62a08 100644 --- a/net/netfilter/xt_hashlimit.c +++ b/net/netfilter/xt_hashlimit.c @@ -775,7 +775,7 @@ hashlimit_mt_common(const struct sk_buff *skb, struct xt_action_param *par, if (!dh->rateinfo.prev_window && (dh->rateinfo.current_rate <= dh->rateinfo.burst)) { spin_unlock(&dh->lock); - rcu_read_unlock_bh(); + local_bh_enable(); return !(cfg->mode & XT_HASHLIMIT_INVERT); } else { goto overlimit; -- cgit v1.2.3 From bee92d06157fc39d5d7836a061c7d41289a55797 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 2 Feb 2018 16:31:23 +0100 Subject: cfg80211: fix cfg80211_beacon_dup gcc-8 warns about some obviously incorrect code: net/mac80211/cfg.c: In function 'cfg80211_beacon_dup': net/mac80211/cfg.c:2896:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] From the context, I conclude that we want to copy from beacon into new_beacon, as we do in the rest of the function. Cc: stable@vger.kernel.org Fixes: 73da7d5bab79 ("mac80211: add channel switch command and beacon callbacks") Signed-off-by: Arnd Bergmann Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index fb15d3b97cb2..84f757c5d91a 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -2863,7 +2863,7 @@ cfg80211_beacon_dup(struct cfg80211_beacon_data *beacon) } if (beacon->probe_resp_len) { new_beacon->probe_resp_len = beacon->probe_resp_len; - beacon->probe_resp = pos; + new_beacon->probe_resp = pos; memcpy(pos, beacon->probe_resp, beacon->probe_resp_len); pos += beacon->probe_resp_len; } -- cgit v1.2.3 From ce162bfbc0b601841886965baba14877127c7c7c Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 24 Jan 2018 08:40:51 +0100 Subject: mac80211_hwsim: don't use WQ_MEM_RECLAIM We're obviously not part of a memory reclaim path, so don't set the flag. This also causes a warning in check_flush_dependency() since we end up in a code path that flushes a non-reclaim workqueue, and we shouldn't do that if we were really part of reclaim. Reported-by: syzbot+41cdaf4232c50e658934@syzkaller.appspotmail.com Signed-off-by: Johannes Berg --- drivers/net/wireless/mac80211_hwsim.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c index f6d4a50f1bdb..829ac22b72fc 100644 --- a/drivers/net/wireless/mac80211_hwsim.c +++ b/drivers/net/wireless/mac80211_hwsim.c @@ -3455,7 +3455,7 @@ static int __init init_mac80211_hwsim(void) spin_lock_init(&hwsim_radio_lock); - hwsim_wq = alloc_workqueue("hwsim_wq",WQ_MEM_RECLAIM,0); + hwsim_wq = alloc_workqueue("hwsim_wq", 0, 0); if (!hwsim_wq) return -ENOMEM; -- cgit v1.2.3 From 651b9920d7a694ffb1f885aef2bbb068a25d9d66 Mon Sep 17 00:00:00 2001 From: Felix Fietkau Date: Sat, 10 Feb 2018 13:20:34 +0100 Subject: mac80211: round IEEE80211_TX_STATUS_HEADROOM up to multiple of 4 This ensures that mac80211 allocated management frames are properly aligned, which makes copying them more efficient. For instance, mt76 uses iowrite32_copy to copy beacon frames to beacon template memory on the chip. Misaligned 32-bit accesses cause CPU exceptions on MIPS and should be avoided. Signed-off-by: Felix Fietkau Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index eec143cca1c0..c9077a832977 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -4141,7 +4141,7 @@ void ieee80211_sta_uapsd_trigger(struct ieee80211_sta *sta, u8 tid); * The TX headroom reserved by mac80211 for its own tx_status functions. * This is enough for the radiotap header. */ -#define IEEE80211_TX_STATUS_HEADROOM 14 +#define IEEE80211_TX_STATUS_HEADROOM ALIGN(14, 4) /** * ieee80211_sta_set_buffered - inform mac80211 about driver-buffered frames -- cgit v1.2.3 From d78d9ee9d40aca4781d2c5334972544601a4c3a2 Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:35 +0200 Subject: mac80211: fix a possible leak of station stats If sta_info_alloc fails after allocating the per CPU statistics, they are not properly freed. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 0c5627f8a104..8d7e3732bb61 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -433,6 +433,7 @@ free_txq: if (sta->sta.txq[0]) kfree(to_txq_info(sta->sta.txq[0])); free: + free_percpu(sta->pcpu_rx_stats); #ifdef CONFIG_MAC80211_MESH kfree(sta->mesh); #endif -- cgit v1.2.3 From 95f3ce6a77893ac828ba841df44421620de4314b Mon Sep 17 00:00:00 2001 From: Sara Sharon Date: Mon, 19 Feb 2018 14:48:37 +0200 Subject: mac80211: fix calling sleeping function in atomic context sta_info_alloc can be called from atomic paths (such as RX path) so we need to call pcpu_alloc with the correct gfp. Fixes: c9c5962b56c1 ("mac80211: enable collecting station statistics per-CPU") Signed-off-by: Sara Sharon Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/sta_info.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c index 8d7e3732bb61..af0b608ee8ed 100644 --- a/net/mac80211/sta_info.c +++ b/net/mac80211/sta_info.c @@ -314,7 +314,7 @@ struct sta_info *sta_info_alloc(struct ieee80211_sub_if_data *sdata, if (ieee80211_hw_check(hw, USES_RSS)) { sta->pcpu_rx_stats = - alloc_percpu(struct ieee80211_sta_rx_stats); + alloc_percpu_gfp(struct ieee80211_sta_rx_stats, gfp); if (!sta->pcpu_rx_stats) goto free; } -- cgit v1.2.3 From 3027a8e799b20fc922496a12f8ad2f9f36a8a696 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 19 Feb 2018 14:48:38 +0200 Subject: cfg80211: clear wep keys after disconnection When a low level driver calls cfg80211_disconnected(), wep keys are not cleared. As a result, following connection requests will fail since cfg80211 internal state shows a connection is still in progress. Fix this by clearing the wep keys when disconnecting. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/wireless/sme.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/wireless/sme.c b/net/wireless/sme.c index fdb3646274a5..701cfd7acc1b 100644 --- a/net/wireless/sme.c +++ b/net/wireless/sme.c @@ -1032,6 +1032,8 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie, wdev->current_bss = NULL; wdev->ssid_len = 0; wdev->conn_owner_nlportid = 0; + kzfree(wdev->connect_keys); + wdev->connect_keys = NULL; nl80211_send_disconnected(rdev, dev, reason, ie, ie_len, from_ap); -- cgit v1.2.3 From 191da271ac260700db3e5b4bb982a17ca78769d6 Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:42 +0200 Subject: mac80211: Do not disconnect on invalid operating class Some APs include a non global operating class in their extended channel switch information element. In such a case, as the operating class is not known, mac80211 would decide to disconnect. However the specification states that the operating class needs to be taken from Annex E, but it does not specify from which table it should be taken, so it is valid for an AP to use a non global operating class. To avoid possibly unneeded disconnection, in such a case ignore the operating class and assume that the current band is used, and if the resulting channel and band configuration is invalid disconnect. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/spectmgmt.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c index ee0181778a42..029334835747 100644 --- a/net/mac80211/spectmgmt.c +++ b/net/mac80211/spectmgmt.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2008, Intel Corporation * Copyright 2008, Johannes Berg + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -27,7 +28,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, u32 sta_flags, u8 *bssid, struct ieee80211_csa_ie *csa_ie) { - enum nl80211_band new_band; + enum nl80211_band new_band = current_band; int new_freq; u8 new_chan_no; struct ieee80211_channel *new_chan; @@ -55,15 +56,13 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata, elems->ext_chansw_ie->new_operating_class, &new_band)) { sdata_info(sdata, - "cannot understand ECSA IE operating class %d, disconnecting\n", + "cannot understand ECSA IE operating class, %d, ignoring\n", elems->ext_chansw_ie->new_operating_class); - return -EINVAL; } new_chan_no = elems->ext_chansw_ie->new_ch_num; csa_ie->count = elems->ext_chansw_ie->count; csa_ie->mode = elems->ext_chansw_ie->mode; } else if (elems->ch_switch_ie) { - new_band = current_band; new_chan_no = elems->ch_switch_ie->new_ch_num; csa_ie->count = elems->ch_switch_ie->count; csa_ie->mode = elems->ch_switch_ie->mode; -- cgit v1.2.3 From 3b07029729e347f288c70227cfe3c66b085d6b0b Mon Sep 17 00:00:00 2001 From: Ilan Peer Date: Mon, 19 Feb 2018 14:48:43 +0200 Subject: mac80211: Fix sending ADDBA response for an ongoing session In case an ADDBA request is received while there is already an ongoing BA sessions with the same parameters, i.e., update flow, an ADBBA response with decline status was sent twice. Fix it. Signed-off-by: Ilan Peer Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- net/mac80211/agg-rx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c index d444752dbf40..d64303390913 100644 --- a/net/mac80211/agg-rx.c +++ b/net/mac80211/agg-rx.c @@ -8,6 +8,7 @@ * Copyright 2007, Michael Wu * Copyright 2007-2010, Intel Corporation * Copyright(c) 2015-2017 Intel Deutschland GmbH + * Copyright (C) 2018 Intel Corporation * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -315,9 +316,6 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta, * driver so reject the timeout update. */ status = WLAN_STATUS_REQUEST_DECLINED; - ieee80211_send_addba_resp(sta->sdata, sta->sta.addr, - tid, dialog_token, status, - 1, buf_size, timeout); goto end; } -- cgit v1.2.3 From cfc2c740533368b96e2be5e0a4e8c3cace7d9814 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Feb 2018 19:36:28 -0800 Subject: netfilter: IDLETIMER: be syzkaller friendly We had one report from syzkaller [1] First issue is that INIT_WORK() should be done before mod_timer() or we risk timer being fired too soon, even with a 1 second timer. Second issue is that we need to reject too big info->timeout to avoid overflows in msecs_to_jiffies(info->timeout * 1000), or risk looping, if result after overflow is 0. [1] WARNING: CPU: 1 PID: 5129 at kernel/workqueue.c:1444 __queue_work+0xdf4/0x1230 kernel/workqueue.c:1444 Kernel panic - not syncing: panic_on_warn set ... CPU: 1 PID: 5129 Comm: syzkaller159866 Not tainted 4.16.0-rc1+ #230 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:17 [inline] dump_stack+0x194/0x257 lib/dump_stack.c:53 panic+0x1e4/0x41c kernel/panic.c:183 __warn+0x1dc/0x200 kernel/panic.c:547 report_bug+0x211/0x2d0 lib/bug.c:184 fixup_bug.part.11+0x37/0x80 arch/x86/kernel/traps.c:178 fixup_bug arch/x86/kernel/traps.c:247 [inline] do_error_trap+0x2d7/0x3e0 arch/x86/kernel/traps.c:296 do_invalid_op+0x1b/0x20 arch/x86/kernel/traps.c:315 invalid_op+0x22/0x40 arch/x86/entry/entry_64.S:988 RIP: 0010:__queue_work+0xdf4/0x1230 kernel/workqueue.c:1444 RSP: 0018:ffff8801db507538 EFLAGS: 00010006 RAX: ffff8801aeb46080 RBX: ffff8801db530200 RCX: ffffffff81481404 RDX: 0000000000000100 RSI: ffffffff86b42640 RDI: 0000000000000082 RBP: ffff8801db507758 R08: 1ffff1003b6a0de5 R09: 000000000000000c R10: ffff8801db5073f0 R11: 0000000000000020 R12: 1ffff1003b6a0eb6 R13: ffff8801b1067ae0 R14: 00000000000001f8 R15: dffffc0000000000 queue_work_on+0x16a/0x1c0 kernel/workqueue.c:1488 queue_work include/linux/workqueue.h:488 [inline] schedule_work include/linux/workqueue.h:546 [inline] idletimer_tg_expired+0x44/0x60 net/netfilter/xt_IDLETIMER.c:116 call_timer_fn+0x228/0x820 kernel/time/timer.c:1326 expire_timers kernel/time/timer.c:1363 [inline] __run_timers+0x7ee/0xb70 kernel/time/timer.c:1666 run_timer_softirq+0x4c/0x70 kernel/time/timer.c:1692 __do_softirq+0x2d7/0xb85 kernel/softirq.c:285 invoke_softirq kernel/softirq.c:365 [inline] irq_exit+0x1cc/0x200 kernel/softirq.c:405 exiting_irq arch/x86/include/asm/apic.h:541 [inline] smp_apic_timer_interrupt+0x16b/0x700 arch/x86/kernel/apic/apic.c:1052 apic_timer_interrupt+0xa9/0xb0 arch/x86/entry/entry_64.S:829 RIP: 0010:arch_local_irq_restore arch/x86/include/asm/paravirt.h:777 [inline] RIP: 0010:__raw_spin_unlock_irqrestore include/linux/spinlock_api_smp.h:160 [inline] RIP: 0010:_raw_spin_unlock_irqrestore+0x5e/0xba kernel/locking/spinlock.c:184 RSP: 0018:ffff8801c20173c8 EFLAGS: 00000282 ORIG_RAX: ffffffffffffff12 RAX: dffffc0000000000 RBX: 0000000000000282 RCX: 0000000000000006 RDX: 1ffffffff0d592cd RSI: 1ffff10035d68d23 RDI: 0000000000000282 RBP: ffff8801c20173d8 R08: 1ffff10038402e47 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000000 R12: ffffffff8820e5c8 R13: ffff8801b1067ad8 R14: ffff8801aea7c268 R15: ffff8801aea7c278 __debug_object_init+0x235/0x1040 lib/debugobjects.c:378 debug_object_init+0x17/0x20 lib/debugobjects.c:391 __init_work+0x2b/0x60 kernel/workqueue.c:506 idletimer_tg_create net/netfilter/xt_IDLETIMER.c:152 [inline] idletimer_tg_checkentry+0x691/0xb00 net/netfilter/xt_IDLETIMER.c:213 xt_check_target+0x22c/0x7d0 net/netfilter/x_tables.c:850 check_target net/ipv6/netfilter/ip6_tables.c:533 [inline] find_check_entry.isra.7+0x935/0xcf0 net/ipv6/netfilter/ip6_tables.c:575 translate_table+0xf52/0x1690 net/ipv6/netfilter/ip6_tables.c:744 do_replace net/ipv6/netfilter/ip6_tables.c:1160 [inline] do_ip6t_set_ctl+0x370/0x5f0 net/ipv6/netfilter/ip6_tables.c:1686 nf_sockopt net/netfilter/nf_sockopt.c:106 [inline] nf_setsockopt+0x67/0xc0 net/netfilter/nf_sockopt.c:115 ipv6_setsockopt+0x10b/0x130 net/ipv6/ipv6_sockglue.c:927 udpv6_setsockopt+0x45/0x80 net/ipv6/udp.c:1422 sock_common_setsockopt+0x95/0xd0 net/core/sock.c:2976 SYSC_setsockopt net/socket.c:1850 [inline] SyS_setsockopt+0x189/0x360 net/socket.c:1829 do_syscall_64+0x282/0x940 arch/x86/entry/common.c:287 Fixes: 0902b469bd25 ("netfilter: xtables: idletimer target implementation") Signed-off-by: Eric Dumazet Reported-by: syzkaller Signed-off-by: Pablo Neira Ayuso --- net/netfilter/xt_IDLETIMER.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/net/netfilter/xt_IDLETIMER.c b/net/netfilter/xt_IDLETIMER.c index 6c2482b709b1..1ac6600bfafd 100644 --- a/net/netfilter/xt_IDLETIMER.c +++ b/net/netfilter/xt_IDLETIMER.c @@ -146,11 +146,11 @@ static int idletimer_tg_create(struct idletimer_tg_info *info) timer_setup(&info->timer->timer, idletimer_tg_expired, 0); info->timer->refcnt = 1; + INIT_WORK(&info->timer->work, idletimer_tg_work); + mod_timer(&info->timer->timer, msecs_to_jiffies(info->timeout * 1000) + jiffies); - INIT_WORK(&info->timer->work, idletimer_tg_work); - return 0; out_free_attr: @@ -191,7 +191,10 @@ static int idletimer_tg_checkentry(const struct xt_tgchk_param *par) pr_debug("timeout value is zero\n"); return -EINVAL; } - + if (info->timeout >= INT_MAX / 1000) { + pr_debug("timeout value is too big\n"); + return -EINVAL; + } if (info->label[0] == '\0' || strnlen(info->label, MAX_IDLETIMER_LABEL_SIZE) == MAX_IDLETIMER_LABEL_SIZE) { -- cgit v1.2.3 From abe27a885d9e6575e663a16176dabc58ce9d7188 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Mon, 19 Feb 2018 20:12:57 -0600 Subject: ibmvnic: Check for NULL skb's in NAPI poll routine After introduction of commit d0869c0071e4, there were some instances of RX queue entries from a previous session (before the device was closed and reopened) returned to the NAPI polling routine. Since the corresponding socket buffers were freed, this resulted in a panic on reopen. Include a check for a NULL skb here to avoid this. Fixes: d0869c0071e4 ("ibmvnic: Clean RX pool buffers during device close") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 996f47568f9e..1495cb99f924 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -1901,6 +1901,11 @@ restart_poll: dev_kfree_skb_any(rx_buff->skb); remove_buff_from_pool(adapter, rx_buff); continue; + } else if (!rx_buff->skb) { + /* free the entry */ + next->rx_comp.first = 0; + remove_buff_from_pool(adapter, rx_buff); + continue; } length = be32_to_cpu(next->rx_comp.len); -- cgit v1.2.3 From b1a2ce825737b0165cc08e6f98f8c0ea1affdd60 Mon Sep 17 00:00:00 2001 From: Jeremy Cline Date: Tue, 20 Feb 2018 01:00:07 +0000 Subject: tools/libbpf: Avoid possibly using uninitialized variable Fixes a GCC maybe-uninitialized warning introduced by 48cca7e44f9f. "text" is only initialized inside the if statement so only print debug info there. Fixes: 48cca7e44f9f ("libbpf: add support for bpf_call") Signed-off-by: Jeremy Cline Signed-off-by: Daniel Borkmann --- tools/lib/bpf/libbpf.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 97073d649c1a..5bbbf285af74 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -1060,11 +1060,12 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, prog->insns = new_insn; prog->main_prog_cnt = prog->insns_cnt; prog->insns_cnt = new_cnt; + pr_debug("added %zd insn from %s to prog %s\n", + text->insns_cnt, text->section_name, + prog->section_name); } insn = &prog->insns[relo->insn_idx]; insn->imm += prog->main_prog_cnt - relo->insn_idx; - pr_debug("added %zd insn from %s to prog %s\n", - text->insns_cnt, text->section_name, prog->section_name); return 0; } -- cgit v1.2.3 From 8babd44d2079079f9d5a4aca7005aed80236efe0 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Wed, 20 Dec 2017 08:48:24 +0200 Subject: net/mlx5e: Fix TCP checksum in LRO buffers When receiving an LRO packet, the checksum field is set by the hardware to the checksum of the first coalesced packet. Obviously, this checksum is not valid for the merged LRO packet and should be fixed. We can use the CQE checksum which covers the checksum of the entire merged packet TCP payload to help us calculate the checksum incrementally. Tested by sending IPv4/6 traffic with LRO enabled, RX checksum disabled and watching nstat checksum error counters (in addition to the obvious bandwidth drop caused by checksum errors). This bug is usually "hidden" since LRO packets would go through the CHECKSUM_UNNECESSARY flow which does not validate the packet checksum. It's important to note that previous to this patch, LRO packets provided with CHECKSUM_UNNECESSARY are indeed packets with a correct validated checksum (even though the checksum inside the TCP header is incorrect), since the hardware LRO aggregation is terminated upon receiving a packet with bad checksum. Fixes: e586b3b0baee ("net/mlx5: Ethernet Datapath files") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 49 ++++++++++++++++++------- 1 file changed, 35 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index 0d4bb0688faa..e5c3ab46a24a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -36,6 +36,7 @@ #include #include #include +#include #include "en.h" #include "en_tc.h" #include "eswitch.h" @@ -546,20 +547,33 @@ bool mlx5e_post_rx_mpwqes(struct mlx5e_rq *rq) return true; } +static void mlx5e_lro_update_tcp_hdr(struct mlx5_cqe64 *cqe, struct tcphdr *tcp) +{ + u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); + u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || + (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); + + tcp->check = 0; + tcp->psh = get_cqe_lro_tcppsh(cqe); + + if (tcp_ack) { + tcp->ack = 1; + tcp->ack_seq = cqe->lro_ack_seq_num; + tcp->window = cqe->lro_tcp_win; + } +} + static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, u32 cqe_bcnt) { struct ethhdr *eth = (struct ethhdr *)(skb->data); struct tcphdr *tcp; int network_depth = 0; + __wsum check; __be16 proto; u16 tot_len; void *ip_p; - u8 l4_hdr_type = get_cqe_l4_hdr_type(cqe); - u8 tcp_ack = (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_NO_DATA) || - (l4_hdr_type == CQE_L4_HDR_TYPE_TCP_ACK_AND_DATA); - proto = __vlan_get_protocol(skb, eth->h_proto, &network_depth); tot_len = cqe_bcnt - network_depth; @@ -576,23 +590,30 @@ static void mlx5e_lro_update_hdr(struct sk_buff *skb, struct mlx5_cqe64 *cqe, ipv4->check = 0; ipv4->check = ip_fast_csum((unsigned char *)ipv4, ipv4->ihl); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_tcpudp_magic(ipv4->saddr, ipv4->daddr, + tot_len - sizeof(struct iphdr), + IPPROTO_TCP, check); } else { + u16 payload_len = tot_len - sizeof(struct ipv6hdr); struct ipv6hdr *ipv6 = ip_p; tcp = ip_p + sizeof(struct ipv6hdr); skb_shinfo(skb)->gso_type = SKB_GSO_TCPV6; ipv6->hop_limit = cqe->lro_min_ttl; - ipv6->payload_len = cpu_to_be16(tot_len - - sizeof(struct ipv6hdr)); - } - - tcp->psh = get_cqe_lro_tcppsh(cqe); - - if (tcp_ack) { - tcp->ack = 1; - tcp->ack_seq = cqe->lro_ack_seq_num; - tcp->window = cqe->lro_tcp_win; + ipv6->payload_len = cpu_to_be16(payload_len); + + mlx5e_lro_update_tcp_hdr(cqe, tcp); + check = csum_partial(tcp, tcp->doff * 4, + csum_unfold((__force __sum16)cqe->check_sum)); + /* Almost done, don't forget the pseudo header */ + tcp->check = csum_ipv6_magic(&ipv6->saddr, &ipv6->daddr, payload_len, + IPPROTO_TCP, check); } } -- cgit v1.2.3 From ef7a3518f7dd4f4cf5e5b5358c93d1eb78df28fb Mon Sep 17 00:00:00 2001 From: Inbar Karmy Date: Thu, 7 Dec 2017 17:26:33 +0200 Subject: net/mlx5e: Fix loopback self test when GRO is off When GRO is off, the transport header pointer in sk_buff is initialized to network's header. To find the udp header, instead of using udp_hdr() which assumes skb_network_header was set, manually calculate the udp header offset. Fixes: 0952da791c97 ("net/mlx5e: Add support for loopback selftest") Signed-off-by: Inbar Karmy Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c index 5a4608281f38..707976482c09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c @@ -216,7 +216,8 @@ mlx5e_test_loopback_validate(struct sk_buff *skb, if (iph->protocol != IPPROTO_UDP) goto out; - udph = udp_hdr(skb); + /* Don't assume skb_transport_header() was set */ + udph = (struct udphdr *)((u8 *)iph + 4 * iph->ihl); if (udph->dest != htons(9)) goto out; -- cgit v1.2.3 From f600c6088018d1dbc5777d18daa83660f7ea4a64 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Thu, 25 Jan 2018 11:18:09 +0200 Subject: net/mlx5e: Verify inline header size do not exceed SKB linear size Driver tries to copy at least MLX5E_MIN_INLINE bytes into the control segment of the WQE. It assumes that the linear part contains at least MLX5E_MIN_INLINE bytes, which can be wrong. Cited commit verified that driver will not copy more bytes into the inline header part that the actual size of the packet. Re-factor this check to make sure we do not exceed the linear part as well. This fix is aligned with the current driver's assumption that the entire L2 will be present in the linear part of the SKB. Fixes: 6aace17e64f4 ("net/mlx5e: Fix inline header size for small packets") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c index 569b42a01026..11b4f1089d1c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c @@ -176,7 +176,7 @@ static inline u16 mlx5e_calc_min_inline(enum mlx5_inline_modes mode, default: hlen = mlx5e_skb_l2_header_offset(skb); } - return min_t(u16, hlen, skb->len); + return min_t(u16, hlen, skb_headlen(skb)); } static inline void mlx5e_tx_skb_pull_inline(unsigned char **skb_data, -- cgit v1.2.3 From 9afe9a5353778994d4396f3d5ff639221bfa5cc9 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Jan 2018 13:19:51 +0000 Subject: net/mlx5e: Eliminate build warnings on no previous prototype Fix these gcc warnings on drivers/net/ethernet/mellanox/mlx5: [..]/core/lib/clock.c:454:6: warning: no previous prototype for 'mlx5_init_clock' [-Wmissing-prototypes] [..]/core/lib/clock.c:510:6: warning: no previous prototype for 'mlx5_cleanup_clock' [-Wmissing-prototypes] [..]/core/en_main.c:3141:5: warning: no previous prototype for 'mlx5e_setup_tc' [-Wmissing-prototypes] Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 4 ++-- drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index 47bab842c5ee..a64b9226d281 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -2994,8 +2994,8 @@ static int mlx5e_setup_tc_block(struct net_device *dev, } #endif -int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, - void *type_data) +static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type, + void *type_data) { switch (type) { #ifdef CONFIG_MLX5_ESWITCH diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c index e159243e0fcf..857035583ccd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c @@ -34,6 +34,7 @@ #include #include #include "en.h" +#include "clock.h" enum { MLX5_CYCLES_SHIFT = 23 -- cgit v1.2.3 From 4f5c02f949973b7c9dfa8a7c23d766b1208d208f Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Mon, 1 Jan 2018 13:29:53 +0000 Subject: net/mlx5: Address static checker warnings on non-constant initializers Address these sparse warnings on drivers/net/ethernet/mellanox/mlx5 [..]/core/diag/fs_tracepoint.c:99:53: warning: non-constant initializer for static object [..]/core/diag/fs_tracepoint.c:102:53: warning: non-constant initializer for static object etc Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c index 0be4575b58a2..fd509160c8f6 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c @@ -96,10 +96,10 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p, "%pI4"); } else if (ethertype.v == ETH_P_IPV6) { static const struct in6_addr full_ones = { - .in6_u.u6_addr32 = {htonl(0xffffffff), - htonl(0xffffffff), - htonl(0xffffffff), - htonl(0xffffffff)}, + .in6_u.u6_addr32 = {__constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff), + __constant_htonl(0xffffffff)}, }; DECLARE_MASK_VAL(struct in6_addr, src_ipv6); DECLARE_MASK_VAL(struct in6_addr, dst_ipv6); -- cgit v1.2.3 From 001a2fc0c8cc29241305e44ffbce52d1daf8782b Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Tue, 30 Jan 2018 13:16:58 +0200 Subject: net/mlx5e: Return error if prio is specified when offloading eswitch vlan push This isn't supported when we emulate eswitch vlan push action which is the current state of things. Fixes: 8b32580df1cb ('net/mlx5e: Add TC vlan action for SRIOV offloads') Signed-off-by: Or Gerlitz Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index fd98b0dc610f..fa86a1466718 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -2529,7 +2529,8 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv, struct tcf_exts *exts, if (tcf_vlan_action(a) == TCA_VLAN_ACT_POP) { attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_POP; } else if (tcf_vlan_action(a) == TCA_VLAN_ACT_PUSH) { - if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q)) + if (tcf_vlan_push_proto(a) != htons(ETH_P_8021Q) || + tcf_vlan_push_prio(a)) return -EOPNOTSUPP; attr->action |= MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH; -- cgit v1.2.3 From 2f0db87901698cd73d828cc6fb1957b8916fc911 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Thu, 25 Jan 2018 18:00:41 +0200 Subject: net/mlx5e: Specify numa node when allocating drop rq When allocating a drop rq, no numa node is explicitly set which means allocations are done on node zero. This is not necessarily the nearest numa node to the HCA, and even worse, might even be a memoryless numa node. Choose the numa_node given to us by the pci device in order to properly allocate the coherent dma memory instead of assuming zero is valid. Fixes: 556dd1b9c313 ("net/mlx5e: Set drop RQ's necessary parameters only") Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index a64b9226d281..da94c8cba5ee 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -1768,13 +1768,16 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv, param->wq.linear = 1; } -static void mlx5e_build_drop_rq_param(struct mlx5e_rq_param *param) +static void mlx5e_build_drop_rq_param(struct mlx5_core_dev *mdev, + struct mlx5e_rq_param *param) { void *rqc = param->rqc; void *wq = MLX5_ADDR_OF(rqc, rqc, wq); MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST); MLX5_SET(wq, wq, log_wq_stride, ilog2(sizeof(struct mlx5e_rx_wqe))); + + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); } static void mlx5e_build_sq_param_common(struct mlx5e_priv *priv, @@ -2634,6 +2637,9 @@ static int mlx5e_alloc_drop_cq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq, struct mlx5e_cq_param *param) { + param->wq.buf_numa_node = dev_to_node(&mdev->pdev->dev); + param->wq.db_numa_node = dev_to_node(&mdev->pdev->dev); + return mlx5e_alloc_cq_common(mdev, param, cq); } @@ -2645,7 +2651,7 @@ static int mlx5e_open_drop_rq(struct mlx5_core_dev *mdev, struct mlx5e_cq *cq = &drop_rq->cq; int err; - mlx5e_build_drop_rq_param(&rq_param); + mlx5e_build_drop_rq_param(mdev, &rq_param); err = mlx5e_alloc_drop_cq(mdev, cq, &cq_param); if (err) -- cgit v1.2.3 From c67f100edae0d2f43e8b35955f7710d702efd590 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Fri, 2 Feb 2018 09:32:53 -0600 Subject: net/mlx5: Use 128B cacheline size for 128B or larger cachelines The adapter uses the cache_line_128byte setting to set the bounds for end padding. On systems where the cacheline size is greater than 128B use 128B instead of the default of 64B. This results in fewer partial cacheline writes. There's a 50% chance it will pad to the end of a 256B cache line vs only 25% when using 64B. Fixes: f32f5bd2eb7e ("net/mlx5: Configure cache line size for start and end padding") Signed-off-by: Daniel Jurgens Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/main.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index 2ef641c91c26..ae391e4b7070 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -551,7 +551,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev) MLX5_SET(cmd_hca_cap, set_hca_cap, cache_line_128byte, - cache_line_size() == 128 ? 1 : 0); + cache_line_size() >= 128 ? 1 : 0); if (MLX5_CAP_GEN_MAX(dev, dct)) MLX5_SET(cmd_hca_cap, set_hca_cap, dct, 1); -- cgit v1.2.3 From 96de67a77293b4da48a05f6ec0385f60006a7ba6 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Sun, 11 Feb 2018 13:26:06 +0200 Subject: net/mlx5: Add header re-write to the checks for conflicting actions We can't allow only some of the rules sharing an FTE to ask for header re-write, add it to the conflicting action checks. Fixes: 0d235c3fabb7 ('net/mlx5: Add hash table to search FTEs in a flow-group') Signed-off-by: Or Gerlitz Reviewed-by: Matan Barak Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index c025c98700e4..6caa4a7ad869 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1429,7 +1429,8 @@ static bool check_conflicting_actions(u32 action1, u32 action2) if (xored_actions & (MLX5_FLOW_CONTEXT_ACTION_DROP | MLX5_FLOW_CONTEXT_ACTION_ENCAP | - MLX5_FLOW_CONTEXT_ACTION_DECAP)) + MLX5_FLOW_CONTEXT_ACTION_DECAP | + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR)) return true; return false; -- cgit v1.2.3 From 26a0f6e82997d5c8345782b55d3a7894421f777f Mon Sep 17 00:00:00 2001 From: Eugenia Emantayev Date: Wed, 31 Jan 2018 09:36:29 +0200 Subject: net/mlx5: E-Switch, Fix drop counters use before creation First use of drop counters happens in esw_apply_vport_conf function, while they are allocated later in the flow. Fix that by moving esw_vport_create_drop_counters function to be called before the first use. Fixes: b8a0dbe3a90b ("net/mlx5e: E-switch, Add steering drop counters") Signed-off-by: Eugenia Emantayev Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index 5ecf2cddc16d..c2b1d7d351fc 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1529,6 +1529,10 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, esw_debug(esw->dev, "Enabling VPORT(%d)\n", vport_num); + /* Create steering drop counters for ingress and egress ACLs */ + if (vport_num && esw->mode == SRIOV_LEGACY) + esw_vport_create_drop_counters(vport); + /* Restore old vport configuration */ esw_apply_vport_conf(esw, vport); @@ -1545,10 +1549,6 @@ static void esw_enable_vport(struct mlx5_eswitch *esw, int vport_num, if (!vport_num) vport->info.trusted = true; - /* create steering drop counters for ingress and egress ACLs */ - if (vport_num && esw->mode == SRIOV_LEGACY) - esw_vport_create_drop_counters(vport); - esw_vport_change_handle_locked(vport); esw->enabled_vports++; -- cgit v1.2.3 From 9238e380e823a39983ee8d6b6ee8d1a9c4ba8a65 Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 6 Feb 2018 10:52:19 +0200 Subject: net/mlx5: Fix error handling when adding flow rules If building match list or adding existing fg fails when node is locked, function returned without unlocking it. This happened if node version changed or adding existing fg returned with EAGAIN after jumping to search_again_locked label. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Vlad Buslov Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 6caa4a7ad869..31fc2cfac3b3 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -1759,8 +1759,11 @@ search_again_locked: /* Collect all fgs which has a matching match_criteria */ err = build_match_list(&match_head, ft, spec); - if (err) + if (err) { + if (take_write) + up_write_ref_node(&ft->node); return ERR_PTR(err); + } if (!take_write) up_read_ref_node(&ft->node); @@ -1769,8 +1772,11 @@ search_again_locked: dest_num, version); free_match_list(&match_head); if (!IS_ERR(rule) || - (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) + (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { + if (take_write) + up_write_ref_node(&ft->node); return rule; + } if (!take_write) { nested_down_write_ref_node(&ft->node, FS_LOCK_GRANDPARENT); -- cgit v1.2.3 From 5ae437ad5a2ed573b1ebb04e0afa70b8869f88dd Mon Sep 17 00:00:00 2001 From: Roman Kapl Date: Mon, 19 Feb 2018 21:32:51 +0100 Subject: net: sched: report if filter is too large to dump So far, if the filter was too large to fit in the allocated skb, the kernel did not return any error and stopped dumping. Modify the dumper so that it returns -EMSGSIZE when a filter fails to dump and it is the first filter in the skb. If we are not first, we will get a next chance with more room. I understand this is pretty near to being an API change, but the original design (silent truncation) can be considered a bug. Note: The error case can happen pretty easily if you create a filter with 32 actions and have 4kb pages. Also recent versions of iproute try to be clever with their buffer allocation size, which in turn leads to Signed-off-by: Roman Kapl Acked-by: Jiri Pirko Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index a7dc7271042a..247b7cc20c13 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1397,13 +1397,18 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb) nla_get_u32(tca[TCA_CHAIN]) != chain->index) continue; if (!tcf_chain_dump(chain, q, parent, skb, cb, - index_start, &index)) + index_start, &index)) { + err = -EMSGSIZE; break; + } } cb->args[0] = index; out: + /* If we did no progress, the error (EMSGSIZE) is real */ + if (skb->len == 0 && err) + return err; return skb->len; } -- cgit v1.2.3 From 7324f5399b06cdbbd1520b8fde8024035953179d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:04 +0100 Subject: virtio_net: disable XDP_REDIRECT in receive_mergeable() case The virtio_net code have three different RX code-paths in receive_buf(). Two of these code paths can handle XDP, but one of them is broken for at least XDP_REDIRECT. Function(1): receive_big() does not support XDP. Function(2): receive_small() support XDP fully and uses build_skb(). Function(3): receive_mergeable() broken XDP_REDIRECT uses napi_alloc_skb(). The simple explanation is that receive_mergeable() is broken because it uses napi_alloc_skb(), which violates XDP given XDP assumes packet header+data in single page and enough tail room for skb_shared_info. The longer explaination is that receive_mergeable() tries to work-around and satisfy these XDP requiresments e.g. by having a function xdp_linearize_page() that allocates and memcpy RX buffers around (in case packet is scattered across multiple rx buffers). This does currently satisfy XDP_PASS, XDP_DROP and XDP_TX (but only because we have not implemented bpf_xdp_adjust_tail yet). The XDP_REDIRECT action combined with cpumap is broken, and cause hard to debug crashes. The main issue is that the RX packet does not have the needed tail-room (SKB_DATA_ALIGN(skb_shared_info)), causing skb_shared_info to overlap the next packets head-room (in which cpumap stores info). Reproducing depend on the packet payload length and if RX-buffer size happened to have tail-room for skb_shared_info or not. But to make this even harder to troubleshoot, the RX-buffer size is runtime dynamically change based on an Exponentially Weighted Moving Average (EWMA) over the packet length, when refilling RX rings. This patch only disable XDP_REDIRECT support in receive_mergeable() case, because it can cause a real crash. IMHO we should consider NOT supporting XDP in receive_mergeable() at all, because the principles behind XDP are to gain speed by (1) code simplicity, (2) sacrificing memory and (3) where possible moving runtime checks to setup time. These principles are clearly being violated in receive_mergeable(), that e.g. runtime track average buffer size to save memory consumption. In the longer run, we should consider introducing a separate receive function when attaching an XDP program, and also change the memory model to be compatible with XDP when attaching an XDP prog. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 626c27352ae2..0ca91942a884 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -677,7 +677,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); - int err; head_skb = NULL; @@ -754,12 +753,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, goto err_xdp; rcu_read_unlock(); goto xdp_xmit; - case XDP_REDIRECT: - err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (!err) - *xdp_xmit = true; - rcu_read_unlock(); - goto xdp_xmit; default: bpf_warn_invalid_xdp_action(act); case XDP_ABORTED: -- cgit v1.2.3 From 95dbe9e7b3720efa5cf83d21f44f6d953f7cf4a2 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:10 +0100 Subject: virtio_net: fix XDP code path in receive_small() When configuring virtio_net to use the code path 'receive_small()', in-order to get correct XDP_REDIRECT support, I discovered TCP packets would get silently dropped when loading an XDP program action XDP_PASS. The bug seems to be that receive_small() when XDP is loaded check that hdr->hdr.flags is zero, which seems wrong as hdr.flags contains the flags VIRTIO_NET_HDR_F_* : #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* Use csum_start, csum_offset */ #define VIRTIO_NET_HDR_F_DATA_VALID 2 /* Csum is valid */ TCP got dropped as it had the VIRTIO_NET_HDR_F_DATA_VALID flag set. The flags that are relevant here are the VIRTIO_NET_HDR_GSO_* flags stored in hdr->hdr.gso_type. Thus, the fix is just check that none of the gso_type flags have been set. Fixes: bb91accf2733 ("virtio-net: XDP support for small buffers") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 0ca91942a884..10c8fc46b588 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -558,7 +558,7 @@ static struct sk_buff *receive_small(struct net_device *dev, void *orig_data; u32 act; - if (unlikely(hdr->hdr.gso_type || hdr->hdr.flags)) + if (unlikely(hdr->hdr.gso_type)) goto err_xdp; if (unlikely(xdp_headroom < virtnet_get_headroom(vi))) { -- cgit v1.2.3 From 11b7d897ccc1fb5a3d3f9eb1e6b4574671e5dd7d Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:15 +0100 Subject: virtio_net: fix memory leak in XDP_REDIRECT XDP_REDIRECT calling xdp_do_redirect() can fail for multiple reasons (which can be inspected by tracepoints). The current semantics is that on failure the driver calling xdp_do_redirect() must handle freeing or recycling the page associated with this frame. This can be seen as an optimization, as drivers usually have an optimized XDP_DROP code path for frame recycling in place already. The virtio_net driver didn't handle when xdp_do_redirect() failed. This caused a memory leak as the page refcnt wasn't decremented on failures. The function __virtnet_xdp_xmit() did handle one type of failure, when the xmit queue virtqueue_add_outbuf() is full, which "hides" releasing a refcnt on the page. Instead the function __virtnet_xdp_xmit() must follow API of xdp_do_redirect(), which on errors leave it up to the caller to free the page, of the failed send operation. Fixes: 186b3c998c50 ("virtio-net: support XDP_REDIRECT") Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 37 ++++++++++++++++++++++--------------- 1 file changed, 22 insertions(+), 15 deletions(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 10c8fc46b588..1e0e0fce3ab2 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -443,12 +443,8 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, sg_init_one(sq->sg, xdp->data, xdp->data_end - xdp->data); err = virtqueue_add_outbuf(sq->vq, sq->sg, 1, xdp->data, GFP_ATOMIC); - if (unlikely(err)) { - struct page *page = virt_to_head_page(xdp->data); - - put_page(page); - return false; - } + if (unlikely(err)) + return false; /* Caller handle free/refcnt */ return true; } @@ -546,8 +542,11 @@ static struct sk_buff *receive_small(struct net_device *dev, unsigned int buflen = SKB_DATA_ALIGN(GOOD_PACKET_LEN + headroom) + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); struct page *page = virt_to_head_page(buf); - unsigned int delta = 0, err; + unsigned int delta = 0; struct page *xdp_page; + bool sent; + int err; + len -= vi->hdr_len; rcu_read_lock(); @@ -596,16 +595,19 @@ static struct sk_buff *receive_small(struct net_device *dev, delta = orig_data - xdp.data; break; case XDP_TX: - if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) + sent = __virtnet_xdp_xmit(vi, &xdp); + if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - else - *xdp_xmit = true; + goto err_xdp; + } + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; case XDP_REDIRECT: err = xdp_do_redirect(dev, &xdp, xdp_prog); - if (!err) - *xdp_xmit = true; + if (err) + goto err_xdp; + *xdp_xmit = true; rcu_read_unlock(); goto xdp_xmit; default: @@ -677,6 +679,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, struct bpf_prog *xdp_prog; unsigned int truesize; unsigned int headroom = mergeable_ctx_to_headroom(ctx); + bool sent; head_skb = NULL; @@ -745,10 +748,14 @@ static struct sk_buff *receive_mergeable(struct net_device *dev, } break; case XDP_TX: - if (unlikely(!__virtnet_xdp_xmit(vi, &xdp))) + sent = __virtnet_xdp_xmit(vi, &xdp); + if (unlikely(!sent)) { trace_xdp_exception(vi->dev, xdp_prog, act); - else - *xdp_xmit = true; + if (unlikely(xdp_page != page)) + put_page(xdp_page); + goto err_xdp; + } + *xdp_xmit = true; if (unlikely(xdp_page != page)) goto err_xdp; rcu_read_unlock(); -- cgit v1.2.3 From 8dcc5b0ab0ec9a2efb3362d380272546b8b2ee26 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Tue, 20 Feb 2018 14:32:20 +0100 Subject: virtio_net: fix ndo_xdp_xmit crash towards dev not ready for XDP When a driver implements the ndo_xdp_xmit() function, there is (currently) no generic way to determine whether it is safe to call. It is e.g. unsafe to call the drivers ndo_xdp_xmit, if it have not allocated the needed XDP TX queues yet. This is the case for virtio_net, which first allocates the XDP TX queues once an XDP/bpf prog is attached (in virtnet_xdp_set()). Thus, a crash will occur for virtio_net when redirecting to another virtio_net device's ndo_xdp_xmit, which have not attached a XDP prog. The sample xdp_redirect_map tries to attach a dummy XDP prog to take this into account, but it can also easily fail if the virtio_net (or actually underlying vhost driver) have not allocated enough extra queues for the device. Allocating more queue this is currently a manual config. Hint for libvirt XML add: The solution in this patch is to check that the device have loaded an XDP/bpf prog before proceeding. This is similar to the check performed in driver ixgbe. Signed-off-by: Jesper Dangaard Brouer Acked-by: John Fastabend Signed-off-by: David S. Miller --- drivers/net/virtio_net.c | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 1e0e0fce3ab2..9bb9e562b893 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -452,8 +452,18 @@ static bool __virtnet_xdp_xmit(struct virtnet_info *vi, static int virtnet_xdp_xmit(struct net_device *dev, struct xdp_buff *xdp) { struct virtnet_info *vi = netdev_priv(dev); - bool sent = __virtnet_xdp_xmit(vi, xdp); + struct receive_queue *rq = vi->rq; + struct bpf_prog *xdp_prog; + bool sent; + + /* Only allow ndo_xdp_xmit if XDP is loaded on dev, as this + * indicate XDP resources have been successfully allocated. + */ + xdp_prog = rcu_dereference(rq->xdp_prog); + if (!xdp_prog) + return -ENXIO; + sent = __virtnet_xdp_xmit(vi, xdp); if (!sent) return -ENOSPC; return 0; -- cgit v1.2.3 From cfd092f2db8b4b6727e1c03ef68a7842e1023573 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Tue, 20 Feb 2018 15:22:05 -0600 Subject: amd-xgbe: Restore PCI interrupt enablement setting on resume After resuming from suspend, the PCI device support must re-enable the interrupt setting so that interrupts are actually delivered. Signed-off-by: Tom Lendacky Signed-off-by: David S. Miller --- drivers/net/ethernet/amd/xgbe/xgbe-pci.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c index 3e5833cf1fab..eb23f9ba1a9a 100644 --- a/drivers/net/ethernet/amd/xgbe/xgbe-pci.c +++ b/drivers/net/ethernet/amd/xgbe/xgbe-pci.c @@ -426,6 +426,8 @@ static int xgbe_pci_resume(struct pci_dev *pdev) struct net_device *netdev = pdata->netdev; int ret = 0; + XP_IOWRITE(pdata, XP_INT_EN, 0x1fffff); + pdata->lpm_ctrl &= ~MDIO_CTRL1_LPOWER; XMDIO_WRITE(pdata, MDIO_MMD_PCS, MDIO_CTRL1, pdata->lpm_ctrl); -- cgit v1.2.3 From a7dcdf6ea1b264ee7655a8cafe844f06eed3906a Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 20 Feb 2018 23:07:33 +0100 Subject: bpf: clean up unused-variable warning The only user of this variable is inside of an #ifdef, causing a warning without CONFIG_INET: net/core/filter.c: In function '____bpf_sock_ops_cb_flags_set': net/core/filter.c:3382:6: error: unused variable 'val' [-Werror=unused-variable] int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; This replaces the #ifdef with a nicer IS_ENABLED() check that makes the code more readable and avoids the warning. Fixes: b13d88072172 ("bpf: Adds field bpf_sock_ops_cb_flags to tcp_sock") Signed-off-by: Arnd Bergmann Signed-off-by: Daniel Borkmann --- net/core/filter.c | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/net/core/filter.c b/net/core/filter.c index 08ab4c65a998..0c121adbdbaa 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3381,17 +3381,13 @@ BPF_CALL_2(bpf_sock_ops_cb_flags_set, struct bpf_sock_ops_kern *, bpf_sock, struct sock *sk = bpf_sock->sk; int val = argval & BPF_SOCK_OPS_ALL_CB_FLAGS; - if (!sk_fullsock(sk)) + if (!IS_ENABLED(CONFIG_INET) || !sk_fullsock(sk)) return -EINVAL; -#ifdef CONFIG_INET if (val) tcp_sk(sk)->bpf_sock_ops_cb_flags = val; return argval & (~BPF_SOCK_OPS_ALL_CB_FLAGS); -#else - return -EINVAL; -#endif } static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { -- cgit v1.2.3 From b52db43a3d2e34b4ef2bb563d95227bb755027df Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 17:51:16 +0100 Subject: selftests/bpf: tcpbpf_kern: use in6_* macros from glibc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Both glibc and the kernel have in6_* macros definitions. Build fails because it picks up wrong in6_* macro from the kernel header and not the header from glibc. Fixes build error below: clang -I. -I./include/uapi -I../../../include/uapi -Wno-compare-distinct-pointer-types \ -O2 -target bpf -emit-llvm -c test_tcpbpf_kern.c -o - | \ llc -march=bpf -mcpu=generic -filetype=obj -o .../tools/testing/selftests/bpf/test_tcpbpf_kern.o In file included from test_tcpbpf_kern.c:12: .../netinet/in.h:101:5: error: expected identifier IPPROTO_HOPOPTS = 0, /* IPv6 Hop-by-Hop options. */ ^ .../linux/in6.h:131:26: note: expanded from macro 'IPPROTO_HOPOPTS' ^ In file included from test_tcpbpf_kern.c:12: /usr/include/netinet/in.h:103:5: error: expected identifier IPPROTO_ROUTING = 43, /* IPv6 routing header. */ ^ .../linux/in6.h:132:26: note: expanded from macro 'IPPROTO_ROUTING' ^ In file included from test_tcpbpf_kern.c:12: .../netinet/in.h:105:5: error: expected identifier IPPROTO_FRAGMENT = 44, /* IPv6 fragmentation header. */ ^ Since both glibc and the kernel have in6_* macros definitions, use the one from glibc. Kernel headers will check for previous libc definitions by including include/linux/libc-compat.h. Reported-by: Daniel Díaz Signed-off-by: Anders Roxell Tested-by: Daniel Díaz Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_tcpbpf_kern.c | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/testing/selftests/bpf/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/test_tcpbpf_kern.c index 57119ad57a3f..3e645ee41ed5 100644 --- a/tools/testing/selftests/bpf/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/test_tcpbpf_kern.c @@ -5,7 +5,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From 31a8260d3e34aaddf821388b8e0d589f44401f75 Mon Sep 17 00:00:00 2001 From: Anders Roxell Date: Wed, 21 Feb 2018 22:30:01 +0100 Subject: selftests/bpf: update gitignore with test_libbpf_open MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit bpf builds a test program for loading BPF ELF files. Add the executable to the .gitignore list. Signed-off-by: Anders Roxell Tested-by: Daniel Díaz Acked-by: David S. Miller Acked-by: Shuah Khan Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/.gitignore | 1 + 1 file changed, 1 insertion(+) diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index cc15af2e54fe..9cf83f895d98 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -11,3 +11,4 @@ test_progs test_tcpbpf_user test_verifier_log feature +test_libbpf_open -- cgit v1.2.3 From 80475c48c6a8a65171e035e0915dc7996b5a0a65 Mon Sep 17 00:00:00 2001 From: Li Zhijian Date: Thu, 22 Feb 2018 10:34:02 +0800 Subject: selftests/bpf/test_maps: exit child process without error in ENOMEM case test_maps contains a series of stress tests, and previously it will break the rest tests when it failed to alloc memory. ----------------------- Failed to create hashmap key=8 value=262144 'Cannot allocate memory' Failed to create hashmap key=16 value=262144 'Cannot allocate memory' Failed to create hashmap key=8 value=262144 'Cannot allocate memory' Failed to create hashmap key=8 value=262144 'Cannot allocate memory' test_maps: test_maps.c:955: run_parallel: Assertion `status == 0' failed. Aborted not ok 1..3 selftests: test_maps [FAIL] ----------------------- after this patch, the rest tests will be continue when it occurs an ENOMEM failure CC: Alexei Starovoitov CC: Philip Li Suggested-by: Daniel Borkmann Signed-off-by: Li Zhijian Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_maps.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 436c4c72414f..9e03a4c356a4 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -126,6 +126,8 @@ static void test_hashmap_sizes(int task, void *data) fd = bpf_create_map(BPF_MAP_TYPE_HASH, i, j, 2, map_flags); if (fd < 0) { + if (errno == ENOMEM) + return; printf("Failed to create hashmap key=%d value=%d '%s'\n", i, j, strerror(errno)); exit(1); -- cgit v1.2.3 From b87b6194be631c94785fe93398651e804ed43e28 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Wed, 21 Feb 2018 04:41:59 +0100 Subject: netlink: put module reference if dump start fails Before, if cb->start() failed, the module reference would never be put, because cb->cb_running is intentionally false at this point. Users are generally annoyed by this because they can no longer unload modules that leak references. Also, it may be possible to tediously wrap a reference counter back to zero, especially since module.c still uses atomic_inc instead of refcount_inc. This patch expands the error path to simply call module_put if cb->start() fails. Fixes: 41c87425a1ac ("netlink: do not set cb_running if dump's start() errs") Signed-off-by: Jason A. Donenfeld Signed-off-by: David S. Miller --- net/netlink/af_netlink.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index 2ad445c1d27c..07e8478068f0 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2308,7 +2308,7 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, if (cb->start) { ret = cb->start(cb); if (ret) - goto error_unlock; + goto error_put; } nlk->cb_running = true; @@ -2328,6 +2328,8 @@ int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, */ return -EINTR; +error_put: + module_put(control->module); error_unlock: sock_put(sk); mutex_unlock(nlk->cb_mutex); -- cgit v1.2.3 From 88e80c62671ceecdbb77c902731ec95a4bfa62f9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Feb 2018 21:42:26 -0800 Subject: smsc75xx: fix smsc75xx_set_features() If an attempt is made to disable RX checksums, USB adapter is changed but netdev->features is not, because smsc75xx_set_features() returns a non zero value. This throws errors from netdev_rx_csum_fault() : : hw csum failure Signed-off-by: Eric Dumazet Cc: Steve Glendinning Signed-off-by: David S. Miller --- drivers/net/usb/smsc75xx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/drivers/net/usb/smsc75xx.c b/drivers/net/usb/smsc75xx.c index d0a113743195..7a6a1fe79309 100644 --- a/drivers/net/usb/smsc75xx.c +++ b/drivers/net/usb/smsc75xx.c @@ -954,10 +954,11 @@ static int smsc75xx_set_features(struct net_device *netdev, /* it's racing here! */ ret = smsc75xx_write_reg(dev, RFE_CTL, pdata->rfe_ctl); - if (ret < 0) + if (ret < 0) { netdev_warn(dev->net, "Error writing RFE_CTL\n"); - - return ret; + return ret; + } + return 0; } static int smsc75xx_wait_ready(struct usbnet *dev, int in_pm) -- cgit v1.2.3 From 350c9f484bde93ef229682eedd98cd5f74350f7f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 21 Feb 2018 06:43:03 -0800 Subject: tcp_bbr: better deal with suboptimal GSO MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit BBR uses tcp_tso_autosize() in an attempt to probe what would be the burst sizes and to adjust cwnd in bbr_target_cwnd() with following gold formula : /* Allow enough full-sized skbs in flight to utilize end systems. */ cwnd += 3 * bbr->tso_segs_goal; But GSO can be lacking or be constrained to very small units (ip link set dev ... gso_max_segs 2) What we really want is to have enough packets in flight so that both GSO and GRO are efficient. So in the case GSO is off or downgraded, we still want to have the same number of packets in flight as if GSO/TSO was fully operational, so that GRO can hopefully be working efficiently. To fix this issue, we make tcp_tso_autosize() unaware of sk->sk_gso_max_segs Only tcp_tso_segs() has to enforce the gso_max_segs limit. Tested: ethtool -K eth0 tso off gso off tc qd replace dev eth0 root pfifo_fast Before patch: for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done     691  (ss -temoi shows cwnd is stuck around 6 )     667     651     631     517 After patch : # for f in {1..5}; do ./super_netperf 1 -H lpaa24 -- -K bbr; done    1733 (ss -temoi shows cwnd is around 386 )    1778    1746    1781    1718 Fixes: 0f8782ea1497 ("tcp_bbr: add BBR congestion control") Signed-off-by: Eric Dumazet Reported-by: Oleksandr Natalenko Acked-by: Neal Cardwell Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- net/ipv4/tcp_output.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c index b2bca373f8be..6818042cd8a9 100644 --- a/net/ipv4/tcp_output.c +++ b/net/ipv4/tcp_output.c @@ -1730,7 +1730,7 @@ u32 tcp_tso_autosize(const struct sock *sk, unsigned int mss_now, */ segs = max_t(u32, bytes / mss_now, min_tso_segs); - return min_t(u32, segs, sk->sk_gso_max_segs); + return segs; } EXPORT_SYMBOL(tcp_tso_autosize); @@ -1742,9 +1742,10 @@ static u32 tcp_tso_segs(struct sock *sk, unsigned int mss_now) const struct tcp_congestion_ops *ca_ops = inet_csk(sk)->icsk_ca_ops; u32 tso_segs = ca_ops->tso_segs_goal ? ca_ops->tso_segs_goal(sk) : 0; - return tso_segs ? : - tcp_tso_autosize(sk, mss_now, - sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + if (!tso_segs) + tso_segs = tcp_tso_autosize(sk, mss_now, + sock_net(sk)->ipv4.sysctl_tcp_min_tso_segs); + return min_t(u32, tso_segs, sk->sk_gso_max_segs); } /* Returns the portion of skb which can be sent right away */ -- cgit v1.2.3 From 1fe4b1184c2ae2bfbf9e8b14c9c0c1945c98f205 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 21 Feb 2018 11:00:54 -0800 Subject: net: ipv4: Set addr_type in hash_keys for forwarded case The result of the skb flow dissect is copied from keys to hash_keys to ensure only the intended data is hashed. The original L4 hash patch overlooked setting the addr_type for this case; add it. Fixes: bf4e0a3db97eb ("net: ipv4: add support for ECMP hash policy choice") Reported-by: Ido Schimmel Signed-off-by: David Ahern Acked-by: Nikolay Aleksandrov Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- net/ipv4/route.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 49cc1c1df1ba..a4f44d815a61 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -1826,6 +1826,8 @@ int fib_multipath_hash(const struct fib_info *fi, const struct flowi4 *fl4, return skb_get_hash_raw(skb) >> 1; memset(&hash_keys, 0, sizeof(hash_keys)); skb_flow_dissect_flow_keys(skb, &keys, flag); + + hash_keys.control.addr_type = FLOW_DISSECTOR_KEY_IPV4_ADDRS; hash_keys.addrs.v4addrs.src = keys.addrs.v4addrs.src; hash_keys.addrs.v4addrs.dst = keys.addrs.v4addrs.dst; hash_keys.ports.src = keys.ports.src; -- cgit v1.2.3 From 83090e7d35caaabc8daa65fd698275951455bbec Mon Sep 17 00:00:00 2001 From: Finn Thain Date: Thu, 22 Feb 2018 09:24:59 +1100 Subject: net/smc9194: Remove bogus CONFIG_MAC reference AFAIK the only version of smc9194.c with Mac support is the one in the linux-mac68k CVS repo, which never made it to the mainline. Despite that, from v2.3.45, arch/m68k/config.in listed CONFIG_SMC9194 under CONFIG_MAC. This mistake got carried over into Kconfig in v2.5.55. (See pre-git era "[PATCH] add m68k dependencies to net driver config".) Signed-off-by: Finn Thain Signed-off-by: David S. Miller --- drivers/net/ethernet/smsc/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig index 63aca9f847e1..4c2f612e4414 100644 --- a/drivers/net/ethernet/smsc/Kconfig +++ b/drivers/net/ethernet/smsc/Kconfig @@ -20,7 +20,7 @@ if NET_VENDOR_SMSC config SMC9194 tristate "SMC 9194 support" - depends on (ISA || MAC && BROKEN) + depends on ISA select CRC32 ---help--- This is support for the SMC9xxx based Ethernet cards. Choose this -- cgit v1.2.3 From a2c0f039bbd0f9ebf375176d05b056e3f3b5c4f7 Mon Sep 17 00:00:00 2001 From: Thomas Falcon Date: Wed, 21 Feb 2018 18:18:30 -0600 Subject: ibmvnic: Fix early release of login buffer The login buffer is released before the driver can perform sanity checks between resources the driver requested and what firmware will provide. Don't release the login buffer until the sanity check is performed. Fixes: 34f0f4e3f488 ("ibmvnic: Fix login buffer memory leaks") Signed-off-by: Thomas Falcon Signed-off-by: David S. Miller --- drivers/net/ethernet/ibm/ibmvnic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1495cb99f924..1b3cc8bb0705 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -3760,7 +3760,6 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, dma_unmap_single(dev, adapter->login_buf_token, adapter->login_buf_sz, DMA_BIDIRECTIONAL); - release_login_buffer(adapter); dma_unmap_single(dev, adapter->login_rsp_buf_token, adapter->login_rsp_buf_sz, DMA_BIDIRECTIONAL); @@ -3791,6 +3790,7 @@ static int handle_login_rsp(union ibmvnic_crq *login_rsp_crq, ibmvnic_remove(adapter->vdev); return -EIO; } + release_login_buffer(adapter); complete(&adapter->init_done); return 0; -- cgit v1.2.3 From 657308f73e674e86b60509a430a46e569bf02846 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 22 Feb 2018 20:55:28 +0100 Subject: regulatory: add NUL to request alpha2 Similar to the ancient commit a5fe8e7695dc ("regulatory: add NUL to alpha2"), add another byte to alpha2 in the request struct so that when we use nla_put_string(), we don't overrun anything. Fixes: 73d54c9e74c4 ("cfg80211: add regulatory netlink multicast group") Reported-by: Kees Cook Signed-off-by: Johannes Berg --- include/net/regulatory.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/include/net/regulatory.h b/include/net/regulatory.h index ebc5a2ed8631..f83cacce3308 100644 --- a/include/net/regulatory.h +++ b/include/net/regulatory.h @@ -78,7 +78,7 @@ struct regulatory_request { int wiphy_idx; enum nl80211_reg_initiator initiator; enum nl80211_user_reg_hint_type user_reg_hint_type; - char alpha2[2]; + char alpha2[3]; enum nl80211_dfs_regions dfs_region; bool intersect; bool processed; -- cgit v1.2.3 From 32fff239de37ef226d5b66329dd133f64d63b22d Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2018 08:33:24 -0800 Subject: bpf: add schedule points in percpu arrays management syszbot managed to trigger RCU detected stalls in bpf_array_free_percpu() It takes time to allocate a huge percpu map, but even more time to free it. Since we run in process context, use cond_resched() to yield cpu if needed. Fixes: a10423b87a7e ("bpf: introduce BPF_MAP_TYPE_PERCPU_ARRAY map") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Daniel Borkmann --- kernel/bpf/arraymap.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c index a364c408f25a..14750e7c5ee4 100644 --- a/kernel/bpf/arraymap.c +++ b/kernel/bpf/arraymap.c @@ -26,8 +26,10 @@ static void bpf_array_free_percpu(struct bpf_array *array) { int i; - for (i = 0; i < array->map.max_entries; i++) + for (i = 0; i < array->map.max_entries; i++) { free_percpu(array->pptrs[i]); + cond_resched(); + } } static int bpf_array_alloc_percpu(struct bpf_array *array) @@ -43,6 +45,7 @@ static int bpf_array_alloc_percpu(struct bpf_array *array) return -ENOMEM; } array->pptrs[i] = ptr; + cond_resched(); } return 0; -- cgit v1.2.3 From 6c5f61023c5b0edb0c8a64c902fe97c6453b1852 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 22 Feb 2018 10:10:35 -0800 Subject: bpf: fix rcu lockdep warning for lpm_trie map_free callback Commit 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback function") fixed a memory leak and removed unnecessary locks in map_free callback function. Unfortrunately, it introduced a lockdep warning. When lockdep checking is turned on, running tools/testing/selftests/bpf/test_lpm_map will have: [ 98.294321] ============================= [ 98.294807] WARNING: suspicious RCU usage [ 98.295359] 4.16.0-rc2+ #193 Not tainted [ 98.295907] ----------------------------- [ 98.296486] /home/yhs/work/bpf/kernel/bpf/lpm_trie.c:572 suspicious rcu_dereference_check() usage! [ 98.297657] [ 98.297657] other info that might help us debug this: [ 98.297657] [ 98.298663] [ 98.298663] rcu_scheduler_active = 2, debug_locks = 1 [ 98.299536] 2 locks held by kworker/2:1/54: [ 98.300152] #0: ((wq_completion)"events"){+.+.}, at: [<00000000196bc1f0>] process_one_work+0x157/0x5c0 [ 98.301381] #1: ((work_completion)(&map->work)){+.+.}, at: [<00000000196bc1f0>] process_one_work+0x157/0x5c0 Since actual trie tree removal happens only after no other accesses to the tree are possible, replacing rcu_dereference_protected(*slot, lockdep_is_held(&trie->lock)) with rcu_dereference_protected(*slot, 1) fixed the issue. Fixes: 9a3efb6b661f ("bpf: fix memory leak in lpm_trie map_free callback function") Reported-by: Eric Dumazet Suggested-by: Eric Dumazet Signed-off-by: Yonghong Song Reviewed-by: Eric Dumazet Acked-by: David S. Miller Signed-off-by: Daniel Borkmann --- kernel/bpf/lpm_trie.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c index a75e02c961b5..b4b5b81e7251 100644 --- a/kernel/bpf/lpm_trie.c +++ b/kernel/bpf/lpm_trie.c @@ -569,8 +569,7 @@ static void trie_free(struct bpf_map *map) slot = &trie->root; for (;;) { - node = rcu_dereference_protected(*slot, - lockdep_is_held(&trie->lock)); + node = rcu_dereference_protected(*slot, 1); if (!node) goto out; -- cgit v1.2.3 From 370c10522e96bf1b2e7fd9e906dbe8fb5be895d2 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 22 Feb 2018 12:11:55 +0300 Subject: net: aquantia: Fix error handling in aq_pci_probe() We should check "self->aq_hw" for allocation failure, and also we should free it on the error paths. Fixes: 23ee07ad3c2f ("net: aquantia: Cleanup pci functions module") Signed-off-by: Dan Carpenter Signed-off-by: David S. Miller --- drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c index 22889fc158f2..87c4308b52a7 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c @@ -226,6 +226,10 @@ static int aq_pci_probe(struct pci_dev *pdev, goto err_ioremap; self->aq_hw = kzalloc(sizeof(*self->aq_hw), GFP_KERNEL); + if (!self->aq_hw) { + err = -ENOMEM; + goto err_ioremap; + } self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self); for (bar = 0; bar < 4; ++bar) { @@ -235,19 +239,19 @@ static int aq_pci_probe(struct pci_dev *pdev, mmio_pa = pci_resource_start(pdev, bar); if (mmio_pa == 0U) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } reg_sz = pci_resource_len(pdev, bar); if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } self->aq_hw->mmio = ioremap_nocache(mmio_pa, reg_sz); if (!self->aq_hw->mmio) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } break; } @@ -255,7 +259,7 @@ static int aq_pci_probe(struct pci_dev *pdev, if (bar == 4) { err = -EIO; - goto err_ioremap; + goto err_free_aq_hw; } numvecs = min((u8)AQ_CFG_VECS_DEF, @@ -290,6 +294,8 @@ err_register: aq_pci_free_irq_vectors(self); err_hwinit: iounmap(self->aq_hw->mmio); +err_free_aq_hw: + kfree(self->aq_hw); err_ioremap: free_netdev(ndev); err_pci_func: -- cgit v1.2.3 From 93c62c45ed5fad1b87e3a45835b251cd68de9c46 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 22 Feb 2018 14:38:14 +0000 Subject: rxrpc: Fix send in rxrpc_send_data_packet() All the kernel_sendmsg() calls in rxrpc_send_data_packet() need to send both parts of the iov[] buffer, but one of them does not. Fix it so that it does. Without this, short IPv6 rxrpc DATA packets may be seen that have the rxrpc header included, but no payload. Fixes: 5a924b8951f8 ("rxrpc: Don't store the rxrpc header in the Tx queue sk_buffs") Reported-by: Marc Dionne Signed-off-by: David Howells Signed-off-by: David S. Miller --- net/rxrpc/output.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c index 42410e910aff..cf73dc006c3b 100644 --- a/net/rxrpc/output.c +++ b/net/rxrpc/output.c @@ -445,7 +445,7 @@ send_fragmentable: (char *)&opt, sizeof(opt)); if (ret == 0) { ret = kernel_sendmsg(conn->params.local->socket, &msg, - iov, 1, iov[0].iov_len); + iov, 2, len); opt = IPV6_PMTUDISC_DO; kernel_setsockopt(conn->params.local->socket, -- cgit v1.2.3 From a493a87f38cfa48caaa95c9347be2d914c6fdf29 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 22 Feb 2018 15:12:53 +0100 Subject: bpf, x64: implement retpoline for tail call Implement a retpoline [0] for the BPF tail call JIT'ing that converts the indirect jump via jmp %rax that is used to make the long jump into another JITed BPF image. Since this is subject to speculative execution, we need to control the transient instruction sequence here as well when CONFIG_RETPOLINE is set, and direct it into a pause + lfence loop. The latter aligns also with what gcc / clang emits (e.g. [1]). JIT dump after patch: # bpftool p d x i 1 0: (18) r2 = map[id:1] 2: (b7) r3 = 0 3: (85) call bpf_tail_call#12 4: (b7) r0 = 2 5: (95) exit With CONFIG_RETPOLINE: # bpftool p d j i 1 [...] 33: cmp %edx,0x24(%rsi) 36: jbe 0x0000000000000072 |* 38: mov 0x24(%rbp),%eax 3e: cmp $0x20,%eax 41: ja 0x0000000000000072 | 43: add $0x1,%eax 46: mov %eax,0x24(%rbp) 4c: mov 0x90(%rsi,%rdx,8),%rax 54: test %rax,%rax 57: je 0x0000000000000072 | 59: mov 0x28(%rax),%rax 5d: add $0x25,%rax 61: callq 0x000000000000006d |+ 66: pause | 68: lfence | 6b: jmp 0x0000000000000066 | 6d: mov %rax,(%rsp) | 71: retq | 72: mov $0x2,%eax [...] * relative fall-through jumps in error case + retpoline for indirect jump Without CONFIG_RETPOLINE: # bpftool p d j i 1 [...] 33: cmp %edx,0x24(%rsi) 36: jbe 0x0000000000000063 |* 38: mov 0x24(%rbp),%eax 3e: cmp $0x20,%eax 41: ja 0x0000000000000063 | 43: add $0x1,%eax 46: mov %eax,0x24(%rbp) 4c: mov 0x90(%rsi,%rdx,8),%rax 54: test %rax,%rax 57: je 0x0000000000000063 | 59: mov 0x28(%rax),%rax 5d: add $0x25,%rax 61: jmpq *%rax |- 63: mov $0x2,%eax [...] * relative fall-through jumps in error case - plain indirect jump as before [0] https://support.google.com/faqs/answer/7625886 [1] https://github.com/gcc-mirror/gcc/commit/a31e654fa107be968b802786d747e962c2fcdb2b Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/x86/include/asm/nospec-branch.h | 37 ++++++++++++++++++++++++++++++++++++ arch/x86/net/bpf_jit_comp.c | 9 +++++---- 2 files changed, 42 insertions(+), 4 deletions(-) diff --git a/arch/x86/include/asm/nospec-branch.h b/arch/x86/include/asm/nospec-branch.h index 76b058533e47..81a1be326571 100644 --- a/arch/x86/include/asm/nospec-branch.h +++ b/arch/x86/include/asm/nospec-branch.h @@ -177,4 +177,41 @@ static inline void indirect_branch_prediction_barrier(void) } #endif /* __ASSEMBLY__ */ + +/* + * Below is used in the eBPF JIT compiler and emits the byte sequence + * for the following assembly: + * + * With retpolines configured: + * + * callq do_rop + * spec_trap: + * pause + * lfence + * jmp spec_trap + * do_rop: + * mov %rax,(%rsp) + * retq + * + * Without retpolines configured: + * + * jmp *%rax + */ +#ifdef CONFIG_RETPOLINE +# define RETPOLINE_RAX_BPF_JIT_SIZE 17 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT1_off32(0xE8, 7); /* callq do_rop */ \ + /* spec_trap: */ \ + EMIT2(0xF3, 0x90); /* pause */ \ + EMIT3(0x0F, 0xAE, 0xE8); /* lfence */ \ + EMIT2(0xEB, 0xF9); /* jmp spec_trap */ \ + /* do_rop: */ \ + EMIT4(0x48, 0x89, 0x04, 0x24); /* mov %rax,(%rsp) */ \ + EMIT1(0xC3); /* retq */ +#else +# define RETPOLINE_RAX_BPF_JIT_SIZE 2 +# define RETPOLINE_RAX_BPF_JIT() \ + EMIT2(0xFF, 0xE0); /* jmp *%rax */ +#endif + #endif /* _ASM_X86_NOSPEC_BRANCH_H_ */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 4923d92f918d..45e4eb5bcbb2 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include /* @@ -290,7 +291,7 @@ static void emit_bpf_tail_call(u8 **pprog) EMIT2(0x89, 0xD2); /* mov edx, edx */ EMIT3(0x39, 0x56, /* cmp dword ptr [rsi + 16], edx */ offsetof(struct bpf_array, map.max_entries)); -#define OFFSET1 43 /* number of bytes to jump */ +#define OFFSET1 (41 + RETPOLINE_RAX_BPF_JIT_SIZE) /* number of bytes to jump */ EMIT2(X86_JBE, OFFSET1); /* jbe out */ label1 = cnt; @@ -299,7 +300,7 @@ static void emit_bpf_tail_call(u8 **pprog) */ EMIT2_off32(0x8B, 0x85, 36); /* mov eax, dword ptr [rbp + 36] */ EMIT3(0x83, 0xF8, MAX_TAIL_CALL_CNT); /* cmp eax, MAX_TAIL_CALL_CNT */ -#define OFFSET2 32 +#define OFFSET2 (30 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JA, OFFSET2); /* ja out */ label2 = cnt; EMIT3(0x83, 0xC0, 0x01); /* add eax, 1 */ @@ -313,7 +314,7 @@ static void emit_bpf_tail_call(u8 **pprog) * goto out; */ EMIT3(0x48, 0x85, 0xC0); /* test rax,rax */ -#define OFFSET3 10 +#define OFFSET3 (8 + RETPOLINE_RAX_BPF_JIT_SIZE) EMIT2(X86_JE, OFFSET3); /* je out */ label3 = cnt; @@ -326,7 +327,7 @@ static void emit_bpf_tail_call(u8 **pprog) * rdi == ctx (1st arg) * rax == prog->bpf_func + prologue_size */ - EMIT2(0xFF, 0xE0); /* jmp rax */ + RETPOLINE_RAX_BPF_JIT(); /* out: */ BUILD_BUG_ON(cnt - label1 != OFFSET1); -- cgit v1.2.3 From 16338a9b3ac30740d49f5dfed81bac0ffa53b9c7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 23 Feb 2018 01:03:43 +0100 Subject: bpf, arm64: fix out of bounds access in tail call I recently noticed a crash on arm64 when feeding a bogus index into BPF tail call helper. The crash would not occur when the interpreter is used, but only in case of JIT. Output looks as follows: [ 347.007486] Unable to handle kernel paging request at virtual address fffb850e96492510 [...] [ 347.043065] [fffb850e96492510] address between user and kernel address ranges [ 347.050205] Internal error: Oops: 96000004 [#1] SMP [...] [ 347.190829] x13: 0000000000000000 x12: 0000000000000000 [ 347.196128] x11: fffc047ebe782800 x10: ffff808fd7d0fd10 [ 347.201427] x9 : 0000000000000000 x8 : 0000000000000000 [ 347.206726] x7 : 0000000000000000 x6 : 001c991738000000 [ 347.212025] x5 : 0000000000000018 x4 : 000000000000ba5a [ 347.217325] x3 : 00000000000329c4 x2 : ffff808fd7cf0500 [ 347.222625] x1 : ffff808fd7d0fc00 x0 : ffff808fd7cf0500 [ 347.227926] Process test_verifier (pid: 4548, stack limit = 0x000000007467fa61) [ 347.235221] Call trace: [ 347.237656] 0xffff000002f3a4fc [ 347.240784] bpf_test_run+0x78/0xf8 [ 347.244260] bpf_prog_test_run_skb+0x148/0x230 [ 347.248694] SyS_bpf+0x77c/0x1110 [ 347.251999] el0_svc_naked+0x30/0x34 [ 347.255564] Code: 9100075a d280220a 8b0a002a d37df04b (f86b694b) [...] In this case the index used in BPF r3 is the same as in r1 at the time of the call, meaning we fed a pointer as index; here, it had the value 0xffff808fd7cf0500 which sits in x2. While I found tail calls to be working in general (also for hitting the error cases), I noticed the following in the code emission: # bpftool p d j i 988 [...] 38: ldr w10, [x1,x10] 3c: cmp w2, w10 40: b.ge 0x000000000000007c <-- signed cmp 44: mov x10, #0x20 // #32 48: cmp x26, x10 4c: b.gt 0x000000000000007c 50: add x26, x26, #0x1 54: mov x10, #0x110 // #272 58: add x10, x1, x10 5c: lsl x11, x2, #3 60: ldr x11, [x10,x11] <-- faulting insn (f86b694b) 64: cbz x11, 0x000000000000007c [...] Meaning, the tests passed because commit ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") was using signed compares instead of unsigned which as a result had the test wrongly passing. Change this but also the tail call count test both into unsigned and cap the index as u32. Latter we did as well in 90caccdd8cc0 ("bpf: fix bpf_tail_call() x64 JIT") and is needed in addition here, too. Tested on HiSilicon Hi1616. Result after patch: # bpftool p d j i 268 [...] 38: ldr w10, [x1,x10] 3c: add w2, w2, #0x0 40: cmp w2, w10 44: b.cs 0x0000000000000080 48: mov x10, #0x20 // #32 4c: cmp x26, x10 50: b.hi 0x0000000000000080 54: add x26, x26, #0x1 58: mov x10, #0x110 // #272 5c: add x10, x1, x10 60: lsl x11, x2, #3 64: ldr x11, [x10,x11] 68: cbz x11, 0x0000000000000080 [...] Fixes: ddb55992b04d ("arm64: bpf: implement bpf_tail_call() helper") Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 5 +++-- tools/testing/selftests/bpf/test_verifier.c | 26 ++++++++++++++++++++++++++ 2 files changed, 29 insertions(+), 2 deletions(-) diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 1d4f1da7c58f..a93350451e8e 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -250,8 +250,9 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) off = offsetof(struct bpf_array, map.max_entries); emit_a64_mov_i64(tmp, off, ctx); emit(A64_LDR32(tmp, r2, tmp), ctx); + emit(A64_MOV(0, r3, r3), ctx); emit(A64_CMP(0, r3, tmp), ctx); - emit(A64_B_(A64_COND_GE, jmp_offset), ctx); + emit(A64_B_(A64_COND_CS, jmp_offset), ctx); /* if (tail_call_cnt > MAX_TAIL_CALL_CNT) * goto out; @@ -259,7 +260,7 @@ static int emit_bpf_tail_call(struct jit_ctx *ctx) */ emit_a64_mov_i64(tmp, MAX_TAIL_CALL_CNT, ctx); emit(A64_CMP(1, tcc, tmp), ctx); - emit(A64_B_(A64_COND_GT, jmp_offset), ctx); + emit(A64_B_(A64_COND_HI, jmp_offset), ctx); emit(A64_ADD_I(1, tcc, tcc, 1), ctx); /* prog = array->ptrs[index]; diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index c0f16e93f9bd..c73592fa3d41 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -2586,6 +2586,32 @@ static struct bpf_test tests[] = { .result_unpriv = REJECT, .result = ACCEPT, }, + { + "runtime/jit: pass negative index to tail_call", + .insns = { + BPF_MOV64_IMM(BPF_REG_3, -1), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 1 }, + .result = ACCEPT, + }, + { + "runtime/jit: pass > 32bit index to tail_call", + .insns = { + BPF_LD_IMM64(BPF_REG_3, 0x100000000ULL), + BPF_LD_MAP_FD(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, + BPF_FUNC_tail_call), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }, + .fixup_prog = { 2 }, + .result = ACCEPT, + }, { "stack pointer arithmetic", .insns = { -- cgit v1.2.3 From 4e14bf4236490306004782813b8b4494b18f5e60 Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Thu, 22 Feb 2018 18:20:30 +0300 Subject: macvlan: fix use-after-free in macvlan_common_newlink() The following use-after-free was reported by KASan when running LTP macvtap01 test on 4.16-rc2: [10642.528443] BUG: KASAN: use-after-free in macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10642.626607] Read of size 8 at addr ffff880ba49f2100 by task ip/18450 ... [10642.963873] Call Trace: [10642.994352] dump_stack+0x5c/0x7c [10643.035325] print_address_description+0x75/0x290 [10643.092938] kasan_report+0x28d/0x390 [10643.137971] ? macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10643.207963] macvlan_common_newlink+0x12ef/0x14a0 [macvlan] [10643.275978] macvtap_newlink+0x171/0x260 [macvtap] [10643.334532] rtnl_newlink+0xd4f/0x1300 ... [10646.256176] Allocated by task 18450: [10646.299964] kasan_kmalloc+0xa6/0xd0 [10646.343746] kmem_cache_alloc_trace+0xf1/0x210 [10646.397826] macvlan_common_newlink+0x6de/0x14a0 [macvlan] [10646.464386] macvtap_newlink+0x171/0x260 [macvtap] [10646.522728] rtnl_newlink+0xd4f/0x1300 ... [10647.022028] Freed by task 18450: [10647.061549] __kasan_slab_free+0x138/0x180 [10647.111468] kfree+0x9e/0x1c0 [10647.147869] macvlan_port_destroy+0x3db/0x650 [macvlan] [10647.211411] rollback_registered_many+0x5b9/0xb10 [10647.268715] rollback_registered+0xd9/0x190 [10647.319675] register_netdevice+0x8eb/0xc70 [10647.370635] macvlan_common_newlink+0xe58/0x14a0 [macvlan] [10647.437195] macvtap_newlink+0x171/0x260 [macvtap] Commit d02fd6e7d293 ("macvlan: Fix one possible double free") handles the case when register_netdevice() invokes ndo_uninit() on error and as a result free the port. But 'macvlan_port_get_rtnl(dev))' check (returns dev->rx_handler_data), which was added by this commit in order to prevent double free, is not quite correct: * for macvlan it always returns NULL because 'lowerdev' is the one that was used to register rx handler (port) in macvlan_port_create() as well as to unregister it in macvlan_port_destroy(). * for macvtap it always returns a valid pointer because macvtap registers its own rx handler before macvlan_common_newlink(). Fixes: d02fd6e7d293 ("macvlan: Fix one possible double free") Signed-off-by: Alexey Kodanev Signed-off-by: David S. Miller --- drivers/net/macvlan.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c index a0f2be81d52e..8fc02d9db3d0 100644 --- a/drivers/net/macvlan.c +++ b/drivers/net/macvlan.c @@ -1451,7 +1451,7 @@ destroy_macvlan_port: /* the macvlan port may be freed by macvlan_uninit when fail to register. * so we destroy the macvlan port only when it's valid. */ - if (create && macvlan_port_get_rtnl(dev)) + if (create && macvlan_port_get_rtnl(lowerdev)) macvlan_port_destroy(port->dev); return err; } -- cgit v1.2.3 From ca79bec237f5809a7c3c59bd41cd0880aa889966 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 22 Feb 2018 16:55:34 +0100 Subject: ipv6 sit: work around bogus gcc-8 -Wrestrict warning gcc-8 has a new warning that detects overlapping input and output arguments in memcpy(). It triggers for sit_init_net() calling ipip6_tunnel_clone_6rd(), which is actually correct: net/ipv6/sit.c: In function 'sit_init_net': net/ipv6/sit.c:192:3: error: 'memcpy' source argument is the same as destination [-Werror=restrict] The problem here is that the logic detecting the memcpy() arguments finds them to be the same, but the conditional that tests for the input and output of ipip6_tunnel_clone_6rd() to be identical is not a compile-time constant. We know that netdev_priv(t->dev) is the same as t for a tunnel device, and comparing "dev" directly here lets the compiler figure out as well that 'dev == sitn->fb_tunnel_dev' when called from sit_init_net(), so it no longer warns. This code is old, so Cc stable to make sure that we don't get the warning for older kernels built with new gcc. Cc: Martin Sebor Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=83456 Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- net/ipv6/sit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 3873d3877135..3a1775a62973 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -182,7 +182,7 @@ static void ipip6_tunnel_clone_6rd(struct net_device *dev, struct sit_net *sitn) #ifdef CONFIG_IPV6_SIT_6RD struct ip_tunnel *t = netdev_priv(dev); - if (t->dev == sitn->fb_tunnel_dev) { + if (dev == sitn->fb_tunnel_dev) { ipv6_addr_set(&t->ip6rd.prefix, htonl(0x20020000), 0, 0, 0); t->ip6rd.relay_prefix = 0; t->ip6rd.prefixlen = 16; -- cgit v1.2.3 From d903ec77118c09f93a610b384d83a6df33a64fe6 Mon Sep 17 00:00:00 2001 From: Andy Spencer Date: Thu, 22 Feb 2018 11:05:33 -0800 Subject: gianfar: simplify FCS handling and fix memory leak Previously, buffer descriptors containing only the frame check sequence (FCS) were skipped and not added to the skb. However, the page reference count was still incremented, leading to a memory leak. Fixing this inside gfar_add_rx_frag() is difficult due to reserved memory handling and page reuse. Instead, move the FCS handling to gfar_process_frame() and trim off the FCS before passing the skb up the networking stack. Signed-off-by: Andy Spencer Signed-off-by: Jim Gruen Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/gianfar.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/drivers/net/ethernet/freescale/gianfar.c b/drivers/net/ethernet/freescale/gianfar.c index 3bdeb295514b..f5c87bd35fa1 100644 --- a/drivers/net/ethernet/freescale/gianfar.c +++ b/drivers/net/ethernet/freescale/gianfar.c @@ -2934,29 +2934,17 @@ static bool gfar_add_rx_frag(struct gfar_rx_buff *rxb, u32 lstatus, { int size = lstatus & BD_LENGTH_MASK; struct page *page = rxb->page; - bool last = !!(lstatus & BD_LFLAG(RXBD_LAST)); - - /* Remove the FCS from the packet length */ - if (last) - size -= ETH_FCS_LEN; if (likely(first)) { skb_put(skb, size); } else { /* the last fragments' length contains the full frame length */ - if (last) + if (lstatus & BD_LFLAG(RXBD_LAST)) size -= skb->len; - /* Add the last fragment if it contains something other than - * the FCS, otherwise drop it and trim off any part of the FCS - * that was already received. - */ - if (size > 0) - skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, - rxb->page_offset + RXBUF_ALIGNMENT, - size, GFAR_RXB_TRUESIZE); - else if (size < 0) - pskb_trim(skb, skb->len + size); + skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, page, + rxb->page_offset + RXBUF_ALIGNMENT, + size, GFAR_RXB_TRUESIZE); } /* try reuse page */ @@ -3069,6 +3057,9 @@ static void gfar_process_frame(struct net_device *ndev, struct sk_buff *skb) if (priv->padding) skb_pull(skb, priv->padding); + /* Trim off the FCS */ + pskb_trim(skb, skb->len - ETH_FCS_LEN); + if (ndev->features & NETIF_F_RXCSUM) gfar_rx_checksum(skb, fcb); -- cgit v1.2.3 From a5f7add332b4ea6d4b9480971b3b0f5e66466ae9 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 22 Feb 2018 19:45:27 -0800 Subject: net_sched: gen_estimator: fix broken estimators based on percpu stats pfifo_fast got percpu stats lately, uncovering a bug I introduced last year in linux-4.10. I missed the fact that we have to clear our temporary storage before calling __gnet_stats_copy_basic() in the case of percpu stats. Without this fix, rate estimators (tc qd replace dev xxx root est 1sec 4sec pfifo_fast) are utterly broken. Fixes: 1c0d32fde5bd ("net_sched: gen_estimator: complete rewrite of rate estimators") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/core/gen_estimator.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/core/gen_estimator.c b/net/core/gen_estimator.c index 0a3f88f08727..98fd12721221 100644 --- a/net/core/gen_estimator.c +++ b/net/core/gen_estimator.c @@ -66,6 +66,7 @@ struct net_rate_estimator { static void est_fetch_counters(struct net_rate_estimator *e, struct gnet_stats_basic_packed *b) { + memset(b, 0, sizeof(*b)); if (e->stats_lock) spin_lock(e->stats_lock); -- cgit v1.2.3