From 46f7487e161b195a1bd7ddbd9c6aba9c93ec881a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Wed, 6 Mar 2019 12:44:17 +0100 Subject: netfilter: nat: don't register device notifier twice Otherwise, we get notifier list corruption. This is the most simple fix: remove the device notifier call chain from the ipv6 masquerade register function and handle it only in the ipv4 version. The better fix is merge nf_nat_masquerade_ipv4/6_(un)register_notifier into a single nf_nat_masquerade_(un)register_notifiers but to do this its needed to first merge the two masquerade modules into a single xt_MASQUERADE. Furthermore, we need to use different refcounts for ipv4/ipv6 until we can merge MASQUERADE. Fixes: d1aca8ab3104a ("netfilter: nat: merge ipv4 and ipv6 masquerade functionality") Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_nat_masquerade.c | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/net/netfilter/nf_nat_masquerade.c b/net/netfilter/nf_nat_masquerade.c index 86fa4dcc63c5..d85c4d902e7b 100644 --- a/net/netfilter/nf_nat_masquerade.c +++ b/net/netfilter/nf_nat_masquerade.c @@ -11,7 +11,8 @@ #include static DEFINE_MUTEX(masq_mutex); -static unsigned int masq_refcnt __read_mostly; +static unsigned int masq_refcnt4 __read_mostly; +static unsigned int masq_refcnt6 __read_mostly; unsigned int nf_nat_masquerade_ipv4(struct sk_buff *skb, unsigned int hooknum, @@ -141,8 +142,13 @@ int nf_nat_masquerade_ipv4_register_notifier(void) int ret = 0; mutex_lock(&masq_mutex); + if (WARN_ON_ONCE(masq_refcnt4 == UINT_MAX)) { + ret = -EOVERFLOW; + goto out_unlock; + } + /* check if the notifier was already set */ - if (++masq_refcnt > 1) + if (++masq_refcnt4 > 1) goto out_unlock; /* Register for device down reports */ @@ -160,7 +166,7 @@ int nf_nat_masquerade_ipv4_register_notifier(void) err_unregister: unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt--; + masq_refcnt4--; out_unlock: mutex_unlock(&masq_mutex); return ret; @@ -171,7 +177,7 @@ void nf_nat_masquerade_ipv4_unregister_notifier(void) { mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (--masq_refcnt > 0) + if (--masq_refcnt4 > 0) goto out_unlock; unregister_netdevice_notifier(&masq_dev_notifier); @@ -321,25 +327,23 @@ int nf_nat_masquerade_ipv6_register_notifier(void) int ret = 0; mutex_lock(&masq_mutex); - /* check if the notifier is already set */ - if (++masq_refcnt > 1) + if (WARN_ON_ONCE(masq_refcnt6 == UINT_MAX)) { + ret = -EOVERFLOW; goto out_unlock; + } - ret = register_netdevice_notifier(&masq_dev_notifier); - if (ret) - goto err_dec; + /* check if the notifier is already set */ + if (++masq_refcnt6 > 1) + goto out_unlock; ret = register_inet6addr_notifier(&masq_inet6_notifier); if (ret) - goto err_unregister; + goto err_dec; mutex_unlock(&masq_mutex); return ret; - -err_unregister: - unregister_netdevice_notifier(&masq_dev_notifier); err_dec: - masq_refcnt--; + masq_refcnt6--; out_unlock: mutex_unlock(&masq_mutex); return ret; @@ -350,11 +354,10 @@ void nf_nat_masquerade_ipv6_unregister_notifier(void) { mutex_lock(&masq_mutex); /* check if the notifier still has clients */ - if (--masq_refcnt > 0) + if (--masq_refcnt6 > 0) goto out_unlock; unregister_inet6addr_notifier(&masq_inet6_notifier); - unregister_netdevice_notifier(&masq_dev_notifier); out_unlock: mutex_unlock(&masq_mutex); } -- cgit v1.2.3 From 40ba1d9b4d19796afc9b7ece872f5f3e8f5e2c13 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 00:58:53 +0100 Subject: netfilter: nf_tables: fix set double-free in abort path The abort path can cause a double-free of an anonymous set. Added-and-to-be-aborted rule looks like this: udp dport { 137, 138 } drop The to-be-aborted transaction list looks like this: newset newsetelem newsetelem rule This gets walked in reverse order, so first pass disables the rule, the set elements, then the set. After synchronize_rcu(), we then destroy those in same order: rule, set element, set element, newset. Problem is that the anonymous set has already been bound to the rule, so the rule (lookup expression destructor) already frees the set, when then cause use-after-free when trying to delete the elements from this set, then try to free the set again when handling the newset expression. Rule releases the bound set in first place from the abort path, this causes the use-after-free on set element removal when undoing the new element transactions. To handle this, skip new element transaction if set is bound from the abort path. This is still causes the use-after-free on set element removal. To handle this, remove transaction from the list when the set is already bound. Joint work with Florian Westphal. Fixes: f6ac85858976 ("netfilter: nf_tables: unbind set in rule from commit path") Bugzilla: https://bugzilla.netfilter.org/show_bug.cgi?id=1325 Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++---- net/netfilter/nf_tables_api.c | 17 +++++++++++------ 2 files changed, 13 insertions(+), 10 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index c331e96a713b..ed0687b0e0f4 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -416,7 +416,8 @@ struct nft_set { unsigned char *udata; /* runtime data below here */ const struct nft_set_ops *ops ____cacheline_aligned; - u16 flags:14, + u16 flags:13, + bound:1, genmask:2; u8 klen; u8 dlen; @@ -1344,15 +1345,12 @@ struct nft_trans_rule { struct nft_trans_set { struct nft_set *set; u32 set_id; - bool bound; }; #define nft_trans_set(trans) \ (((struct nft_trans_set *)trans->data)->set) #define nft_trans_set_id(trans) \ (((struct nft_trans_set *)trans->data)->set_id) -#define nft_trans_set_bound(trans) \ - (((struct nft_trans_set *)trans->data)->bound) struct nft_trans_chain { bool update; diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index faf6bd10a19f..1333bf97dc26 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -142,7 +142,7 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set) list_for_each_entry_reverse(trans, &net->nft.commit_list, list) { if (trans->msg_type == NFT_MSG_NEWSET && nft_trans_set(trans) == set) { - nft_trans_set_bound(trans) = true; + set->bound = true; break; } } @@ -6709,8 +6709,7 @@ static void nf_tables_abort_release(struct nft_trans *trans) nf_tables_rule_destroy(&trans->ctx, nft_trans_rule(trans)); break; case NFT_MSG_NEWSET: - if (!nft_trans_set_bound(trans)) - nft_set_destroy(nft_trans_set(trans)); + nft_set_destroy(nft_trans_set(trans)); break; case NFT_MSG_NEWSETELEM: nft_set_elem_destroy(nft_trans_elem_set(trans), @@ -6783,8 +6782,11 @@ static int __nf_tables_abort(struct net *net) break; case NFT_MSG_NEWSET: trans->ctx.table->use--; - if (!nft_trans_set_bound(trans)) - list_del_rcu(&nft_trans_set(trans)->list); + if (nft_trans_set(trans)->bound) { + nft_trans_destroy(trans); + break; + } + list_del_rcu(&nft_trans_set(trans)->list); break; case NFT_MSG_DELSET: trans->ctx.table->use++; @@ -6792,8 +6794,11 @@ static int __nf_tables_abort(struct net *net) nft_trans_destroy(trans); break; case NFT_MSG_NEWSETELEM: + if (nft_trans_elem_set(trans)->bound) { + nft_trans_destroy(trans); + break; + } te = (struct nft_trans_elem *)trans->data; - te->set->ops->remove(net, te->set, &te->elem); atomic_dec(&te->set->nelems); break; -- cgit v1.2.3 From 273fe3f1006ea5ebc63d6729e43e8e45e32b256a Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 8 Mar 2019 15:30:03 +0100 Subject: netfilter: nf_tables: bogus EBUSY when deleting set after flush MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Set deletion after flush coming in the same batch results in EBUSY. Add set use counter to track the number of references to this set from rules. We cannot rely on the list of bindings for this since such list is still populated from the preparation phase. Reported-by: Václav Zindulka Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 6 ++++++ net/netfilter/nf_tables_api.c | 28 +++++++++++++++++++++++++++- net/netfilter/nft_dynset.c | 13 +++++++++---- net/netfilter/nft_lookup.c | 13 +++++++++---- net/netfilter/nft_objref.c | 13 +++++++++---- 5 files changed, 60 insertions(+), 13 deletions(-) diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index ed0687b0e0f4..3e9ab643eedf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -382,6 +382,7 @@ void nft_unregister_set(struct nft_set_type *type); * @dtype: data type (verdict or numeric type defined by userspace) * @objtype: object type (see NFT_OBJECT_* definitions) * @size: maximum set size + * @use: number of rules references to this set * @nelems: number of elements * @ndeact: number of deactivated elements queued for removal * @timeout: default timeout value in jiffies @@ -407,6 +408,7 @@ struct nft_set { u32 dtype; u32 objtype; u32 size; + u32 use; atomic_t nelems; u32 ndeact; u64 timeout; @@ -467,6 +469,10 @@ struct nft_set_binding { u32 flags; }; +enum nft_trans_phase; +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase); int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *binding); void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 1333bf97dc26..ca09ef037ca5 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -3672,6 +3672,9 @@ err1: static void nft_set_destroy(struct nft_set *set) { + if (WARN_ON(set->use > 0)) + return; + set->ops->destroy(set); module_put(to_set_type(set->ops)->owner); kfree(set->name); @@ -3712,7 +3715,7 @@ static int nf_tables_delset(struct net *net, struct sock *nlsk, NL_SET_BAD_ATTR(extack, attr); return PTR_ERR(set); } - if (!list_empty(&set->bindings) || + if (set->use || (nlh->nlmsg_flags & NLM_F_NONREC && atomic_read(&set->nelems) > 0)) { NL_SET_BAD_ATTR(extack, attr); return -EBUSY; @@ -3742,6 +3745,9 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set, struct nft_set_binding *i; struct nft_set_iter iter; + if (set->use == UINT_MAX) + return -EOVERFLOW; + if (!list_empty(&set->bindings) && nft_set_is_anonymous(set)) return -EBUSY; @@ -3769,6 +3775,7 @@ bind: binding->chain = ctx->chain; list_add_tail_rcu(&binding->list, &set->bindings); nft_set_trans_bind(ctx, set); + set->use++; return 0; } @@ -3788,6 +3795,25 @@ void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set, } EXPORT_SYMBOL_GPL(nf_tables_unbind_set); +void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set, + struct nft_set_binding *binding, + enum nft_trans_phase phase) +{ + switch (phase) { + case NFT_TRANS_PREPARE: + set->use--; + return; + case NFT_TRANS_ABORT: + case NFT_TRANS_RELEASE: + set->use--; + /* fall through */ + default: + nf_tables_unbind_set(ctx, set, binding, + phase == NFT_TRANS_COMMIT); + } +} +EXPORT_SYMBOL_GPL(nf_tables_deactivate_set); + void nf_tables_destroy_set(const struct nft_ctx *ctx, struct nft_set *set) { if (list_empty(&set->bindings) && nft_set_is_anonymous(set)) diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c index a8a74a16f9c4..e461007558e8 100644 --- a/net/netfilter/nft_dynset.c +++ b/net/netfilter/nft_dynset.c @@ -240,11 +240,15 @@ static void nft_dynset_deactivate(const struct nft_ctx *ctx, { struct nft_dynset *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_dynset_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_dynset *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_dynset_destroy(const struct nft_ctx *ctx, @@ -292,6 +296,7 @@ static const struct nft_expr_ops nft_dynset_ops = { .eval = nft_dynset_eval, .init = nft_dynset_init, .destroy = nft_dynset_destroy, + .activate = nft_dynset_activate, .deactivate = nft_dynset_deactivate, .dump = nft_dynset_dump, }; diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c index 14496da5141d..161c3451a747 100644 --- a/net/netfilter/nft_lookup.c +++ b/net/netfilter/nft_lookup.c @@ -127,11 +127,15 @@ static void nft_lookup_deactivate(const struct nft_ctx *ctx, { struct nft_lookup *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_lookup_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_lookup *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_lookup_destroy(const struct nft_ctx *ctx, @@ -222,6 +226,7 @@ static const struct nft_expr_ops nft_lookup_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_lookup)), .eval = nft_lookup_eval, .init = nft_lookup_init, + .activate = nft_lookup_activate, .deactivate = nft_lookup_deactivate, .destroy = nft_lookup_destroy, .dump = nft_lookup_dump, diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c index 79ef074c18ca..457a9ceb46af 100644 --- a/net/netfilter/nft_objref.c +++ b/net/netfilter/nft_objref.c @@ -162,11 +162,15 @@ static void nft_objref_map_deactivate(const struct nft_ctx *ctx, { struct nft_objref_map *priv = nft_expr_priv(expr); - if (phase == NFT_TRANS_PREPARE) - return; + nf_tables_deactivate_set(ctx, priv->set, &priv->binding, phase); +} + +static void nft_objref_map_activate(const struct nft_ctx *ctx, + const struct nft_expr *expr) +{ + struct nft_objref_map *priv = nft_expr_priv(expr); - nf_tables_unbind_set(ctx, priv->set, &priv->binding, - phase == NFT_TRANS_COMMIT); + priv->set->use++; } static void nft_objref_map_destroy(const struct nft_ctx *ctx, @@ -183,6 +187,7 @@ static const struct nft_expr_ops nft_objref_map_ops = { .size = NFT_EXPR_SIZE(sizeof(struct nft_objref_map)), .eval = nft_objref_map_eval, .init = nft_objref_map_init, + .activate = nft_objref_map_activate, .deactivate = nft_objref_map_deactivate, .destroy = nft_objref_map_destroy, .dump = nft_objref_map_dump, -- cgit v1.2.3 From 3f3a390dbd59d236f62cff8e8b20355ef7069e3d Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Mon, 11 Mar 2019 13:04:16 +0100 Subject: netfilter: nf_tables: use-after-free in dynamic operations Smatch reports: net/netfilter/nf_tables_api.c:2167 nf_tables_expr_destroy() error: dereferencing freed memory 'expr->ops' net/netfilter/nf_tables_api.c 2162 static void nf_tables_expr_destroy(const struct nft_ctx *ctx, 2163 struct nft_expr *expr) 2164 { 2165 if (expr->ops->destroy) 2166 expr->ops->destroy(ctx, expr); ^^^^ --> 2167 module_put(expr->ops->type->owner); ^^^^^^^^^ 2168 } Smatch says there are three functions which free expr->ops. Fixes: b8e204006340 ("netfilter: nft_compat: use .release_ops and remove list of extension") Reported-by: Dan Carpenter Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index ca09ef037ca5..9d8f51dfc593 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -2162,9 +2162,11 @@ err1: static void nf_tables_expr_destroy(const struct nft_ctx *ctx, struct nft_expr *expr) { + const struct nft_expr_type *type = expr->ops->type; + if (expr->ops->destroy) expr->ops->destroy(ctx, expr); - module_put(expr->ops->type->owner); + module_put(type->owner); } struct nft_expr *nft_expr_init(const struct nft_ctx *ctx, -- cgit v1.2.3 From a843dc4ebaecd15fca1f4d35a97210f72ea1473b Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 11 Mar 2019 16:29:32 +0800 Subject: net: sit: fix UBSAN Undefined behaviour in check_6rd In func check_6rd,tunnel->ip6rd.relay_prefixlen may equal to 32,so UBSAN complain about it. UBSAN: Undefined behaviour in net/ipv6/sit.c:781:47 shift exponent 32 is too large for 32-bit type 'unsigned int' CPU: 6 PID: 20036 Comm: syz-executor.0 Not tainted 4.19.27 #2 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0xca/0x13e lib/dump_stack.c:113 ubsan_epilogue+0xe/0x81 lib/ubsan.c:159 __ubsan_handle_shift_out_of_bounds+0x293/0x2e8 lib/ubsan.c:425 check_6rd.constprop.9+0x433/0x4e0 net/ipv6/sit.c:781 try_6rd net/ipv6/sit.c:806 [inline] ipip6_tunnel_xmit net/ipv6/sit.c:866 [inline] sit_tunnel_xmit+0x141c/0x2720 net/ipv6/sit.c:1033 __netdev_start_xmit include/linux/netdevice.h:4300 [inline] netdev_start_xmit include/linux/netdevice.h:4309 [inline] xmit_one net/core/dev.c:3243 [inline] dev_hard_start_xmit+0x17c/0x780 net/core/dev.c:3259 __dev_queue_xmit+0x1656/0x2500 net/core/dev.c:3829 neigh_output include/net/neighbour.h:501 [inline] ip6_finish_output2+0xa36/0x2290 net/ipv6/ip6_output.c:120 ip6_finish_output+0x3e7/0xa20 net/ipv6/ip6_output.c:154 NF_HOOK_COND include/linux/netfilter.h:278 [inline] ip6_output+0x1e2/0x720 net/ipv6/ip6_output.c:171 dst_output include/net/dst.h:444 [inline] ip6_local_out+0x99/0x170 net/ipv6/output_core.c:176 ip6_send_skb+0x9d/0x2f0 net/ipv6/ip6_output.c:1697 ip6_push_pending_frames+0xc0/0x100 net/ipv6/ip6_output.c:1717 rawv6_push_pending_frames net/ipv6/raw.c:616 [inline] rawv6_sendmsg+0x2435/0x3530 net/ipv6/raw.c:946 inet_sendmsg+0xf8/0x5c0 net/ipv4/af_inet.c:798 sock_sendmsg_nosec net/socket.c:621 [inline] sock_sendmsg+0xc8/0x110 net/socket.c:631 ___sys_sendmsg+0x6cf/0x890 net/socket.c:2114 __sys_sendmsg+0xf0/0x1b0 net/socket.c:2152 do_syscall_64+0xc8/0x580 arch/x86/entry/common.c:290 entry_SYSCALL_64_after_hwframe+0x49/0xbe Signed-off-by: linmiaohe Signed-off-by: David S. Miller --- net/ipv6/sit.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c index 09e440e8dfae..07e21a82ce4c 100644 --- a/net/ipv6/sit.c +++ b/net/ipv6/sit.c @@ -778,8 +778,9 @@ static bool check_6rd(struct ip_tunnel *tunnel, const struct in6_addr *v6dst, pbw0 = tunnel->ip6rd.prefixlen >> 5; pbi0 = tunnel->ip6rd.prefixlen & 0x1f; - d = (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> - tunnel->ip6rd.relay_prefixlen; + d = tunnel->ip6rd.relay_prefixlen < 32 ? + (ntohl(v6dst->s6_addr32[pbw0]) << pbi0) >> + tunnel->ip6rd.relay_prefixlen : 0; pbi1 = pbi0 - tunnel->ip6rd.relay_prefixlen; if (pbi1 > 0) -- cgit v1.2.3 From a623a7a1a5670c25a16881f5078072d272d96b71 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 11 Mar 2019 16:38:17 +0100 Subject: y2038: fix socket.h header inclusion Referencing the __kernel_long_t type caused some user space applications to stop compiling when they had not already included linux/posix_types.h, e.g. s/multicast.c -o ext/sockets/multicast.lo In file included from /builddir/build/BUILD/php-7.3.3/main/php.h:468, from /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:27: /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c: In function 'zm_startup_sockets': /builddir/build/BUILD/php-7.3.3/ext/sockets/sockets.c:776:40: error: '__kernel_long_t' undeclared (first use in this function) 776 | REGISTER_LONG_CONSTANT("SO_SNDTIMEO", SO_SNDTIMEO, CONST_CS | CONST_PERSISTENT); It is safe to include that header here, since it only contains kernel internal types that do not conflict with other user space types. It's still possible that some related build failures remain, but those are likely to be for code that is not already y2038 safe. Reported-by: Laura Abbott Fixes: a9beb86ae6e5 ("sock: Add SO_RCVTIMEO_NEW and SO_SNDTIMEO_NEW") Signed-off-by: Arnd Bergmann Signed-off-by: David S. Miller --- arch/alpha/include/uapi/asm/socket.h | 2 +- arch/mips/include/uapi/asm/socket.h | 2 +- arch/parisc/include/uapi/asm/socket.h | 2 +- arch/sparc/include/uapi/asm/socket.h | 2 +- include/uapi/asm-generic/socket.h | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/arch/alpha/include/uapi/asm/socket.h b/arch/alpha/include/uapi/asm/socket.h index 0d0fddb7e738..976e89b116e5 100644 --- a/arch/alpha/include/uapi/asm/socket.h +++ b/arch/alpha/include/uapi/asm/socket.h @@ -2,8 +2,8 @@ #ifndef _UAPI_ASM_SOCKET_H #define _UAPI_ASM_SOCKET_H +#include #include -#include /* For setsockopt(2) */ /* diff --git a/arch/mips/include/uapi/asm/socket.h b/arch/mips/include/uapi/asm/socket.h index eb9f33f8a8b3..d41765cfbc6e 100644 --- a/arch/mips/include/uapi/asm/socket.h +++ b/arch/mips/include/uapi/asm/socket.h @@ -10,8 +10,8 @@ #ifndef _UAPI_ASM_SOCKET_H #define _UAPI_ASM_SOCKET_H +#include #include -#include /* * For setsockopt(2) diff --git a/arch/parisc/include/uapi/asm/socket.h b/arch/parisc/include/uapi/asm/socket.h index 16e428f03526..66c5dd245ac7 100644 --- a/arch/parisc/include/uapi/asm/socket.h +++ b/arch/parisc/include/uapi/asm/socket.h @@ -2,8 +2,8 @@ #ifndef _UAPI_ASM_SOCKET_H #define _UAPI_ASM_SOCKET_H +#include #include -#include /* For setsockopt(2) */ #define SOL_SOCKET 0xffff diff --git a/arch/sparc/include/uapi/asm/socket.h b/arch/sparc/include/uapi/asm/socket.h index 88fe4f978aca..9265a9eece15 100644 --- a/arch/sparc/include/uapi/asm/socket.h +++ b/arch/sparc/include/uapi/asm/socket.h @@ -2,8 +2,8 @@ #ifndef _ASM_SOCKET_H #define _ASM_SOCKET_H +#include #include -#include /* For setsockopt(2) */ #define SOL_SOCKET 0xffff diff --git a/include/uapi/asm-generic/socket.h b/include/uapi/asm-generic/socket.h index c8b430cb6dc4..8c1391c89171 100644 --- a/include/uapi/asm-generic/socket.h +++ b/include/uapi/asm-generic/socket.h @@ -2,8 +2,8 @@ #ifndef __ASM_GENERIC_SOCKET_H #define __ASM_GENERIC_SOCKET_H +#include #include -#include /* For setsockopt(2) */ #define SOL_SOCKET 1 -- cgit v1.2.3 From 6237634d8fcc65c9e3348382910e7cdb15084c68 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Thu, 31 Jan 2019 14:56:01 +0200 Subject: net/mlx5: Fix multiple updates of steering rules in parallel There might be a condition where the fte found is not active yet. In this case we should not use it, but continue to search for another, or allocate a new one. Fixes: bd71b08ec2ee ("net/mlx5: Support multiple updates of steering rules in parallel") Signed-off-by: Eli Britstein Reviewed-by: Maor Gottlieb Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index f2cfa012315e..86f986308a90 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -491,6 +491,7 @@ static void del_hw_fte(struct fs_node *node) mlx5_core_warn(dev, "flow steering can't delete fte in index %d of flow group id %d\n", fte->index, fg->id); + node->active = 0; } } @@ -1601,6 +1602,11 @@ lookup_fte_locked(struct mlx5_flow_group *g, fte_tmp = NULL; goto out; } + if (!fte_tmp->node.active) { + tree_put_node(&fte_tmp->node); + fte_tmp = NULL; + goto out; + } nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); out: -- cgit v1.2.3 From e7aafc8f048050c7f1274db2ff0db60b0cc95700 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Tue, 8 Jan 2019 09:29:22 +0200 Subject: net/mlx5: Add modify FTE helper function Add modify FTE helper function and use it when deleting a rule, as a pre-step towards consolidated FTE modification, with no functional change. Signed-off-by: Eli Britstein Reviewed-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 50 +++++++++++++---------- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 1 + 2 files changed, 30 insertions(+), 21 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 86f986308a90..5ff8c5f44b35 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -420,22 +420,37 @@ static void del_sw_flow_table(struct fs_node *node) kfree(ft); } -static void del_sw_hw_rule(struct fs_node *node) +static void modify_fte(struct fs_fte *fte) { struct mlx5_flow_root_namespace *root; - struct mlx5_flow_rule *rule; struct mlx5_flow_table *ft; struct mlx5_flow_group *fg; - struct fs_fte *fte; - int modify_mask; - struct mlx5_core_dev *dev = get_dev(node); + struct mlx5_core_dev *dev; int err; - bool update_fte = false; - fs_get_obj(rule, node); - fs_get_obj(fte, rule->node.parent); + if (!fte->modify_mask) + return; + fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); + dev = get_dev(&fte->node); + + root = find_root(&ft->node); + err = root->cmds->update_fte(dev, ft, fg->id, fte->modify_mask, fte); + if (err) + mlx5_core_warn(dev, + "%s can't del rule fg id=%d fte_index=%d\n", + __func__, fg->id, fte->index); + fte->modify_mask = 0; +} + +static void del_sw_hw_rule(struct fs_node *node) +{ + struct mlx5_flow_rule *rule; + struct fs_fte *fte; + + fs_get_obj(rule, node); + fs_get_obj(fte, rule->node.parent); trace_mlx5_fs_del_rule(rule); if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) { mutex_lock(&rule->dest_attr.ft->lock); @@ -445,27 +460,20 @@ static void del_sw_hw_rule(struct fs_node *node) if (rule->dest_attr.type == MLX5_FLOW_DESTINATION_TYPE_COUNTER && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | - BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION) | + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS); fte->action.action &= ~MLX5_FLOW_CONTEXT_ACTION_COUNT; - update_fte = true; goto out; } if ((fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST) && --fte->dests_size) { - modify_mask = BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); - update_fte = true; + fte->modify_mask |= + BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); } out: - root = find_root(&ft->node); - if (update_fte && fte->dests_size) { - err = root->cmds->update_fte(dev, ft, fg->id, modify_mask, fte); - if (err) - mlx5_core_warn(dev, - "%s can't del rule fg id=%d fte_index=%d\n", - __func__, fg->id, fte->index); - } + modify_fte(fte); kfree(rule); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 2dc86347af58..87de0e4d9124 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -172,6 +172,7 @@ struct fs_fte { enum fs_fte_status status; struct mlx5_fc *counter; struct rhash_head hash; + int modify_mask; }; /* Type of children is mlx5_flow_table/namespace */ -- cgit v1.2.3 From 476d61b783e5481bbfaac4518b0b3d2b5addbc19 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Thu, 31 Jan 2019 14:38:32 +0200 Subject: net/mlx5: Add a locked flag to node removal functions Add a locked flag to the node removal functions to signal if the parent is already locked from the caller function or not as a pre-step towards outside lock. Currently always use false with no functional change. Signed-off-by: Eli Britstein Reviewed-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 86 ++++++++++++----------- 1 file changed, 44 insertions(+), 42 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 5ff8c5f44b35..42a6952ff9d0 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -263,10 +263,11 @@ static void nested_down_write_ref_node(struct fs_node *node, } } -static void down_write_ref_node(struct fs_node *node) +static void down_write_ref_node(struct fs_node *node, bool locked) { if (node) { - down_write(&node->lock); + if (!locked) + down_write(&node->lock); refcount_inc(&node->refcount); } } @@ -277,13 +278,14 @@ static void up_read_ref_node(struct fs_node *node) up_read(&node->lock); } -static void up_write_ref_node(struct fs_node *node) +static void up_write_ref_node(struct fs_node *node, bool locked) { refcount_dec(&node->refcount); - up_write(&node->lock); + if (!locked) + up_write(&node->lock); } -static void tree_put_node(struct fs_node *node) +static void tree_put_node(struct fs_node *node, bool locked) { struct fs_node *parent_node = node->parent; @@ -294,27 +296,27 @@ static void tree_put_node(struct fs_node *node) /* Only root namespace doesn't have parent and we just * need to free its node. */ - down_write_ref_node(parent_node); + down_write_ref_node(parent_node, locked); list_del_init(&node->list); if (node->del_sw_func) node->del_sw_func(node); - up_write_ref_node(parent_node); + up_write_ref_node(parent_node, locked); } else { kfree(node); } node = NULL; } if (!node && parent_node) - tree_put_node(parent_node); + tree_put_node(parent_node, locked); } -static int tree_remove_node(struct fs_node *node) +static int tree_remove_node(struct fs_node *node, bool locked) { if (refcount_read(&node->refcount) > 1) { refcount_dec(&node->refcount); return -EEXIST; } - tree_put_node(node); + tree_put_node(node, locked); return 0; } @@ -867,7 +869,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, fs_get_obj(fte, rule->node.parent); if (!(fte->action.action & MLX5_FLOW_CONTEXT_ACTION_FWD_DEST)) return -EINVAL; - down_write_ref_node(&fte->node); + down_write_ref_node(&fte->node, false); fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); @@ -875,7 +877,7 @@ static int _mlx5_modify_rule_destination(struct mlx5_flow_rule *rule, root = find_root(&ft->node); err = root->cmds->update_fte(get_dev(&ft->node), ft, fg->id, modify_mask, fte); - up_write_ref_node(&fte->node); + up_write_ref_node(&fte->node, false); return err; } @@ -1025,11 +1027,11 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa if (err) goto destroy_ft; ft->node.active = true; - down_write_ref_node(&fs_prio->node); + down_write_ref_node(&fs_prio->node, false); tree_add_node(&ft->node, &fs_prio->node); list_add_flow_table(ft, fs_prio); fs_prio->num_ft++; - up_write_ref_node(&fs_prio->node); + up_write_ref_node(&fs_prio->node, false); mutex_unlock(&root->chain_lock); trace_mlx5_fs_add_ft(ft); return ft; @@ -1123,17 +1125,17 @@ struct mlx5_flow_group *mlx5_create_flow_group(struct mlx5_flow_table *ft, if (ft->autogroup.active) return ERR_PTR(-EPERM); - down_write_ref_node(&ft->node); + down_write_ref_node(&ft->node, false); fg = alloc_insert_flow_group(ft, match_criteria_enable, match_criteria, start_index, end_index, ft->node.children.prev); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); if (IS_ERR(fg)) return fg; err = root->cmds->create_flow_group(dev, ft, fg_in, &fg->id); if (err) { - tree_put_node(&fg->node); + tree_put_node(&fg->node, false); return ERR_PTR(err); } trace_mlx5_fs_add_fg(fg); @@ -1530,10 +1532,10 @@ static void free_match_list(struct match_list_head *head) struct match_list *iter, *match_tmp; list_del(&head->first.list); - tree_put_node(&head->first.g->node); + tree_put_node(&head->first.g->node, false); list_for_each_entry_safe(iter, match_tmp, &head->list, list) { - tree_put_node(&iter->g->node); + tree_put_node(&iter->g->node, false); list_del(&iter->list); kfree(iter); } @@ -1611,7 +1613,7 @@ lookup_fte_locked(struct mlx5_flow_group *g, goto out; } if (!fte_tmp->node.active) { - tree_put_node(&fte_tmp->node); + tree_put_node(&fte_tmp->node, false); fte_tmp = NULL; goto out; } @@ -1619,7 +1621,7 @@ lookup_fte_locked(struct mlx5_flow_group *g, nested_down_write_ref_node(&fte_tmp->node, FS_LOCK_CHILD); out: if (take_write) - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); else up_read_ref_node(&g->node); return fte_tmp; @@ -1661,8 +1663,8 @@ search_again_locked: continue; rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte_tmp); - up_write_ref_node(&fte_tmp->node); - tree_put_node(&fte_tmp->node); + up_write_ref_node(&fte_tmp->node, false); + tree_put_node(&fte_tmp->node, false); kmem_cache_free(steering->ftes_cache, fte); return rule; } @@ -1698,7 +1700,7 @@ skip_search: err = insert_fte(g, fte); if (err) { - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); if (err == -ENOSPC) continue; kmem_cache_free(steering->ftes_cache, fte); @@ -1706,11 +1708,11 @@ skip_search: } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); - up_write_ref_node(&fte->node); - tree_put_node(&fte->node); + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); return rule; } rule = ERR_PTR(-ENOENT); @@ -1752,7 +1754,7 @@ search_again_locked: err = build_match_list(&match_head, ft, spec); if (err) { if (take_write) - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); else up_read_ref_node(&ft->node); return ERR_PTR(err); @@ -1767,7 +1769,7 @@ search_again_locked: if (!IS_ERR(rule) || (PTR_ERR(rule) != -ENOENT && PTR_ERR(rule) != -EAGAIN)) { if (take_write) - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); return rule; } @@ -1783,12 +1785,12 @@ search_again_locked: g = alloc_auto_flow_group(ft, spec); if (IS_ERR(g)) { rule = ERR_CAST(g); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); return rule; } nested_down_write_ref_node(&g->node, FS_LOCK_PARENT); - up_write_ref_node(&ft->node); + up_write_ref_node(&ft->node, false); err = create_auto_flow_group(ft, g); if (err) @@ -1807,17 +1809,17 @@ search_again_locked: } nested_down_write_ref_node(&fte->node, FS_LOCK_CHILD); - up_write_ref_node(&g->node); + up_write_ref_node(&g->node, false); rule = add_rule_fg(g, spec->match_value, flow_act, dest, dest_num, fte); - up_write_ref_node(&fte->node); - tree_put_node(&fte->node); - tree_put_node(&g->node); + up_write_ref_node(&fte->node, false); + tree_put_node(&fte->node, false); + tree_put_node(&g->node, false); return rule; err_release_fg: - up_write_ref_node(&g->node); - tree_put_node(&g->node); + up_write_ref_node(&g->node, false); + tree_put_node(&g->node, false); return ERR_PTR(err); } @@ -1883,7 +1885,7 @@ void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) int i; for (i = handle->num_rules - 1; i >= 0; i--) - tree_remove_node(&handle->rule[i]->node); + tree_remove_node(&handle->rule[i]->node, false); kfree(handle); } EXPORT_SYMBOL(mlx5_del_flow_rules); @@ -1986,7 +1988,7 @@ int mlx5_destroy_flow_table(struct mlx5_flow_table *ft) mutex_unlock(&root->chain_lock); return err; } - if (tree_remove_node(&ft->node)) + if (tree_remove_node(&ft->node, false)) mlx5_core_warn(get_dev(&ft->node), "Flow table %d wasn't destroyed, refcount > 1\n", ft->id); mutex_unlock(&root->chain_lock); @@ -1997,7 +1999,7 @@ EXPORT_SYMBOL(mlx5_destroy_flow_table); void mlx5_destroy_flow_group(struct mlx5_flow_group *fg) { - if (tree_remove_node(&fg->node)) + if (tree_remove_node(&fg->node, false)) mlx5_core_warn(get_dev(&fg->node), "Flow group %d wasn't destroyed, refcount > 1\n", fg->id); } @@ -2381,8 +2383,8 @@ static void clean_tree(struct fs_node *node) tree_get_node(node); list_for_each_entry_safe(iter, temp, &node->children, list) clean_tree(iter); - tree_put_node(node); - tree_remove_node(node); + tree_put_node(node, false); + tree_remove_node(node, false); } } -- cgit v1.2.3 From 718ce4d601dbf73b5dbe024a88c9e34168fe87f2 Mon Sep 17 00:00:00 2001 From: Eli Britstein Date: Tue, 8 Jan 2019 12:15:37 +0200 Subject: net/mlx5: Consolidate update FTE for all removal changes With commit a18e879d4e45 ("net/mlx5e: Annul encap action ordering requirement") and a use-case of e-switch remote mirroring, the incremental/stepped FTE removal process done by the fs core got us to illegal transient states and FW errors: SET_FLOW_TABLE_ENTRY(0x936) op_mod(0x0) failed, status bad parameter(0x3), syndrome (0x9c2e40) To avoid that and improve FTE removal performance, aggregate the FTE's updates that should be applied. Remove the FTE if it is empty, or apply one FW update command with the aggregated updates. Fixes: a18e879d4e45 ("net/mlx5e: Annul encap action ordering requirement") Signed-off-by: Eli Britstein Reviewed-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 31 ++++++++++++++++++----- 1 file changed, 25 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index 42a6952ff9d0..0be3eb86dd84 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -430,9 +430,6 @@ static void modify_fte(struct fs_fte *fte) struct mlx5_core_dev *dev; int err; - if (!fte->modify_mask) - return; - fs_get_obj(fg, fte->node.parent); fs_get_obj(ft, fg->node.parent); dev = get_dev(&fte->node); @@ -475,7 +472,6 @@ static void del_sw_hw_rule(struct fs_node *node) BIT(MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST); } out: - modify_fte(fte); kfree(rule); } @@ -602,7 +598,7 @@ static struct fs_fte *alloc_fte(struct mlx5_flow_table *ft, fte->node.type = FS_TYPE_FLOW_ENTRY; fte->action = *flow_act; - tree_init_node(&fte->node, del_hw_fte, del_sw_fte); + tree_init_node(&fte->node, NULL, del_sw_fte); return fte; } @@ -1882,10 +1878,33 @@ EXPORT_SYMBOL(mlx5_add_flow_rules); void mlx5_del_flow_rules(struct mlx5_flow_handle *handle) { + struct fs_fte *fte; int i; + /* In order to consolidate the HW changes we lock the FTE for other + * changes, and increase its refcount, in order not to perform the + * "del" functions of the FTE. Will handle them here. + * The removal of the rules is done under locked FTE. + * After removing all the handle's rules, if there are remaining + * rules, it means we just need to modify the FTE in FW, and + * unlock/decrease the refcount we increased before. + * Otherwise, it means the FTE should be deleted. First delete the + * FTE in FW. Then, unlock the FTE, and proceed the tree_put_node of + * the FTE, which will handle the last decrease of the refcount, as + * well as required handling of its parent. + */ + fs_get_obj(fte, handle->rule[0]->node.parent); + down_write_ref_node(&fte->node, false); for (i = handle->num_rules - 1; i >= 0; i--) - tree_remove_node(&handle->rule[i]->node, false); + tree_remove_node(&handle->rule[i]->node, true); + if (fte->modify_mask && fte->dests_size) { + modify_fte(fte); + up_write_ref_node(&fte->node, false); + } else { + del_hw_fte(&fte->node); + up_write(&fte->node.lock); + tree_put_node(&fte->node, false); + } kfree(handle); } EXPORT_SYMBOL(mlx5_del_flow_rules); -- cgit v1.2.3 From 5b33eba99fbcf467b913d444dd65313c27623b1a Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Feb 2019 16:18:19 +0200 Subject: net/mlx5e: Properly get the PF number phys port name ndo Currently, we fail to retrieve the PF number in some cases (e.g single ported cards, lag capability), this further results in a call trace issued by the rtnetlink code, since the error value is not -EOPNOTSUPP. Change the implementation to be independent from the lag code and function properly on both two ports and single ported cards. Call Trace: [ 194.525057] mlx5_core 0000:82:00.0: mlx5_lag_get_pf_num:605:(pid 837): no lag device, can't get pf num [ 194.525804] WARNING: CPU: 7 PID: 837 at net/core/rtnetlink.c:3457 rtmsg_ifinfo_build_skb+0x131/0x160 [ 194.529952] CPU: 7 PID: 837 Comm: kworker/7:3 Tainted: G W O 5.0.0-rc7+ #3 [ 194.531307] Workqueue: events linkwatch_event [ 194.531697] RIP: 0010:rtmsg_ifinfo_build_skb+0x131/0x160 [ 194.545007] Call Trace: [ 194.545406] rtmsg_ifinfo_event.part.29+0x1b/0xb0 [ 194.545810] rtmsg_ifinfo+0x51/0x80 [ 194.546209] netdev_state_change+0xc7/0x110 [ 194.546608] ? dev_valid_name+0x1b0/0x1b0 [ 194.547010] ? __local_bh_enable_ip+0xef/0x1d0 [ 194.547411] ? lockdep_hardirqs_on+0x3ea/0x560 [ 194.547811] ? linkwatch_do_dev+0x9b/0x100 [ 194.548207] linkwatch_do_dev+0x9b/0x100 [ 194.548605] __linkwatch_run_queue+0x244/0x430 [ 194.549014] ? linkwatch_schedule_work+0x100/0x100 [ 194.549412] ? lock_acquire+0x10f/0x2d0 [ 194.549816] linkwatch_event+0x3f/0x50 [ 194.550212] process_one_work+0x7d3/0x1460 Fixes: c12ecc230564 ("net/mlx5e: Move to use common phys port names for vport representors") Signed-off-by: Roi Dayan Acked-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c index a1a3e2774989..a66b6ed80b30 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c @@ -1129,16 +1129,17 @@ static int mlx5e_rep_get_phys_port_name(struct net_device *dev, struct mlx5e_priv *priv = netdev_priv(dev); struct mlx5e_rep_priv *rpriv = priv->ppriv; struct mlx5_eswitch_rep *rep = rpriv->rep; - int ret, pf_num; + unsigned int fn; + int ret; - ret = mlx5_lag_get_pf_num(priv->mdev, &pf_num); - if (ret) - return ret; + fn = PCI_FUNC(priv->mdev->pdev->devfn); + if (fn >= MLX5_MAX_PORTS) + return -EOPNOTSUPP; if (rep->vport == MLX5_VPORT_UPLINK) - ret = snprintf(buf, len, "p%d", pf_num); + ret = snprintf(buf, len, "p%d", fn); else - ret = snprintf(buf, len, "pf%dvf%d", pf_num, rep->vport - 1); + ret = snprintf(buf, len, "pf%dvf%d", fn, rep->vport - 1); if (ret >= len) return -EOPNOTSUPP; -- cgit v1.2.3 From 6ffb6303426cee1c312bdc1117724f10b3078906 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Tue, 26 Feb 2019 17:02:42 +0200 Subject: net/mlx5: Remove redundant lag function to get pf num The function is not being used. Signed-off-by: Roi Dayan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/lag.c | 21 --------------------- drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h | 2 -- 2 files changed, 23 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c index 48aa6e030bcf..959605559858 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c @@ -595,27 +595,6 @@ void mlx5_lag_add(struct mlx5_core_dev *dev, struct net_device *netdev) err); } -int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num) -{ - struct mlx5_lag *ldev; - int n; - - ldev = mlx5_lag_dev_get(dev); - if (!ldev) { - mlx5_core_warn(dev, "no lag device, can't get pf num\n"); - return -EINVAL; - } - - for (n = 0; n < MLX5_MAX_PORTS; n++) - if (ldev->pf[n].dev == dev) { - *pf_num = n; - return 0; - } - - mlx5_core_warn(dev, "wasn't able to locate pf in the lag device\n"); - return -EINVAL; -} - /* Must be called with intf_mutex held */ void mlx5_lag_remove(struct mlx5_core_dev *dev) { diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h index 9529cf9623e3..7b331674622c 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h @@ -188,8 +188,6 @@ static inline int mlx5_lag_is_lacp_owner(struct mlx5_core_dev *dev) MLX5_CAP_GEN(dev, lag_master); } -int mlx5_lag_get_pf_num(struct mlx5_core_dev *dev, int *pf_num); - void mlx5_reload_interface(struct mlx5_core_dev *mdev, int protocol); void mlx5_lag_update(struct mlx5_core_dev *dev); -- cgit v1.2.3 From 3d6f3cdf9bfe92c430674308db0f1c8655f2c11d Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Mon, 14 Jan 2019 17:34:28 +0200 Subject: net/mlx5e: IPoIB, Fix RX checksum statistics update Update the RX checksum only if the feature is enabled. Fixes: 9d6bd752c63c ("net/mlx5e: IPoIB, RX handler") Signed-off-by: Feras Daoud Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c index be396e5e4e39..3dde5c7e0739 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c @@ -1295,8 +1295,14 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->protocol = *((__be16 *)(skb->data)); - skb->ip_summed = CHECKSUM_COMPLETE; - skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + if (netdev->features & NETIF_F_RXCSUM) { + skb->ip_summed = CHECKSUM_COMPLETE; + skb->csum = csum_unfold((__force __sum16)cqe->check_sum); + stats->csum_complete++; + } else { + skb->ip_summed = CHECKSUM_NONE; + stats->csum_none++; + } if (unlikely(mlx5e_rx_hw_stamp(tstamp))) skb_hwtstamps(skb)->hwtstamp = @@ -1315,7 +1321,6 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq, skb->dev = netdev; - stats->csum_complete++; stats->packets++; stats->bytes += cqe_bcnt; } -- cgit v1.2.3 From c475e11e82d16133304321bae285c5c1d4cfc856 Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Tue, 5 Mar 2019 16:45:09 +0200 Subject: net/mlx5e: Fix access to non-existing receive queue In case number of channels is changed while interface is down, RSS indirection table is mistakenly not modified accordingly, causing access to out-of-range non-existing object. Fix by updating the RSS indireciton table also in the early return flow of interface down. Fixes: fb35c534b788 ("net/mlx5e: Fix NULL pointer derefernce in set channels error flow") Fixes: bbeb53b8b2c9 ("net/mlx5e: Move RSS params to a dedicated struct") Reported-by: Or Gerlitz Tested-by: Maria Pasechnik Signed-off-by: Tariq Toukan Reviewed-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c index 0804b478ad19..a0987cc5fe4a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c @@ -424,6 +424,9 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv, if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) { priv->channels.params = new_channels.params; + if (!netif_is_rxfh_configured(priv->netdev)) + mlx5e_build_default_indir_rqt(priv->rss_params.indirection_rqt, + MLX5E_INDIR_RQT_SIZE, count); goto out; } -- cgit v1.2.3 From 6e77c413e8e73d0f36b5358b601389d75ec4451c Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 4 Mar 2019 00:27:15 -0800 Subject: net/mlx5: Avoid panic when setting vport mac, getting vport config If we try to set VFs mac address on a VF (not PF) net device, the kernel will be crash. The commands are show as below: $ echo 2 > /sys/class/net/$MLX_PF0/device/sriov_numvfs $ ip link set $MLX_VF0 vf 0 mac 00:11:22:33:44:00 [exception RIP: mlx5_eswitch_set_vport_mac+41] [ffffb8b7079e3688] do_setlink at ffffffff8f67f85b [ffffb8b7079e37a8] __rtnl_newlink at ffffffff8f683778 [ffffb8b7079e3b68] rtnl_newlink at ffffffff8f683a63 [ffffb8b7079e3b90] rtnetlink_rcv_msg at ffffffff8f67d812 [ffffb8b7079e3c10] netlink_rcv_skb at ffffffff8f6b88ab [ffffb8b7079e3c60] netlink_unicast at ffffffff8f6b808f [ffffb8b7079e3ca0] netlink_sendmsg at ffffffff8f6b8412 [ffffb8b7079e3d18] sock_sendmsg at ffffffff8f6452f6 [ffffb8b7079e3d30] ___sys_sendmsg at ffffffff8f645860 [ffffb8b7079e3eb0] __sys_sendmsg at ffffffff8f647a38 [ffffb8b7079e3f38] do_syscall_64 at ffffffff8f00401b [ffffb8b7079e3f50] entry_SYSCALL_64_after_hwframe at ffffffff8f80008c and [exception RIP: mlx5_eswitch_get_vport_config+12] [ffffa70607e57678] mlx5e_get_vf_config at ffffffffc03c7f8f [mlx5_core] [ffffa70607e57688] do_setlink at ffffffffbc67fa59 [ffffa70607e577a8] __rtnl_newlink at ffffffffbc683778 [ffffa70607e57b68] rtnl_newlink at ffffffffbc683a63 [ffffa70607e57b90] rtnetlink_rcv_msg at ffffffffbc67d812 [ffffa70607e57c10] netlink_rcv_skb at ffffffffbc6b88ab [ffffa70607e57c60] netlink_unicast at ffffffffbc6b808f [ffffa70607e57ca0] netlink_sendmsg at ffffffffbc6b8412 [ffffa70607e57d18] sock_sendmsg at ffffffffbc6452f6 [ffffa70607e57d30] ___sys_sendmsg at ffffffffbc645860 [ffffa70607e57eb0] __sys_sendmsg at ffffffffbc647a38 [ffffa70607e57f38] do_syscall_64 at ffffffffbc00401b [ffffa70607e57f50] entry_SYSCALL_64_after_hwframe at ffffffffbc80008c Fixes: a8d70a054a718 ("net/mlx5: E-Switch, Disallow vlan/spoofcheck setup if not being esw manager") Cc: Eli Cohen Signed-off-by: Tonghao Zhang Reviewed-by: Roi Dayan Acked-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index d0b28251abf2..e582a42005f1 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -1931,7 +1931,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw, u64 node_guid; int err = 0; - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport) || is_multicast_ether_addr(mac)) return -EINVAL; @@ -2005,7 +2005,7 @@ int mlx5_eswitch_get_vport_config(struct mlx5_eswitch *esw, { struct mlx5_vport *evport; - if (!MLX5_CAP_GEN(esw->dev, vport_group_manager)) + if (!esw || !MLX5_CAP_GEN(esw->dev, vport_group_manager)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; -- cgit v1.2.3 From 24319258660a84dd77f4be026a55b10a12524919 Mon Sep 17 00:00:00 2001 From: Tonghao Zhang Date: Mon, 4 Mar 2019 00:27:16 -0800 Subject: net/mlx5: Avoid panic when setting vport rate If we try to set VFs rate on a VF (not PF) net device, the kernel will be crash. The commands are show as below: $ echo 2 > /sys/class/net/$MLX_PF0/device/sriov_numvfs $ ip link set $MLX_VF0 vf 0 max_tx_rate 2 min_tx_rate 1 If not applied the first patch ("net/mlx5: Avoid panic when setting vport mac, getting vport config"), the command: $ ip link set $MLX_VF0 vf 0 rate 100 can also crash the kernel. [ 1650.006388] RIP: 0010:mlx5_eswitch_set_vport_rate+0x1f/0x260 [mlx5_core] [ 1650.007092] do_setlink+0x982/0xd20 [ 1650.007129] __rtnl_newlink+0x528/0x7d0 [ 1650.007374] rtnl_newlink+0x43/0x60 [ 1650.007407] rtnetlink_rcv_msg+0x2a2/0x320 [ 1650.007484] netlink_rcv_skb+0xcb/0x100 [ 1650.007519] netlink_unicast+0x17f/0x230 [ 1650.007554] netlink_sendmsg+0x2d2/0x3d0 [ 1650.007592] sock_sendmsg+0x36/0x50 [ 1650.007625] ___sys_sendmsg+0x280/0x2a0 [ 1650.007963] __sys_sendmsg+0x58/0xa0 [ 1650.007998] do_syscall_64+0x5b/0x180 [ 1650.009438] entry_SYSCALL_64_after_hwframe+0x44/0xa9 Fixes: c9497c98901c ("net/mlx5: Add support for setting VF min rate") Cc: Mohamad Haj Yahia Signed-off-by: Tonghao Zhang Reviewed-by: Roi Dayan Acked-by: Saeed Mahameed Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/eswitch.c | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c index e582a42005f1..ecd2c747f726 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c @@ -2297,19 +2297,24 @@ static int normalize_vports_min_rate(struct mlx5_eswitch *esw, u32 divider) int mlx5_eswitch_set_vport_rate(struct mlx5_eswitch *esw, int vport, u32 max_rate, u32 min_rate) { - u32 fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); - bool min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && - fw_max_bw_share >= MLX5_MIN_BW_SHARE; - bool max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); struct mlx5_vport *evport; + u32 fw_max_bw_share; u32 previous_min_rate; u32 divider; + bool min_rate_supported; + bool max_rate_supported; int err = 0; if (!ESW_ALLOWED(esw)) return -EPERM; if (!LEGAL_VPORT(esw, vport)) return -EINVAL; + + fw_max_bw_share = MLX5_CAP_QOS(esw->dev, max_tsar_bw_share); + min_rate_supported = MLX5_CAP_QOS(esw->dev, esw_bw_share) && + fw_max_bw_share >= MLX5_MIN_BW_SHARE; + max_rate_supported = MLX5_CAP_QOS(esw->dev, esw_rate_limit); + if ((min_rate && !min_rate_supported) || (max_rate && !max_rate_supported)) return -EOPNOTSUPP; -- cgit v1.2.3 From fe543b2f174f34a7a751aa08b334fe6b105c4569 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 00:02:32 -0500 Subject: net: liquidio: fix a NULL pointer dereference In case octeon_alloc_soft_command fails, the fix reports the error and returns to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_main.c b/drivers/net/ethernet/cavium/liquidio/lio_main.c index 9b7819fdc9de..fb6f813cff65 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_main.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_main.c @@ -1192,6 +1192,11 @@ static void send_rx_ctrl_cmd(struct lio *lio, int start_stop) sc = (struct octeon_soft_command *) octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, 0); + if (!sc) { + netif_info(lio, rx_err, lio->netdev, + "Failed to allocate octeon_soft_command\n"); + return; + } ncmd = (union octnet_cmd *)sc->virtdptr; -- cgit v1.2.3 From b8b27498659c65034032af79842913844a6cc79a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 7 Mar 2019 23:20:11 +0100 Subject: netfilter: nf_tables: return immediately on empty commit When running 'nft flush ruleset' while no rules exist, we will increment the generation counter and announce a new genid to userspace, yet nothing had changed in the first place. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nf_tables_api.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c index 9d8f51dfc593..513f93118604 100644 --- a/net/netfilter/nf_tables_api.c +++ b/net/netfilter/nf_tables_api.c @@ -6564,6 +6564,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb) struct nft_chain *chain; struct nft_table *table; + if (list_empty(&net->nft.commit_list)) { + mutex_unlock(&net->nft.commit_mutex); + return 0; + } + /* 0. Validate ruleset, otherwise roll back for error reporting. */ if (nf_tables_validate(net) < 0) return -EAGAIN; -- cgit v1.2.3 From dd9d9f5907bb475f8b1796c47d4ecc7fb9b72136 Mon Sep 17 00:00:00 2001 From: Bryan Whitehead Date: Mon, 11 Mar 2019 13:39:39 -0400 Subject: lan743x: Fix RX Kernel Panic It has been noticed that running the speed test at www.speedtest.net occasionally causes a kernel panic. Investigation revealed that under this test RX buffer allocation sometimes fails and returns NULL. But the lan743x driver did not handle this case. This patch fixes this issue by attempting to allocate a buffer before sending the new rx packet to the OS. If the allocation fails then the new rx packet is dropped and the existing buffer is reused in the DMA ring. Updates for v2: Additional 2 locations where allocation was not checked, has been changed to reuse existing buffer. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Bryan Whitehead Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 46 +++++++++++++++++++-------- 1 file changed, 32 insertions(+), 14 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 4d1b4a24907f..2876bab642e2 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -1902,7 +1902,17 @@ static int lan743x_rx_next_index(struct lan743x_rx *rx, int index) return ((++index) % rx->ring_size); } -static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index) +static struct sk_buff *lan743x_rx_allocate_skb(struct lan743x_rx *rx) +{ + int length = 0; + + length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING); + return __netdev_alloc_skb(rx->adapter->netdev, + length, GFP_ATOMIC | GFP_DMA); +} + +static int lan743x_rx_init_ring_element(struct lan743x_rx *rx, int index, + struct sk_buff *skb) { struct lan743x_rx_buffer_info *buffer_info; struct lan743x_rx_descriptor *descriptor; @@ -1911,9 +1921,7 @@ static int lan743x_rx_allocate_ring_element(struct lan743x_rx *rx, int index) length = (LAN743X_MAX_FRAME_SIZE + ETH_HLEN + 4 + RX_HEAD_PADDING); descriptor = &rx->ring_cpu_ptr[index]; buffer_info = &rx->buffer_info[index]; - buffer_info->skb = __netdev_alloc_skb(rx->adapter->netdev, - length, - GFP_ATOMIC | GFP_DMA); + buffer_info->skb = skb; if (!(buffer_info->skb)) return -ENOMEM; buffer_info->dma_ptr = dma_map_single(&rx->adapter->pdev->dev, @@ -2060,8 +2068,19 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) /* packet is available */ if (first_index == last_index) { /* single buffer packet */ + struct sk_buff *new_skb = NULL; int packet_length; + new_skb = lan743x_rx_allocate_skb(rx); + if (!new_skb) { + /* failed to allocate next skb. + * Memory is very low. + * Drop this packet and reuse buffer. + */ + lan743x_rx_reuse_ring_element(rx, first_index); + goto process_extension; + } + buffer_info = &rx->buffer_info[first_index]; skb = buffer_info->skb; descriptor = &rx->ring_cpu_ptr[first_index]; @@ -2081,7 +2100,7 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) skb_put(skb, packet_length - 4); skb->protocol = eth_type_trans(skb, rx->adapter->netdev); - lan743x_rx_allocate_ring_element(rx, first_index); + lan743x_rx_init_ring_element(rx, first_index, new_skb); } else { int index = first_index; @@ -2094,26 +2113,23 @@ static int lan743x_rx_process_packet(struct lan743x_rx *rx) if (first_index <= last_index) { while ((index >= first_index) && (index <= last_index)) { - lan743x_rx_release_ring_element(rx, - index); - lan743x_rx_allocate_ring_element(rx, - index); + lan743x_rx_reuse_ring_element(rx, + index); index = lan743x_rx_next_index(rx, index); } } else { while ((index >= first_index) || (index <= last_index)) { - lan743x_rx_release_ring_element(rx, - index); - lan743x_rx_allocate_ring_element(rx, - index); + lan743x_rx_reuse_ring_element(rx, + index); index = lan743x_rx_next_index(rx, index); } } } +process_extension: if (extension_index >= 0) { descriptor = &rx->ring_cpu_ptr[extension_index]; buffer_info = &rx->buffer_info[extension_index]; @@ -2290,7 +2306,9 @@ static int lan743x_rx_ring_init(struct lan743x_rx *rx) rx->last_head = 0; for (index = 0; index < rx->ring_size; index++) { - ret = lan743x_rx_allocate_ring_element(rx, index); + struct sk_buff *new_skb = lan743x_rx_allocate_skb(rx); + + ret = lan743x_rx_init_ring_element(rx, index, new_skb); if (ret) goto cleanup; } -- cgit v1.2.3 From 41af8b3a097c6fd17a4867efa25966927094f57c Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 01:01:41 -0500 Subject: net: lio_core: fix two NULL pointer dereferences In case octeon_alloc_soft_command fails, the fix reports the error and returns to avoid NULL pointer dereferences. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/liquidio/lio_core.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c index e21bf3724611..1c50c10b5a16 100644 --- a/drivers/net/ethernet/cavium/liquidio/lio_core.c +++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c @@ -1211,6 +1211,11 @@ int liquidio_change_mtu(struct net_device *netdev, int new_mtu) sc = (struct octeon_soft_command *) octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, 16, 0); + if (!sc) { + netif_info(lio, rx_err, lio->netdev, + "Failed to allocate soft command\n"); + return -ENOMEM; + } ncmd = (union octnet_cmd *)sc->virtdptr; @@ -1684,6 +1689,11 @@ int liquidio_set_fec(struct lio *lio, int on_off) sc = octeon_alloc_soft_command(oct, OCTNET_CMD_SIZE, sizeof(struct oct_nic_seapi_resp), 0); + if (!sc) { + dev_err(&oct->pci_dev->dev, + "Failed to allocate soft command\n"); + return -ENOMEM; + } ncmd = sc->virtdptr; resp = sc->virtrptr; -- cgit v1.2.3 From 0b31d98d90f09868dce71319615e19cd1f146fb6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 01:05:41 -0500 Subject: net: thunder: fix a potential NULL pointer dereference In case alloc_ordered_workqueue fails, the fix reports the error and returns -ENOMEM. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/cavium/thunder/nicvf_main.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c index 503cfadff4ac..aa2be4807191 100644 --- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c +++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c @@ -2234,6 +2234,12 @@ static int nicvf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) nic->nicvf_rx_mode_wq = alloc_ordered_workqueue("nicvf_rx_mode_wq_VF%d", WQ_MEM_RECLAIM, nic->vf_id); + if (!nic->nicvf_rx_mode_wq) { + err = -ENOMEM; + dev_err(dev, "Failed to allocate work queue\n"); + goto err_unregister_interrupts; + } + INIT_WORK(&nic->rx_mode_work.work, nicvf_set_rx_mode_task); spin_lock_init(&nic->rx_mode_wq_lock); mutex_init(&nic->rx_mode_mtx); -- cgit v1.2.3 From f2feaefdabb0a6253aa020f65e7388f07a9ed47c Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 11 Mar 2019 11:41:05 -0700 Subject: tcp: Don't access TCP_SKB_CB before initializing it Since commit eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()"), tcp_vX_fill_cb is only called after tcp_filter(). That means, TCP_SKB_CB(skb)->end_seq still points to the IP-part of the cb. We thus should not mock with it, as this can trigger bugs (thanks syzkaller): [ 12.349396] ================================================================== [ 12.350188] BUG: KASAN: slab-out-of-bounds in ip6_datagram_recv_specific_ctl+0x19b3/0x1a20 [ 12.351035] Read of size 1 at addr ffff88006adbc208 by task test_ip6_datagr/1799 Setting end_seq is actually no more necessary in tcp_filter as it gets initialized later on in tcp_vX_fill_cb. Cc: Eric Dumazet Fixes: eeea10b83a13 ("tcp: add tcp_v4_fill_cb()/tcp_v4_restore_cb()") Signed-off-by: Christoph Paasch Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- net/ipv4/tcp_ipv4.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 831d844a27ca..277d71239d75 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -1734,15 +1734,8 @@ EXPORT_SYMBOL(tcp_add_backlog); int tcp_filter(struct sock *sk, struct sk_buff *skb) { struct tcphdr *th = (struct tcphdr *)skb->data; - unsigned int eaten = skb->len; - int err; - err = sk_filter_trim_cap(sk, skb, th->doff * 4); - if (!err) { - eaten -= skb->len; - TCP_SKB_CB(skb)->end_seq -= eaten; - } - return err; + return sk_filter_trim_cap(sk, skb, th->doff * 4); } EXPORT_SYMBOL(tcp_filter); -- cgit v1.2.3 From ee74d0bd4325efb41e38affe5955f920ed973f23 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 11 Mar 2019 13:48:44 -0700 Subject: net/x25: reset state in x25_connect() In case x25_connect() fails and frees the socket neighbour, we also need to undo the change done to x25->state. Before my last bug fix, we had use-after-free so this patch fixes a latent bug. syzbot report : kasan: CONFIG_KASAN_INLINE enabled kasan: GPF could be caused by NULL-ptr deref or user memory access general protection fault: 0000 [#1] PREEMPT SMP KASAN CPU: 1 PID: 16137 Comm: syz-executor.1 Not tainted 5.0.0+ #117 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 RIP: 0010:x25_write_internal+0x1e8/0xdf0 net/x25/x25_subr.c:173 Code: 00 40 88 b5 e0 fe ff ff 0f 85 01 0b 00 00 48 8b 8b 80 04 00 00 48 ba 00 00 00 00 00 fc ff df 48 8d 79 1c 48 89 fe 48 c1 ee 03 <0f> b6 34 16 48 89 fa 83 e2 07 83 c2 03 40 38 f2 7c 09 40 84 f6 0f RSP: 0018:ffff888076717a08 EFLAGS: 00010207 RAX: ffff88805f2f2292 RBX: ffff8880a0ae6000 RCX: 0000000000000000 kobject: 'loop5' (0000000018d0d0ee): kobject_uevent_env RDX: dffffc0000000000 RSI: 0000000000000003 RDI: 000000000000001c RBP: ffff888076717b40 R08: ffff8880950e0580 R09: ffffed100be5e46d R10: ffffed100be5e46c R11: ffff88805f2f2363 R12: ffff888065579840 kobject: 'loop5' (0000000018d0d0ee): fill_kobj_path: path = '/devices/virtual/block/loop5' R13: 1ffff1100ece2f47 R14: 0000000000000013 R15: 0000000000000013 FS: 00007fb88cf43700(0000) GS:ffff8880ae900000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 00007f9a42a41028 CR3: 0000000087a67000 CR4: 00000000001406e0 DR0: 0000000000000000 DR1: 0000000000000000 DR2: 0000000000000000 DR3: 0000000000000000 DR6: 00000000fffe0ff0 DR7: 0000000000000400 Call Trace: x25_release+0xd0/0x340 net/x25/af_x25.c:658 __sock_release+0xd3/0x2b0 net/socket.c:579 sock_close+0x1b/0x30 net/socket.c:1162 __fput+0x2df/0x8d0 fs/file_table.c:278 ____fput+0x16/0x20 fs/file_table.c:309 task_work_run+0x14a/0x1c0 kernel/task_work.c:113 get_signal+0x1961/0x1d50 kernel/signal.c:2388 do_signal+0x87/0x1940 arch/x86/kernel/signal.c:816 exit_to_usermode_loop+0x244/0x2c0 arch/x86/entry/common.c:162 prepare_exit_to_usermode arch/x86/entry/common.c:197 [inline] syscall_return_slowpath arch/x86/entry/common.c:268 [inline] do_syscall_64+0x52d/0x610 arch/x86/entry/common.c:293 entry_SYSCALL_64_after_hwframe+0x49/0xbe RIP: 0033:0x457f29 Code: ad b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 00 66 90 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 7b b8 fb ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007fb88cf42c78 EFLAGS: 00000246 ORIG_RAX: 000000000000002a RAX: fffffffffffffe00 RBX: 0000000000000003 RCX: 0000000000457f29 RDX: 0000000000000012 RSI: 0000000020000080 RDI: 0000000000000004 RBP: 000000000073bf00 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00007fb88cf436d4 R13: 00000000004be462 R14: 00000000004cec98 R15: 00000000ffffffff Modules linked in: Fixes: 95d6ebd53c79 ("net/x25: fix use-after-free in x25_device_event()") Signed-off-by: Eric Dumazet Cc: andrew hendry Reported-by: syzbot Signed-off-by: David S. Miller --- net/x25/af_x25.c | 1 + 1 file changed, 1 insertion(+) diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 27171ac6fe3b..20a511398389 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -825,6 +825,7 @@ out_put_neigh: x25_neigh_put(x25->neighbour); x25->neighbour = NULL; read_unlock_bh(&x25_list_lock); + x25->state = X25_STATE_0; } out_put_route: x25_route_put(rt); -- cgit v1.2.3 From 4280b73092fe136d412db9e8e0d69cbc753cdac6 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 01:53:15 -0500 Subject: net: qlge: fix a potential NULL pointer dereference In case alloc_ordered_workqueue fails, the fix returns -ENOMEM to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qlge/qlge_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qlge/qlge_main.c b/drivers/net/ethernet/qlogic/qlge/qlge_main.c index 096515c27263..07e1c623048e 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge_main.c +++ b/drivers/net/ethernet/qlogic/qlge/qlge_main.c @@ -4681,6 +4681,11 @@ static int ql_init_device(struct pci_dev *pdev, struct net_device *ndev, */ qdev->workqueue = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, ndev->name); + if (!qdev->workqueue) { + err = -ENOMEM; + goto err_out2; + } + INIT_DELAYED_WORK(&qdev->asic_reset_work, ql_asic_reset_work); INIT_DELAYED_WORK(&qdev->mpi_reset_work, ql_mpi_reset_work); INIT_DELAYED_WORK(&qdev->mpi_work, ql_mpi_work); -- cgit v1.2.3 From 5c149314d91876f743ee43efd75b6287ec55480e Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Mon, 11 Mar 2019 02:06:06 -0500 Subject: net: rocker: fix a potential NULL pointer dereference In case kzalloc fails, the fix releases resources and returns NOTIFY_BAD to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/rocker/rocker_main.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/rocker/rocker_main.c b/drivers/net/ethernet/rocker/rocker_main.c index c883aa89b7ca..a71c900ca04f 100644 --- a/drivers/net/ethernet/rocker/rocker_main.c +++ b/drivers/net/ethernet/rocker/rocker_main.c @@ -2805,6 +2805,11 @@ static int rocker_switchdev_event(struct notifier_block *unused, memcpy(&switchdev_work->fdb_info, ptr, sizeof(switchdev_work->fdb_info)); switchdev_work->fdb_info.addr = kzalloc(ETH_ALEN, GFP_ATOMIC); + if (unlikely(!switchdev_work->fdb_info.addr)) { + kfree(switchdev_work); + return NOTIFY_BAD; + } + ether_addr_copy((u8 *)switchdev_work->fdb_info.addr, fdb_info->addr); /* Take a reference on the rocker device */ -- cgit v1.2.3 From c6873d18cb4a5be9623d468c626b5650451ba44a Mon Sep 17 00:00:00 2001 From: xiaofeis Date: Mon, 11 Mar 2019 14:11:40 +0800 Subject: Documentation: devicetree: add a new optional property for port mac address Add documentation for a new optional property local-mac-address which is described in ethernet.txt. Signed-off-by: xiaofeis Signed-off-by: David S. Miller --- Documentation/devicetree/bindings/net/dsa/dsa.txt | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/Documentation/devicetree/bindings/net/dsa/dsa.txt b/Documentation/devicetree/bindings/net/dsa/dsa.txt index 35694c0c376b..d66a5292b9d3 100644 --- a/Documentation/devicetree/bindings/net/dsa/dsa.txt +++ b/Documentation/devicetree/bindings/net/dsa/dsa.txt @@ -71,6 +71,10 @@ properties, described in binding documents: Documentation/devicetree/bindings/net/fixed-link.txt for details. +- local-mac-address : See + Documentation/devicetree/bindings/net/ethernet.txt + for details. + Example The following example shows three switches on three MDIO busses, @@ -97,6 +101,7 @@ linked into one DSA cluster. port@1 { reg = <1>; label = "lan1"; + local-mac-address = [00 00 00 00 00 00]; }; port@2 { -- cgit v1.2.3 From 10010493c1261ca3d7444d372f281009890a5ff7 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 00:54:55 -0500 Subject: isdn: hfcpci: fix potential NULL pointer dereference In case ioremap fails, the fix releases resources and returns. The following printk is for logging purpose and thus is preserved. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/isdn/hardware/mISDN/hfcpci.c | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/drivers/isdn/hardware/mISDN/hfcpci.c b/drivers/isdn/hardware/mISDN/hfcpci.c index ebb3fa2e1d00..362aa5450a5e 100644 --- a/drivers/isdn/hardware/mISDN/hfcpci.c +++ b/drivers/isdn/hardware/mISDN/hfcpci.c @@ -2032,10 +2032,19 @@ setup_hw(struct hfc_pci *hc) hc->hw.fifos = buffer; pci_write_config_dword(hc->pdev, 0x80, hc->hw.dmahandle); hc->hw.pci_io = ioremap((ulong) hc->hw.pci_io, 256); + if (unlikely(!hc->hw.pci_io)) { + printk(KERN_WARNING + "HFC-PCI: Error in ioremap for PCI!\n"); + pci_free_consistent(hc->pdev, 0x8000, hc->hw.fifos, + hc->hw.dmahandle); + return 1; + } + printk(KERN_INFO "HFC-PCI: defined at mem %#lx fifo %#lx(%#lx) IRQ %d HZ %d\n", (u_long) hc->hw.pci_io, (u_long) hc->hw.fifos, (u_long) hc->hw.dmahandle, hc->irq, HZ); + /* enable memory mapped ports, disable busmaster */ pci_write_config_word(hc->pdev, PCI_COMMAND, PCI_ENA_MEMIO); hc->hw.int_m2 = 0; -- cgit v1.2.3 From eb32cfcdef2305dc0e44a65d42801315669bb27e Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 02:06:47 -0500 Subject: net: qlogic: fix a potential NULL pointer dereference In case create_singlethread_workqueue fails, the fix returns -ENOMEM to avoid NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qla3xxx.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/drivers/net/ethernet/qlogic/qla3xxx.c b/drivers/net/ethernet/qlogic/qla3xxx.c index 10b075bc5959..b61b88cbc0c7 100644 --- a/drivers/net/ethernet/qlogic/qla3xxx.c +++ b/drivers/net/ethernet/qlogic/qla3xxx.c @@ -3886,6 +3886,12 @@ static int ql3xxx_probe(struct pci_dev *pdev, netif_stop_queue(ndev); qdev->workqueue = create_singlethread_workqueue(ndev->name); + if (!qdev->workqueue) { + unregister_netdev(ndev); + err = -ENOMEM; + goto err_out_iounmap; + } + INIT_DELAYED_WORK(&qdev->reset_work, ql_reset_work); INIT_DELAYED_WORK(&qdev->tx_timeout_work, ql_tx_timeout_work); INIT_DELAYED_WORK(&qdev->link_state_work, ql_link_state_machine_work); -- cgit v1.2.3 From 9f4d6358e11bbc7b839f9419636188e4151fb6e4 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 02:16:21 -0500 Subject: net: fujitsu: fix a potential NULL pointer dereference In case ioremap fails, the fix releases the pcmcia window and returns -ENOMEM to avoid the NULL pointer dereference. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/fujitsu/fmvj18x_cs.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c index a69cd19a55ae..1eca0fdb9933 100644 --- a/drivers/net/ethernet/fujitsu/fmvj18x_cs.c +++ b/drivers/net/ethernet/fujitsu/fmvj18x_cs.c @@ -547,6 +547,11 @@ static int fmvj18x_get_hwinfo(struct pcmcia_device *link, u_char *node_id) return -1; base = ioremap(link->resource[2]->start, resource_size(link->resource[2])); + if (!base) { + pcmcia_release_window(link, link->resource[2]); + return -ENOMEM; + } + pcmcia_map_mem_page(link, link->resource[2], 0); /* -- cgit v1.2.3 From c7cbc3e937b885c9394bf9d0ca21ceb75c2ac262 Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 02:24:07 -0500 Subject: net: 8390: fix potential NULL pointer dereferences In case ioremap fails, the fix releases resources and returns to avoid NULL pointer dereferences. Signed-off-by: Kangjie Lu Signed-off-by: David S. Miller --- drivers/net/ethernet/8390/pcnet_cs.c | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/drivers/net/ethernet/8390/pcnet_cs.c b/drivers/net/ethernet/8390/pcnet_cs.c index 61e43802b9a5..645efac6310d 100644 --- a/drivers/net/ethernet/8390/pcnet_cs.c +++ b/drivers/net/ethernet/8390/pcnet_cs.c @@ -289,6 +289,11 @@ static struct hw_info *get_hwinfo(struct pcmcia_device *link) virt = ioremap(link->resource[2]->start, resource_size(link->resource[2])); + if (unlikely(!virt)) { + pcmcia_release_window(link, link->resource[2]); + return NULL; + } + for (i = 0; i < NR_INFO; i++) { pcmcia_map_mem_page(link, link->resource[2], hw_info[i].offset & ~(resource_size(link->resource[2])-1)); @@ -1423,6 +1428,11 @@ static int setup_shmem_window(struct pcmcia_device *link, int start_pg, /* Try scribbling on the buffer */ info->base = ioremap(link->resource[3]->start, resource_size(link->resource[3])); + if (unlikely(!info->base)) { + ret = -ENOMEM; + goto failed; + } + for (i = 0; i < (TX_PAGES<<8); i += 2) __raw_writew((i>>1), info->base+offset+i); udelay(100); -- cgit v1.2.3 From 035a14e71f27eefa50087963b94cbdb3580d08bf Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 02:43:18 -0500 Subject: net: sh_eth: fix a missing check of of_get_phy_mode of_get_phy_mode may fail and return a negative error code; the fix checks the return value of of_get_phy_mode and returns NULL of it fails. Fixes: b356e978e92f ("sh_eth: add device tree support") Signed-off-by: Kangjie Lu Reviewed-by: Sergei Shtylyov Reviewed-by: Geert Uytterhoeven Tested-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- drivers/net/ethernet/renesas/sh_eth.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c index 339b2eae2100..e33af371b169 100644 --- a/drivers/net/ethernet/renesas/sh_eth.c +++ b/drivers/net/ethernet/renesas/sh_eth.c @@ -3181,12 +3181,16 @@ static struct sh_eth_plat_data *sh_eth_parse_dt(struct device *dev) struct device_node *np = dev->of_node; struct sh_eth_plat_data *pdata; const char *mac_addr; + int ret; pdata = devm_kzalloc(dev, sizeof(*pdata), GFP_KERNEL); if (!pdata) return NULL; - pdata->phy_interface = of_get_phy_mode(np); + ret = of_get_phy_mode(np); + if (ret < 0) + return NULL; + pdata->phy_interface = ret; mac_addr = of_get_mac_address(np); if (mac_addr) -- cgit v1.2.3 From 4ec850e5dfec092b26cf3b7d5a6c9e444ea4babd Mon Sep 17 00:00:00 2001 From: Kangjie Lu Date: Tue, 12 Mar 2019 02:50:40 -0500 Subject: net: dwmac-sun8i: fix a missing check of of_get_phy_mode of_get_phy_mode may fail and return a negative error code; the fix checks the return value of of_get_phy_mode and returns -EINVAL of it fails. Signed-off-by: Kangjie Lu Acked-by: Maxime Ripard Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c index 0f660af01a4b..195669f550f0 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-sun8i.c @@ -1147,7 +1147,10 @@ static int sun8i_dwmac_probe(struct platform_device *pdev) return ret; } - plat_dat->interface = of_get_phy_mode(dev->of_node); + ret = of_get_phy_mode(dev->of_node); + if (ret < 0) + return -EINVAL; + plat_dat->interface = ret; /* platform data specifying hardware features and callbacks. * hardware features were copied from Allwinner drivers. -- cgit v1.2.3 From 6bab45b4de0d70f56ac163fd0b105c0c76451515 Mon Sep 17 00:00:00 2001 From: Vadim Pasternak Date: Tue, 12 Mar 2019 08:40:41 +0000 Subject: mlxsw: core: Prevent duplication during QSFP module initialization Verify during thermal initialization if QSFP module's entry is already configured in order to prevent duplication. Such scenario could happen in case two switch drivers (PCI and I2C based) coexist and if after boot, splitting configuration is applied for some ports and then I2C based driver is re-probed. In such case after reboot same QSFP module, associated with split will be discovered by I2C based driver few times, and it will cause a crash. It could happen for example on system equipped with BMC (Baseboard Management Controller), running I2C based driver, when the next steps are performed: - System boot - Host side configures port spilt. - BMC side is rebooted. Fixes: 6a79507cfe94 ("mlxsw: core: Extend thermal module with per QSFP module thermal zones") Signed-off-by: Vadim Pasternak Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/core_thermal.c | 21 +++++++++++---------- 1 file changed, 11 insertions(+), 10 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c index 0b85c7252f9e..472f63f9fac5 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/core_thermal.c @@ -111,7 +111,6 @@ struct mlxsw_thermal { struct mlxsw_thermal_trip trips[MLXSW_THERMAL_NUM_TRIPS]; enum thermal_device_mode mode; struct mlxsw_thermal_module *tz_module_arr; - unsigned int tz_module_num; }; static inline u8 mlxsw_state_to_duty(int state) @@ -711,6 +710,9 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, module = mlxsw_reg_pmlp_module_get(pmlp_pl, 0); module_tz = &thermal->tz_module_arr[module]; + /* Skip if parent is already set (case of port split). */ + if (module_tz->parent) + return 0; module_tz->module = module; module_tz->parent = thermal; memcpy(module_tz->trips, default_thermal_trips, @@ -718,13 +720,7 @@ mlxsw_thermal_module_init(struct device *dev, struct mlxsw_core *core, /* Initialize all trip point. */ mlxsw_thermal_module_trips_reset(module_tz); /* Update trip point according to the module data. */ - err = mlxsw_thermal_module_trips_update(dev, core, module_tz); - if (err) - return err; - - thermal->tz_module_num++; - - return 0; + return mlxsw_thermal_module_trips_update(dev, core, module_tz); } static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) @@ -732,6 +728,7 @@ static void mlxsw_thermal_module_fini(struct mlxsw_thermal_module *module_tz) if (module_tz && module_tz->tzdev) { mlxsw_thermal_module_tz_fini(module_tz->tzdev); module_tz->tzdev = NULL; + module_tz->parent = NULL; } } @@ -740,6 +737,7 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, struct mlxsw_thermal *thermal) { unsigned int module_count = mlxsw_core_max_ports(core); + struct mlxsw_thermal_module *module_tz; int i, err; thermal->tz_module_arr = kcalloc(module_count, @@ -754,8 +752,11 @@ mlxsw_thermal_modules_init(struct device *dev, struct mlxsw_core *core, goto err_unreg_tz_module_arr; } - for (i = 0; i < thermal->tz_module_num; i++) { - err = mlxsw_thermal_module_tz_init(&thermal->tz_module_arr[i]); + for (i = 0; i < module_count - 1; i++) { + module_tz = &thermal->tz_module_arr[i]; + if (!module_tz->parent) + continue; + err = mlxsw_thermal_module_tz_init(module_tz); if (err) goto err_unreg_tz_module_arr; } -- cgit v1.2.3 From 426aa1fc622527d7941d0e1b0032b6bd697534e4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 12 Mar 2019 08:40:42 +0000 Subject: mlxsw: minimal: Initialize base_mac Currently base_mac is not initialized which causes wrong reporting of zeroed parent_id to userspace. Fix this by initializing base_mac properly. Fixes: c100e47caa8e ("mlxsw: minimal: Add ethtool support") Signed-off-by: Jiri Pirko Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/minimal.c | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlxsw/minimal.c b/drivers/net/ethernet/mellanox/mlxsw/minimal.c index 68bee9572a1b..00c390024350 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/minimal.c +++ b/drivers/net/ethernet/mellanox/mlxsw/minimal.c @@ -34,6 +34,18 @@ struct mlxsw_m_port { u8 module; }; +static int mlxsw_m_base_mac_get(struct mlxsw_m *mlxsw_m) +{ + char spad_pl[MLXSW_REG_SPAD_LEN] = {0}; + int err; + + err = mlxsw_reg_query(mlxsw_m->core, MLXSW_REG(spad), spad_pl); + if (err) + return err; + mlxsw_reg_spad_base_mac_memcpy_from(spad_pl, mlxsw_m->base_mac); + return 0; +} + static int mlxsw_m_port_dummy_open_stop(struct net_device *dev) { return 0; @@ -314,6 +326,12 @@ static int mlxsw_m_init(struct mlxsw_core *mlxsw_core, mlxsw_m->core = mlxsw_core; mlxsw_m->bus_info = mlxsw_bus_info; + err = mlxsw_m_base_mac_get(mlxsw_m); + if (err) { + dev_err(mlxsw_m->bus_info->dev, "Failed to get base mac\n"); + return err; + } + err = mlxsw_m_ports_create(mlxsw_m); if (err) { dev_err(mlxsw_m->bus_info->dev, "Failed to create ports\n"); -- cgit v1.2.3 From e15ce4b8d11227007577e6dc1364d288b8874fbe Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 12 Mar 2019 17:05:47 +0200 Subject: net/mlx4_core: Fix reset flow when in command polling mode As part of unloading a device, the driver switches from FW command event mode to FW command polling mode. Part of switching over to polling mode is freeing the command context array memory (unfortunately, currently, without NULLing the command context array pointer). The reset flow calls "complete" to complete all outstanding fw commands (if we are in event mode). The check for event vs. polling mode here is to test if the command context array pointer is NULL. If the reset flow is activated after the switch to polling mode, it will attempt (incorrectly) to complete all the commands in the context array -- because the pointer was not NULLed when the driver switched over to polling mode. As a result, we have a use-after-free situation, which results in a kernel crash. For example: BUG: unable to handle kernel NULL pointer dereference at (null) IP: [] __wake_up_common+0x2e/0x90 PGD 0 Oops: 0000 [#1] SMP Modules linked in: netconsole nfsv3 nfs_acl nfs lockd grace ... CPU: 2 PID: 940 Comm: kworker/2:3 Kdump: loaded Not tainted 3.10.0-862.el7.x86_64 #1 Hardware name: Microsoft Corporation Virtual Machine/Virtual Machine, BIOS 090006 04/28/2016 Workqueue: events hv_eject_device_work [pci_hyperv] task: ffff8d1734ca0fd0 ti: ffff8d17354bc000 task.ti: ffff8d17354bc000 RIP: 0010:[] [] __wake_up_common+0x2e/0x90 RSP: 0018:ffff8d17354bfa38 EFLAGS: 00010082 RAX: 0000000000000000 RBX: ffff8d17362d42c8 RCX: 0000000000000000 RDX: 0000000000000001 RSI: 0000000000000003 RDI: ffff8d17362d42c8 RBP: ffff8d17354bfa70 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000298 R11: ffff8d173610e000 R12: ffff8d17362d42d0 R13: 0000000000000246 R14: 0000000000000000 R15: 0000000000000003 FS: 0000000000000000(0000) GS:ffff8d1802680000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: 0000000000000000 CR3: 00000000f16d8000 CR4: 00000000001406e0 Call Trace: [] complete+0x3c/0x50 [] mlx4_cmd_wake_completions+0x70/0x90 [mlx4_core] [] mlx4_enter_error_state+0xe1/0x380 [mlx4_core] [] mlx4_comm_cmd+0x29b/0x360 [mlx4_core] [] __mlx4_cmd+0x441/0x920 [mlx4_core] [] ? __slab_free+0x81/0x2f0 [] ? __radix_tree_lookup+0x84/0xf0 [] mlx4_free_mtt_range+0x5b/0xb0 [mlx4_core] [] mlx4_mtt_cleanup+0x17/0x20 [mlx4_core] [] mlx4_free_eq+0xa7/0x1c0 [mlx4_core] [] mlx4_cleanup_eq_table+0xde/0x130 [mlx4_core] [] mlx4_unload_one+0x118/0x300 [mlx4_core] [] mlx4_remove_one+0x91/0x1f0 [mlx4_core] The fix is to set the command context array pointer to NULL after freeing the array. Fixes: f5aef5aa3506 ("net/mlx4_core: Activate reset flow upon fatal command cases") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index c19e74e6ac94..44081acd48fb 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2689,6 +2689,7 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) down(&priv->cmd.event_sem); kfree(priv->cmd.context); + priv->cmd.context = NULL; up(&priv->cmd.poll_sem); up_write(&priv->cmd.switch_sem); -- cgit v1.2.3 From c07d27927f2f2e96fcd27bb9fb330c9ea65612d0 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 12 Mar 2019 17:05:48 +0200 Subject: net/mlx4_core: Fix locking in SRIOV mode when switching between events and polling In procedures mlx4_cmd_use_events() and mlx4_cmd_use_polling(), we need to guarantee that there are no FW commands in progress on the comm channel (for VFs) or wrapped FW commands (on the PF) when SRIOV is active. We do this by also taking the slave_cmd_mutex when SRIOV is active. This is especially important when switching from event to polling, since we free the command-context array during the switch. If there are FW commands in progress (e.g., waiting for a completion event), the completion event handler will access freed memory. Since the decision to use comm_wait or comm_poll is taken before grabbing the event_sem/poll_sem in mlx4_comm_cmd_wait/poll, we must take the slave_cmd_mutex as well (to guarantee that the decision to use events or polling and the call to the appropriate cmd function are atomic). Fixes: a7e1f04905e5 ("net/mlx4_core: Fix deadlock when switching between polling and event fw commands") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/cmd.c | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/drivers/net/ethernet/mellanox/mlx4/cmd.c b/drivers/net/ethernet/mellanox/mlx4/cmd.c index 44081acd48fb..a5d5d6fc1da0 100644 --- a/drivers/net/ethernet/mellanox/mlx4/cmd.c +++ b/drivers/net/ethernet/mellanox/mlx4/cmd.c @@ -2645,6 +2645,8 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) if (!priv->cmd.context) return -ENOMEM; + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); down_write(&priv->cmd.switch_sem); for (i = 0; i < priv->cmd.max_cmds; ++i) { priv->cmd.context[i].token = i; @@ -2670,6 +2672,8 @@ int mlx4_cmd_use_events(struct mlx4_dev *dev) down(&priv->cmd.poll_sem); priv->cmd.use_events = 1; up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); return err; } @@ -2682,6 +2686,8 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) struct mlx4_priv *priv = mlx4_priv(dev); int i; + if (mlx4_is_mfunc(dev)) + mutex_lock(&priv->cmd.slave_cmd_mutex); down_write(&priv->cmd.switch_sem); priv->cmd.use_events = 0; @@ -2693,6 +2699,8 @@ void mlx4_cmd_use_polling(struct mlx4_dev *dev) up(&priv->cmd.poll_sem); up_write(&priv->cmd.switch_sem); + if (mlx4_is_mfunc(dev)) + mutex_unlock(&priv->cmd.slave_cmd_mutex); } struct mlx4_cmd_mailbox *mlx4_alloc_cmd_mailbox(struct mlx4_dev *dev) -- cgit v1.2.3 From 8511a653e9250ef36b95803c375a7be0e2edb628 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Tue, 12 Mar 2019 17:05:49 +0200 Subject: net/mlx4_core: Fix qp mtt size calculation Calculation of qp mtt size (in function mlx4_RST2INIT_wrapper) ultimately depends on function roundup_pow_of_two. If the amount of memory required by the QP is less than one page, roundup_pow_of_two is called with argument zero. In this case, the roundup_pow_of_two result is undefined. Calling roundup_pow_of_two with a zero argument resulted in the following stack trace: UBSAN: Undefined behaviour in ./include/linux/log2.h:61:13 shift exponent 64 is too large for 64-bit type 'long unsigned int' CPU: 4 PID: 26939 Comm: rping Tainted: G OE 4.19.0-rc1 Hardware name: Supermicro X9DR3-F/X9DR3-F, BIOS 3.2a 07/09/2015 Call Trace: dump_stack+0x9a/0xeb ubsan_epilogue+0x9/0x7c __ubsan_handle_shift_out_of_bounds+0x254/0x29d ? __ubsan_handle_load_invalid_value+0x180/0x180 ? debug_show_all_locks+0x310/0x310 ? sched_clock+0x5/0x10 ? sched_clock+0x5/0x10 ? sched_clock_cpu+0x18/0x260 ? find_held_lock+0x35/0x1e0 ? mlx4_RST2INIT_QP_wrapper+0xfb1/0x1440 [mlx4_core] mlx4_RST2INIT_QP_wrapper+0xfb1/0x1440 [mlx4_core] Fix this by explicitly testing for zero, and returning one if the argument is zero (assuming that the next higher power of 2 in this case should be one). Fixes: c82e9aa0a8bc ("mlx4_core: resource tracking for HCA resources used by guests") Signed-off-by: Jack Morgenstein Signed-off-by: Tariq Toukan Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx4/resource_tracker.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c index eb13d3618162..4356f3a58002 100644 --- a/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c +++ b/drivers/net/ethernet/mellanox/mlx4/resource_tracker.c @@ -2719,13 +2719,13 @@ static int qp_get_mtt_size(struct mlx4_qp_context *qpc) int total_pages; int total_mem; int page_offset = (be32_to_cpu(qpc->params2) >> 6) & 0x3f; + int tot; sq_size = 1 << (log_sq_size + log_sq_sride + 4); rq_size = (srq|rss|xrc) ? 0 : (1 << (log_rq_size + log_rq_stride + 4)); total_mem = sq_size + rq_size; - total_pages = - roundup_pow_of_two((total_mem + (page_offset << 6)) >> - page_shift); + tot = (total_mem + (page_offset << 6)) >> page_shift; + total_pages = !tot ? 1 : roundup_pow_of_two(tot); return total_pages; } -- cgit v1.2.3 From deb6bfabdbb634e91f36a4e9cb00a7137d72d886 Mon Sep 17 00:00:00 2001 From: Bryan Whitehead Date: Wed, 13 Mar 2019 15:55:48 -0400 Subject: lan743x: Fix TX Stall Issue It has been observed that tx queue may stall while downloading from certain web sites (example www.speedtest.net) The cause has been tracked down to a corner case where the tx interrupt vector was disabled automatically, but was not re enabled later. The lan743x has two mechanisms to enable/disable individual interrupts. Interrupts can be enabled/disabled by individual source, and they can also be enabled/disabled by individual vector which has been mapped to the source. Both must be enabled for interrupts to work properly. The TX code path, primarily uses the interrupt enable/disable of the TX source bit, while leaving the vector enabled all the time. However, while investigating this issue it was noticed that the driver requested the use of the vector auto clear feature. The test above revealed a case where the vector enable was cleared unintentionally. This patch fixes the issue by deleting the lines that request the vector auto clear feature to be used. Fixes: 23f0703c125b ("lan743x: Add main source files for new lan743x driver") Signed-off-by: Bryan Whitehead Signed-off-by: David S. Miller --- drivers/net/ethernet/microchip/lan743x_main.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c index 2876bab642e2..13e6bf13ac4d 100644 --- a/drivers/net/ethernet/microchip/lan743x_main.c +++ b/drivers/net/ethernet/microchip/lan743x_main.c @@ -585,8 +585,7 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) if (adapter->csr.flags & LAN743X_CSR_FLAG_SUPPORTS_INTR_AUTO_SET_CLR) { - flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR | - LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET | + flags = LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_SET | LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_SET | LAN743X_VECTOR_FLAG_SOURCE_ENABLE_AUTO_CLEAR | LAN743X_VECTOR_FLAG_SOURCE_STATUS_AUTO_CLEAR; @@ -599,12 +598,6 @@ static int lan743x_intr_open(struct lan743x_adapter *adapter) /* map TX interrupt to vector */ int_vec_map1 |= INT_VEC_MAP1_TX_VEC_(index, vector); lan743x_csr_write(adapter, INT_VEC_MAP1, int_vec_map1); - if (flags & - LAN743X_VECTOR_FLAG_VECTOR_ENABLE_AUTO_CLEAR) { - int_vec_en_auto_clr |= INT_VEC_EN_(vector); - lan743x_csr_write(adapter, INT_VEC_EN_AUTO_CLR, - int_vec_en_auto_clr); - } /* Remove TX interrupt from shared mask */ intr->vector_list[0].int_mask &= ~int_bit; -- cgit v1.2.3 From 5b5f99b186906d198f4455b3add911c87ab361fc Mon Sep 17 00:00:00 2001 From: Zhike Wang Date: Mon, 11 Mar 2019 03:15:54 -0700 Subject: net_sched: return correct value for *notify* functions It is confusing to directly use return value of netlink_send()/ netlink_unicast() as the return value of *notify*, as it may be not error at all. Example: in tc_del_tfilter(), after calling tfilter_del_notify(), it will goto errout if (err). However, the netlink_send()/netlink_unicast() will return positive value even for successful case. So it may not call tcf_chain_tp_remove() and so on to clean up the resource, as a result, resource is leaked. It may be easier to only check the return value of tfilter_del_nofiy(), but it is more clean to correct all related functions. Co-developed-by: Zengmo Gao Signed-off-by: Zhike Wang Acked-by: Cong Wang Signed-off-by: David S. Miller --- net/sched/cls_api.c | 32 +++++++++++++++++++++++--------- net/sched/sch_api.c | 15 +++++++++++---- 2 files changed, 34 insertions(+), 13 deletions(-) diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 2c2aac4ac721..dc10525e90e7 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1893,6 +1893,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -1906,10 +1907,14 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + else + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, @@ -1941,12 +1946,15 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); - - err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + else + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); if (err < 0) NL_SET_ERR_MSG(extack, "Failed to send filter delete notification"); + + if (err > 0) + err = 0; return err; } @@ -2688,6 +2696,7 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, struct tcf_block *block = chain->block; struct net *net = block->net; struct sk_buff *skb; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -2701,9 +2710,14 @@ static int tc_chain_notify(struct tcf_chain *chain, struct sk_buff *oskb, } if (unicast) - return netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + err = netlink_unicast(net->rtnl, skb, portid, MSG_DONTWAIT); + else + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + flags & NLM_F_ECHO); - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } static int tc_chain_notify_delete(const struct tcf_proto_ops *tmplt_ops, diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c index 352b46f98440..fb8f138b9776 100644 --- a/net/sched/sch_api.c +++ b/net/sched/sch_api.c @@ -1824,6 +1824,7 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, { struct sk_buff *skb; u32 portid = oskb ? NETLINK_CB(oskb).portid : 0; + int err = 0; skb = alloc_skb(NLMSG_GOODSIZE, GFP_KERNEL); if (!skb) @@ -1834,8 +1835,11 @@ static int tclass_notify(struct net *net, struct sk_buff *oskb, return -EINVAL; } - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } static int tclass_del_notify(struct net *net, @@ -1866,8 +1870,11 @@ static int tclass_del_notify(struct net *net, return err; } - return rtnetlink_send(skb, net, portid, RTNLGRP_TC, - n->nlmsg_flags & NLM_F_ECHO); + err = rtnetlink_send(skb, net, portid, RTNLGRP_TC, + n->nlmsg_flags & NLM_F_ECHO); + if (err > 0) + err = 0; + return err; } #ifdef CONFIG_NET_CLS -- cgit v1.2.3 From 4504ab0e6eb801555368cbb3011ab0530f659d4b Mon Sep 17 00:00:00 2001 From: Vakul Garg Date: Tue, 12 Mar 2019 08:22:57 +0000 Subject: net/tls: Inform user space about send buffer availability A previous fix ("tls: Fix write space handling") assumed that user space application gets informed about the socket send buffer availability when tls_push_sg() gets called. Inside tls_push_sg(), in case do_tcp_sendpages() returns 0, the function returns without calling ctx->sk_write_space. Further, the new function tls_sw_write_space() did not invoke ctx->sk_write_space. This leads to situation that user space application encounters a lockup always waiting for socket send buffer to become available. Rather than call ctx->sk_write_space from tls_push_sg(), it should be called from tls_write_space. So whenever tcp stack invokes sk->sk_write_space after freeing socket send buffer, we always declare the same to user space by the way of invoking ctx->sk_write_space. Fixes: 7463d3a2db0ef ("tls: Fix write space handling") Signed-off-by: Vakul Garg Reviewed-by: Boris Pismenny Signed-off-by: David S. Miller --- net/tls/tls_device.c | 3 --- net/tls/tls_main.c | 3 ++- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c index 4a1da837a733..135a7ee9db03 100644 --- a/net/tls/tls_device.c +++ b/net/tls/tls_device.c @@ -558,9 +558,6 @@ void tls_device_write_space(struct sock *sk, struct tls_context *ctx) MSG_DONTWAIT | MSG_NOSIGNAL); sk->sk_allocation = sk_allocation; } - - if (!rc) - ctx->sk_write_space(sk); } void handle_device_resync(struct sock *sk, u32 seq, u64 rcd_sn) diff --git a/net/tls/tls_main.c b/net/tls/tls_main.c index 17e8667917aa..df921a2904b9 100644 --- a/net/tls/tls_main.c +++ b/net/tls/tls_main.c @@ -146,7 +146,6 @@ retry: } ctx->in_tcp_sendpages = false; - ctx->sk_write_space(sk); return 0; } @@ -228,6 +227,8 @@ static void tls_write_space(struct sock *sk) else #endif tls_sw_write_space(sk, ctx); + + ctx->sk_write_space(sk); } static void tls_ctx_free(struct tls_context *ctx) -- cgit v1.2.3 From 163d1c3d6f17556ed3c340d3789ea93be95d6c28 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 12 Mar 2019 06:50:11 -0700 Subject: l2tp: fix infoleak in l2tp_ip6_recvmsg() Back in 2013 Hannes took care of most of such leaks in commit bceaa90240b6 ("inet: prevent leakage of uninitialized memory to user in recv syscalls") But the bug in l2tp_ip6_recvmsg() has not been fixed. syzbot report : BUG: KMSAN: kernel-infoleak in _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 CPU: 1 PID: 10996 Comm: syz-executor362 Not tainted 5.0.0+ #11 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x173/0x1d0 lib/dump_stack.c:113 kmsan_report+0x12e/0x2a0 mm/kmsan/kmsan.c:600 kmsan_internal_check_memory+0x9f4/0xb10 mm/kmsan/kmsan.c:694 kmsan_copy_to_user+0xab/0xc0 mm/kmsan/kmsan_hooks.c:601 _copy_to_user+0x16b/0x1f0 lib/usercopy.c:32 copy_to_user include/linux/uaccess.h:174 [inline] move_addr_to_user+0x311/0x570 net/socket.c:227 ___sys_recvmsg+0xb65/0x1310 net/socket.c:2283 do_recvmmsg+0x646/0x10c0 net/socket.c:2390 __sys_recvmmsg net/socket.c:2469 [inline] __do_sys_recvmmsg net/socket.c:2492 [inline] __se_sys_recvmmsg+0x1d1/0x350 net/socket.c:2485 __x64_sys_recvmmsg+0x62/0x80 net/socket.c:2485 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x445819 Code: e8 6c b6 02 00 48 83 c4 18 c3 0f 1f 80 00 00 00 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 0f 83 2b 12 fc ff c3 66 2e 0f 1f 84 00 00 00 00 RSP: 002b:00007f64453eddb8 EFLAGS: 00000246 ORIG_RAX: 000000000000012b RAX: ffffffffffffffda RBX: 00000000006dac28 RCX: 0000000000445819 RDX: 0000000000000005 RSI: 0000000020002f80 RDI: 0000000000000003 RBP: 00000000006dac20 R08: 0000000000000000 R09: 0000000000000000 R10: 0000000000000000 R11: 0000000000000246 R12: 00000000006dac2c R13: 00007ffeba8f87af R14: 00007f64453ee9c0 R15: 20c49ba5e353f7cf Local variable description: ----addr@___sys_recvmsg Variable was created at: ___sys_recvmsg+0xf6/0x1310 net/socket.c:2244 do_recvmmsg+0x646/0x10c0 net/socket.c:2390 Bytes 0-31 of 32 are uninitialized Memory access of size 32 starts at ffff8880ae62fbb0 Data copied to user address 0000000020000000 Fixes: a32e0eec7042 ("l2tp: introduce L2TPv3 IP encapsulation support for IPv6") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: David S. Miller --- net/l2tp/l2tp_ip6.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c index 0ae6899edac0..37a69df17cab 100644 --- a/net/l2tp/l2tp_ip6.c +++ b/net/l2tp/l2tp_ip6.c @@ -674,9 +674,6 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, if (flags & MSG_OOB) goto out; - if (addr_len) - *addr_len = sizeof(*lsa); - if (flags & MSG_ERRQUEUE) return ipv6_recv_error(sk, msg, len, addr_len); @@ -706,6 +703,7 @@ static int l2tp_ip6_recvmsg(struct sock *sk, struct msghdr *msg, size_t len, lsa->l2tp_conn_id = 0; if (ipv6_addr_type(&lsa->l2tp_addr) & IPV6_ADDR_LINKLOCAL) lsa->l2tp_scope_id = inet6_iif(skb); + *addr_len = sizeof(*lsa); } if (np->rxopt.all) -- cgit v1.2.3 From 63b6c974c24cc6a56cd1c17609d8d218dcff999c Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Tue, 12 Mar 2019 10:50:59 -0700 Subject: MAINTAINERS: GENET & SYSTEMPORT: Add internal Broadcom list There is a patchwork instance behind bcm-kernel-feedback-list that is helpful to track submissions, add this list for the Broadcom GENET and SYSTEMPORT drivers. Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- MAINTAINERS | 2 ++ 1 file changed, 2 insertions(+) diff --git a/MAINTAINERS b/MAINTAINERS index f470eb4aeb21..c394519a4aaa 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3143,6 +3143,7 @@ F: drivers/phy/broadcom/phy-brcm-usb* BROADCOM GENET ETHERNET DRIVER M: Doug Berger M: Florian Fainelli +L: bcm-kernel-feedback-list@broadcom.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/genet/ @@ -3250,6 +3251,7 @@ F: drivers/spi/spi-iproc-qspi.c BROADCOM SYSTEMPORT ETHERNET DRIVER M: Florian Fainelli +L: bcm-kernel-feedback-list@broadcom.com L: netdev@vger.kernel.org S: Supported F: drivers/net/ethernet/broadcom/bcmsysport.* -- cgit v1.2.3 From 9417d81f4f8adfe20a12dd1fadf73a618cbd945d Mon Sep 17 00:00:00 2001 From: Xin Long Date: Wed, 13 Mar 2019 17:00:48 +0800 Subject: pptp: dst_release sk_dst_cache in pptp_sock_destruct sk_setup_caps() is called to set sk->sk_dst_cache in pptp_connect, so we have to dst_release(sk->sk_dst_cache) in pptp_sock_destruct, otherwise, the dst refcnt will leak. It can be reproduced by this syz log: r1 = socket$pptp(0x18, 0x1, 0x2) bind$pptp(r1, &(0x7f0000000100)={0x18, 0x2, {0x0, @local}}, 0x1e) connect$pptp(r1, &(0x7f0000000000)={0x18, 0x2, {0x3, @remote}}, 0x1e) Consecutive dmesg warnings will occur: unregister_netdevice: waiting for lo to become free. Usage count = 1 v1->v2: - use rcu_dereference_protected() instead of rcu_dereference_check(), as suggested by Eric. Fixes: 00959ade36ac ("PPTP: PPP over IPv4 (Point-to-Point Tunneling Protocol)") Reported-by: Xiumei Mu Signed-off-by: Xin Long Signed-off-by: David S. Miller --- drivers/net/ppp/pptp.c | 1 + 1 file changed, 1 insertion(+) diff --git a/drivers/net/ppp/pptp.c b/drivers/net/ppp/pptp.c index 8f09edd811e9..50c60550f295 100644 --- a/drivers/net/ppp/pptp.c +++ b/drivers/net/ppp/pptp.c @@ -532,6 +532,7 @@ static void pptp_sock_destruct(struct sock *sk) pppox_unbind_sock(sk); } skb_queue_purge(&sk->sk_receive_queue); + dst_release(rcu_dereference_protected(sk->sk_dst_cache, 1)); } static int pptp_create(struct net *net, struct socket *sock, int kern) -- cgit v1.2.3