From 074014abdf2bd2a00da3dd14a6ae04cafc1d62cc Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Tue, 13 Aug 2019 15:28:18 +0100 Subject: net: ieee802154: remove redundant assignment to rc Variable rc is initialized to a value that is never read and it is re-assigned later. The initialization is redundant and can be removed. Addresses-Coverity: ("Unused value") Signed-off-by: Colin Ian King Signed-off-by: Stefan Schmidt --- net/ieee802154/socket.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ieee802154/socket.c b/net/ieee802154/socket.c index bc6b912603f1..1e9876813392 100644 --- a/net/ieee802154/socket.c +++ b/net/ieee802154/socket.c @@ -1102,7 +1102,7 @@ static struct packet_type ieee802154_packet_type = { static int __init af_ieee802154_init(void) { - int rc = -EINVAL; + int rc; rc = proto_register(&ieee802154_raw_prot, 1); if (rc) -- cgit v1.2.3 From 3ee1bb7aae97324ec9078da1f00cb2176919563f Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Mon, 12 Aug 2019 04:57:27 -0700 Subject: batman-adv: fix uninit-value in batadv_netlink_get_ifindex() batadv_netlink_get_ifindex() needs to make sure user passed a correct u32 attribute. syzbot reported : BUG: KMSAN: uninit-value in batadv_netlink_dump_hardif+0x70d/0x880 net/batman-adv/netlink.c:968 CPU: 1 PID: 11705 Comm: syz-executor888 Not tainted 5.1.0+ #1 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:77 [inline] dump_stack+0x191/0x1f0 lib/dump_stack.c:113 kmsan_report+0x130/0x2a0 mm/kmsan/kmsan.c:622 __msan_warning+0x75/0xe0 mm/kmsan/kmsan_instr.c:310 batadv_netlink_dump_hardif+0x70d/0x880 net/batman-adv/netlink.c:968 genl_lock_dumpit+0xc6/0x130 net/netlink/genetlink.c:482 netlink_dump+0xa84/0x1ab0 net/netlink/af_netlink.c:2253 __netlink_dump_start+0xa3a/0xb30 net/netlink/af_netlink.c:2361 genl_family_rcv_msg net/netlink/genetlink.c:550 [inline] genl_rcv_msg+0xfc1/0x1a40 net/netlink/genetlink.c:627 netlink_rcv_skb+0x431/0x620 net/netlink/af_netlink.c:2486 genl_rcv+0x63/0x80 net/netlink/genetlink.c:638 netlink_unicast_kernel net/netlink/af_netlink.c:1311 [inline] netlink_unicast+0xf3e/0x1020 net/netlink/af_netlink.c:1337 netlink_sendmsg+0x127e/0x12f0 net/netlink/af_netlink.c:1926 sock_sendmsg_nosec net/socket.c:651 [inline] sock_sendmsg net/socket.c:661 [inline] ___sys_sendmsg+0xcc6/0x1200 net/socket.c:2260 __sys_sendmsg net/socket.c:2298 [inline] __do_sys_sendmsg net/socket.c:2307 [inline] __se_sys_sendmsg+0x305/0x460 net/socket.c:2305 __x64_sys_sendmsg+0x4a/0x70 net/socket.c:2305 do_syscall_64+0xbc/0xf0 arch/x86/entry/common.c:291 entry_SYSCALL_64_after_hwframe+0x63/0xe7 RIP: 0033:0x440209 Fixes: b60620cf567b ("batman-adv: netlink: hardif query") Signed-off-by: Eric Dumazet Reported-by: syzbot Signed-off-by: Sven Eckelmann Signed-off-by: Simon Wunderlich --- net/batman-adv/netlink.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/batman-adv/netlink.c b/net/batman-adv/netlink.c index 6f08fd122a8d..7e052d6f759b 100644 --- a/net/batman-adv/netlink.c +++ b/net/batman-adv/netlink.c @@ -164,7 +164,7 @@ batadv_netlink_get_ifindex(const struct nlmsghdr *nlh, int attrtype) { struct nlattr *attr = nlmsg_find_attr(nlh, GENL_HDRLEN, attrtype); - return attr ? nla_get_u32(attr) : 0; + return (attr && nla_len(attr) == sizeof(u32)) ? nla_get_u32(attr) : 0; } /** -- cgit v1.2.3 From f20faa06d83de440bec8e200870784c3458793c4 Mon Sep 17 00:00:00 2001 From: Todd Seidelmann Date: Wed, 14 Aug 2019 10:54:16 -0400 Subject: netfilter: ebtables: Fix argument order to ADD_COUNTER The ordering of arguments to the x_tables ADD_COUNTER macro appears to be wrong in ebtables (cf. ip_tables.c, ip6_tables.c, and arp_tables.c). This causes data corruption in the ebtables userspace tools because they get incorrect packet & byte counts from the kernel. Fixes: d72133e628803 ("netfilter: ebtables: use ADD_COUNTER macro") Signed-off-by: Todd Seidelmann Signed-off-by: Pablo Neira Ayuso --- net/bridge/netfilter/ebtables.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/bridge/netfilter/ebtables.c b/net/bridge/netfilter/ebtables.c index c8177a89f52c..4096d8a74a2b 100644 --- a/net/bridge/netfilter/ebtables.c +++ b/net/bridge/netfilter/ebtables.c @@ -221,7 +221,7 @@ unsigned int ebt_do_table(struct sk_buff *skb, return NF_DROP; } - ADD_COUNTER(*(counter_base + i), 1, skb->len); + ADD_COUNTER(*(counter_base + i), skb->len, 1); /* these should only watch: not modify, nor tell us * what to do with the packet @@ -959,8 +959,8 @@ static void get_counters(const struct ebt_counter *oldcounters, continue; counter_base = COUNTER_BASE(oldcounters, nentries, cpu); for (i = 0; i < nentries; i++) - ADD_COUNTER(counters[i], counter_base[i].pcnt, - counter_base[i].bcnt); + ADD_COUNTER(counters[i], counter_base[i].bcnt, + counter_base[i].pcnt); } } @@ -1280,7 +1280,7 @@ static int do_update_counters(struct net *net, const char *name, /* we add to the counters of the first cpu */ for (i = 0; i < num_counters; i++) - ADD_COUNTER(t->private->counters[i], tmp[i].pcnt, tmp[i].bcnt); + ADD_COUNTER(t->private->counters[i], tmp[i].bcnt, tmp[i].pcnt); write_unlock_bh(&t->lock); ret = 0; -- cgit v1.2.3 From 14c415862c0630e01712a4eeaf6159a2b1b6d2a4 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 16 Aug 2019 11:23:58 +0200 Subject: netfilter: nft_flow_offload: missing netlink attribute policy The netlink attribute policy for NFTA_FLOW_TABLE_NAME is missing. Fixes: a3c90f7a2323 ("netfilter: nf_tables: flow offload expression") Signed-off-by: Pablo Neira Ayuso --- net/netfilter/nft_flow_offload.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'net') diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c index 060a4ed46d5e..01705ad74a9a 100644 --- a/net/netfilter/nft_flow_offload.c +++ b/net/netfilter/nft_flow_offload.c @@ -149,6 +149,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx, return nft_chain_validate_hooks(ctx->chain, hook_mask); } +static const struct nla_policy nft_flow_offload_policy[NFTA_FLOW_MAX + 1] = { + [NFTA_FLOW_TABLE_NAME] = { .type = NLA_STRING, + .len = NFT_NAME_MAXLEN - 1 }, +}; + static int nft_flow_offload_init(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]) @@ -207,6 +212,7 @@ static const struct nft_expr_ops nft_flow_offload_ops = { static struct nft_expr_type nft_flow_offload_type __read_mostly = { .name = "flow_offload", .ops = &nft_flow_offload_ops, + .policy = nft_flow_offload_policy, .maxattr = NFTA_FLOW_MAX, .owner = THIS_MODULE, }; -- cgit v1.2.3 From 89a26cd4b501e9511d3cd3d22327fc76a75a38b3 Mon Sep 17 00:00:00 2001 From: Juliana Rodrigueiro Date: Fri, 16 Aug 2019 17:02:22 +0200 Subject: netfilter: xt_nfacct: Fix alignment mismatch in xt_nfacct_match_info When running a 64-bit kernel with a 32-bit iptables binary, the size of the xt_nfacct_match_info struct diverges. kernel: sizeof(struct xt_nfacct_match_info) : 40 iptables: sizeof(struct xt_nfacct_match_info)) : 36 Trying to append nfacct related rules results in an unhelpful message. Although it is suggested to look for more information in dmesg, nothing can be found there. # iptables -A -m nfacct --nfacct-name iptables: Invalid argument. Run `dmesg' for more information. This patch fixes the memory misalignment by enforcing 8-byte alignment within the struct's first revision. This solution is often used in many other uapi netfilter headers. Signed-off-by: Juliana Rodrigueiro Acked-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/xt_nfacct.h | 5 +++++ net/netfilter/xt_nfacct.c | 36 ++++++++++++++++++++++---------- 2 files changed, 30 insertions(+), 11 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/netfilter/xt_nfacct.h b/include/uapi/linux/netfilter/xt_nfacct.h index 5c8a4d760ee3..b5123ab8d54a 100644 --- a/include/uapi/linux/netfilter/xt_nfacct.h +++ b/include/uapi/linux/netfilter/xt_nfacct.h @@ -11,4 +11,9 @@ struct xt_nfacct_match_info { struct nf_acct *nfacct; }; +struct xt_nfacct_match_info_v1 { + char name[NFACCT_NAME_MAX]; + struct nf_acct *nfacct __attribute__((aligned(8))); +}; + #endif /* _XT_NFACCT_MATCH_H */ diff --git a/net/netfilter/xt_nfacct.c b/net/netfilter/xt_nfacct.c index d0ab1adf5bff..5aab6df74e0f 100644 --- a/net/netfilter/xt_nfacct.c +++ b/net/netfilter/xt_nfacct.c @@ -54,25 +54,39 @@ nfacct_mt_destroy(const struct xt_mtdtor_param *par) nfnl_acct_put(info->nfacct); } -static struct xt_match nfacct_mt_reg __read_mostly = { - .name = "nfacct", - .family = NFPROTO_UNSPEC, - .checkentry = nfacct_mt_checkentry, - .match = nfacct_mt, - .destroy = nfacct_mt_destroy, - .matchsize = sizeof(struct xt_nfacct_match_info), - .usersize = offsetof(struct xt_nfacct_match_info, nfacct), - .me = THIS_MODULE, +static struct xt_match nfacct_mt_reg[] __read_mostly = { + { + .name = "nfacct", + .revision = 0, + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info), + .usersize = offsetof(struct xt_nfacct_match_info, nfacct), + .me = THIS_MODULE, + }, + { + .name = "nfacct", + .revision = 1, + .family = NFPROTO_UNSPEC, + .checkentry = nfacct_mt_checkentry, + .match = nfacct_mt, + .destroy = nfacct_mt_destroy, + .matchsize = sizeof(struct xt_nfacct_match_info_v1), + .usersize = offsetof(struct xt_nfacct_match_info_v1, nfacct), + .me = THIS_MODULE, + }, }; static int __init nfacct_mt_init(void) { - return xt_register_match(&nfacct_mt_reg); + return xt_register_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } static void __exit nfacct_mt_exit(void) { - xt_unregister_match(&nfacct_mt_reg); + xt_unregister_matches(nfacct_mt_reg, ARRAY_SIZE(nfacct_mt_reg)); } module_init(nfacct_mt_init); -- cgit v1.2.3 From ef8d8ccdc216f797e66cb4a1372f5c4c285ce1e4 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Fri, 16 Aug 2019 21:26:22 -0700 Subject: tcp: make sure EPOLLOUT wont be missed As Jason Baron explained in commit 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure"), it is crucial we properly set SOCK_NOSPACE when needed. However, Jason patch had a bug, because the 'nonblocking' status as far as sk_stream_wait_memory() is concerned is governed by MSG_DONTWAIT flag passed at sendmsg() time : long timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); So it is very possible that tcp sendmsg() calls sk_stream_wait_memory(), and that sk_stream_wait_memory() returns -EAGAIN with SOCK_NOSPACE cleared, if sk->sk_sndtimeo has been set to a small (but not zero) value. This patch removes the 'noblock' variable since we must always set SOCK_NOSPACE if -EAGAIN is returned. It also renames the do_nonblock label since we might reach this code path even if we were in blocking mode. Fixes: 790ba4566c1a ("tcp: set SOCK_NOSPACE under memory pressure") Signed-off-by: Eric Dumazet Cc: Jason Baron Reported-by: Vladimir Rutsky Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Acked-by: Jason Baron Signed-off-by: David S. Miller --- net/core/stream.c | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'net') diff --git a/net/core/stream.c b/net/core/stream.c index e94bb02a5629..4f1d4aa5fb38 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -120,7 +120,6 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) int err = 0; long vm_wait = 0; long current_timeo = *timeo_p; - bool noblock = (*timeo_p ? false : true); DEFINE_WAIT_FUNC(wait, woken_wake_function); if (sk_stream_memory_free(sk)) @@ -133,11 +132,8 @@ int sk_stream_wait_memory(struct sock *sk, long *timeo_p) if (sk->sk_err || (sk->sk_shutdown & SEND_SHUTDOWN)) goto do_error; - if (!*timeo_p) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); - goto do_nonblock; - } + if (!*timeo_p) + goto do_eagain; if (signal_pending(current)) goto do_interrupted; sk_clear_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -169,7 +165,13 @@ out: do_error: err = -EPIPE; goto out; -do_nonblock: +do_eagain: + /* Make sure that whenever EAGAIN is returned, EPOLLOUT event can + * be generated later. + * When TCP receives ACK packets that make room, tcp_check_space() + * only calls tcp_new_space() if SOCK_NOSPACE is set. + */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); err = -EAGAIN; goto out; do_interrupted: -- cgit v1.2.3 From fb89c39455e4b49881c5a42761bd71f03d3ef888 Mon Sep 17 00:00:00 2001 From: Ivan Khoronzhuk Date: Thu, 15 Aug 2019 23:56:35 +0300 Subject: xdp: unpin xdp umem pages in error path Fix mem leak caused by missed unpin routine for umem pages. Fixes: 8aef7340ae9695 ("xsk: introduce xdp_umem_page") Signed-off-by: Ivan Khoronzhuk Acked-by: Jonathan Lemon Signed-off-by: Daniel Borkmann --- net/xdp/xdp_umem.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index 83de74ca729a..688aac7a6943 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -365,7 +365,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pages = kcalloc(umem->npgs, sizeof(*umem->pages), GFP_KERNEL); if (!umem->pages) { err = -ENOMEM; - goto out_account; + goto out_pin; } for (i = 0; i < umem->npgs; i++) @@ -373,6 +373,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) return 0; +out_pin: + xdp_umem_unpin_pages(umem); out_account: xdp_umem_unaccount_pages(umem); return err; -- cgit v1.2.3 From 4651d1802f7063e4d8c0bcad957f46ece0c04024 Mon Sep 17 00:00:00 2001 From: Jason Baron Date: Mon, 19 Aug 2019 14:36:01 -0400 Subject: net/smc: make sure EPOLLOUT is raised Currently, we are only explicitly setting SOCK_NOSPACE on a write timeout for non-blocking sockets. Epoll() edge-trigger mode relies on SOCK_NOSPACE being set when -EAGAIN is returned to ensure that EPOLLOUT is raised. Expand the setting of SOCK_NOSPACE to non-blocking sockets as well that can use SO_SNDTIMEO to adjust their write timeout. This mirrors the behavior that Eric Dumazet introduced for tcp sockets. Signed-off-by: Jason Baron Cc: Eric Dumazet Cc: Ursula Braun Cc: Karsten Graul Signed-off-by: David S. Miller --- net/smc/smc_tx.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c index f0de323d15d6..6c8f09c1ce51 100644 --- a/net/smc/smc_tx.c +++ b/net/smc/smc_tx.c @@ -76,13 +76,11 @@ static int smc_tx_wait(struct smc_sock *smc, int flags) DEFINE_WAIT_FUNC(wait, woken_wake_function); struct smc_connection *conn = &smc->conn; struct sock *sk = &smc->sk; - bool noblock; long timeo; int rc = 0; /* similar to sk_stream_wait_memory */ timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); - noblock = timeo ? false : true; add_wait_queue(sk_sleep(sk), &wait); while (1) { sk_set_bit(SOCKWQ_ASYNC_NOSPACE, sk); @@ -97,8 +95,8 @@ static int smc_tx_wait(struct smc_sock *smc, int flags) break; } if (!timeo) { - if (noblock) - set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); + /* ensure EPOLLOUT is subsequently generated */ + set_bit(SOCK_NOSPACE, &sk->sk_socket->flags); rc = -EAGAIN; break; } -- cgit v1.2.3 From 96a1b033ac24ccc58156f05c183b2cba0b9412d5 Mon Sep 17 00:00:00 2001 From: "Terry S. Duncan" Date: Mon, 19 Aug 2019 17:24:02 -0700 Subject: net/ncsi: Ensure 32-bit boundary for data cksum The NCSI spec indicates that if the data does not end on a 32 bit boundary, one to three padding bytes equal to 0x00 shall be present to align the checksum field to a 32-bit boundary. Signed-off-by: Terry S. Duncan Signed-off-by: David S. Miller --- net/ncsi/ncsi-cmd.c | 2 +- net/ncsi/ncsi-rsp.c | 9 ++++++--- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index 5c3fad8cba57..eab4346b0a39 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -54,7 +54,7 @@ static void ncsi_cmd_build_header(struct ncsi_pkt_hdr *h, checksum = ncsi_calculate_checksum((unsigned char *)h, sizeof(*h) + nca->payload); pchecksum = (__be32 *)((void *)h + sizeof(struct ncsi_pkt_hdr) + - nca->payload); + ALIGN(nca->payload, 4)); *pchecksum = htonl(checksum); } diff --git a/net/ncsi/ncsi-rsp.c b/net/ncsi/ncsi-rsp.c index 7581bf919885..d876bd55f356 100644 --- a/net/ncsi/ncsi-rsp.c +++ b/net/ncsi/ncsi-rsp.c @@ -47,7 +47,8 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, if (ntohs(h->code) != NCSI_PKT_RSP_C_COMPLETED || ntohs(h->reason) != NCSI_PKT_RSP_R_NO_ERROR) { netdev_dbg(nr->ndp->ndev.dev, - "NCSI: non zero response/reason code\n"); + "NCSI: non zero response/reason code %04xh, %04xh\n", + ntohs(h->code), ntohs(h->reason)); return -EPERM; } @@ -55,7 +56,7 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, * sender doesn't support checksum according to NCSI * specification. */ - pchecksum = (__be32 *)((void *)(h + 1) + payload - 4); + pchecksum = (__be32 *)((void *)(h + 1) + ALIGN(payload, 4) - 4); if (ntohl(*pchecksum) == 0) return 0; @@ -63,7 +64,9 @@ static int ncsi_validate_rsp_pkt(struct ncsi_request *nr, sizeof(*h) + payload - 4); if (*pchecksum != htonl(checksum)) { - netdev_dbg(nr->ndp->ndev.dev, "NCSI: checksum mismatched\n"); + netdev_dbg(nr->ndp->ndev.dev, + "NCSI: checksum mismatched; recd: %08x calc: %08x\n", + *pchecksum, htonl(checksum)); return -EINVAL; } -- cgit v1.2.3 From a1c4cd67840ef80f6ca5f73326fa9a6719303a95 Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 20 Aug 2019 13:52:47 +0800 Subject: net: fix __ip_mc_inc_group usage in ip_mc_inc_group, memory allocation flag, not mcast mode, is expected by __ip_mc_inc_group similar issue in __ip_mc_join_group, both mcase mode and gfp_t are needed here, so use ____ip_mc_inc_group(...) Fixes: 9fb20801dab4 ("net: Fix ip_mc_{dec,inc}_group allocation context") Signed-off-by: Li RongQing Signed-off-by: Florian Fainelli Signed-off-by: Zhang Yu Signed-off-by: David S. Miller --- net/ipv4/igmp.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c index 180f6896b98b..480d0b22db1a 100644 --- a/net/ipv4/igmp.c +++ b/net/ipv4/igmp.c @@ -1475,7 +1475,7 @@ EXPORT_SYMBOL(__ip_mc_inc_group); void ip_mc_inc_group(struct in_device *in_dev, __be32 addr) { - __ip_mc_inc_group(in_dev, addr, MCAST_EXCLUDE); + __ip_mc_inc_group(in_dev, addr, GFP_KERNEL); } EXPORT_SYMBOL(ip_mc_inc_group); @@ -2197,7 +2197,7 @@ static int __ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr, iml->sflist = NULL; iml->sfmode = mode; rcu_assign_pointer(inet->mc_list, iml); - __ip_mc_inc_group(in_dev, addr, mode); + ____ip_mc_inc_group(in_dev, addr, mode, GFP_KERNEL); err = 0; done: return err; -- cgit v1.2.3 From 5fd2f91ad483baffdbe798f8a08f1b41442d1e24 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 1 Aug 2019 09:30:33 +0200 Subject: mac80211: fix possible sta leak If TDLS station addition is rejected, the sta memory is leaked. Avoid this by moving the check before the allocation. Cc: stable@vger.kernel.org Fixes: 7ed5285396c2 ("mac80211: don't initiate TDLS connection if station is not associated to AP") Link: https://lore.kernel.org/r/20190801073033.7892-1-johannes@sipsolutions.net Signed-off-by: Johannes Berg --- net/mac80211/cfg.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c index 4d458067d80d..111c400199ec 100644 --- a/net/mac80211/cfg.c +++ b/net/mac80211/cfg.c @@ -1546,6 +1546,11 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (is_multicast_ether_addr(mac)) return -EINVAL; + if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER) && + sdata->vif.type == NL80211_IFTYPE_STATION && + !sdata->u.mgd.associated) + return -EINVAL; + sta = sta_info_alloc(sdata, mac, GFP_KERNEL); if (!sta) return -ENOMEM; @@ -1553,10 +1558,6 @@ static int ieee80211_add_station(struct wiphy *wiphy, struct net_device *dev, if (params->sta_flags_set & BIT(NL80211_STA_FLAG_TDLS_PEER)) sta->sta.tdls = true; - if (sta->sta.tdls && sdata->vif.type == NL80211_IFTYPE_STATION && - !sdata->u.mgd.associated) - return -EINVAL; - err = sta_apply_parameters(local, sta, params); if (err) { sta_info_free(local, sta); -- cgit v1.2.3 From b67fd72e84a88cae64cea8ab47ccdaab3bb3094d Mon Sep 17 00:00:00 2001 From: Alexander Wetzel Date: Mon, 5 Aug 2019 14:34:00 +0200 Subject: cfg80211: Fix Extended Key ID key install checks Fix two shortcomings in the Extended Key ID API: 1) Allow the userspace to install pairwise keys using keyid 1 without NL80211_KEY_NO_TX set. This allows the userspace to install and activate pairwise keys with keyid 1 in the same way as for keyid 0, simplifying the API usage for e.g. FILS and FT key installs. 2) IEEE 802.11 - 2016 restricts Extended Key ID usage to CCMP/GCMP ciphers in IEEE 802.11 - 2016 "9.4.2.25.4 RSN capabilities". Enforce that when installing a key. Cc: stable@vger.kernel.org # 5.2 Fixes: 6cdd3979a2bd ("nl80211/cfg80211: Extended Key ID support") Signed-off-by: Alexander Wetzel Link: https://lore.kernel.org/r/20190805123400.51567-1-alexander@wetzel-home.de Signed-off-by: Johannes Berg --- net/wireless/util.c | 23 ++++++++++++++--------- 1 file changed, 14 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/net/wireless/util.c b/net/wireless/util.c index d0e35b7b9e35..e74837824cea 100644 --- a/net/wireless/util.c +++ b/net/wireless/util.c @@ -233,25 +233,30 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev, switch (params->cipher) { case WLAN_CIPHER_SUITE_TKIP: + /* Extended Key ID can only be used with CCMP/GCMP ciphers */ + if ((pairwise && key_idx) || + params->mode != NL80211_KEY_RX_TX) + return -EINVAL; + break; case WLAN_CIPHER_SUITE_CCMP: case WLAN_CIPHER_SUITE_CCMP_256: case WLAN_CIPHER_SUITE_GCMP: case WLAN_CIPHER_SUITE_GCMP_256: - /* IEEE802.11-2016 allows only 0 and - when using Extended Key - * ID - 1 as index for pairwise keys. + /* IEEE802.11-2016 allows only 0 and - when supporting + * Extended Key ID - 1 as index for pairwise keys. * @NL80211_KEY_NO_TX is only allowed for pairwise keys when * the driver supports Extended Key ID. * @NL80211_KEY_SET_TX can't be set when installing and * validating a key. */ - if (params->mode == NL80211_KEY_NO_TX) { - if (!wiphy_ext_feature_isset(&rdev->wiphy, - NL80211_EXT_FEATURE_EXT_KEY_ID)) - return -EINVAL; - else if (!pairwise || key_idx < 0 || key_idx > 1) + if ((params->mode == NL80211_KEY_NO_TX && !pairwise) || + params->mode == NL80211_KEY_SET_TX) + return -EINVAL; + if (wiphy_ext_feature_isset(&rdev->wiphy, + NL80211_EXT_FEATURE_EXT_KEY_ID)) { + if (pairwise && (key_idx < 0 || key_idx > 1)) return -EINVAL; - } else if ((pairwise && key_idx) || - params->mode == NL80211_KEY_SET_TX) { + } else if (pairwise && key_idx) { return -EINVAL; } break; -- cgit v1.2.3 From 0d31d4dbf38412f5b8b11b4511d07b840eebe8cb Mon Sep 17 00:00:00 2001 From: "Hodaszi, Robert" Date: Fri, 14 Jun 2019 13:16:01 +0000 Subject: Revert "cfg80211: fix processing world regdomain when non modular" This reverts commit 96cce12ff6e0 ("cfg80211: fix processing world regdomain when non modular"). Re-triggering a reg_process_hint with the last request on all events, can make the regulatory domain fail in case of multiple WiFi modules. On slower boards (espacially with mdev), enumeration of the WiFi modules can end up in an intersected regulatory domain, and user cannot set it with 'iw reg set' anymore. This is happening, because: - 1st module enumerates, queues up a regulatory request - request gets processed by __reg_process_hint_driver(): - checks if previous was set by CORE -> yes - checks if regulator domain changed -> yes, from '00' to e.g. 'US' -> sends request to the 'crda' - 2nd module enumerates, queues up a regulator request (which triggers the reg_todo() work) - reg_todo() -> reg_process_pending_hints() sees, that the last request is not processed yet, so it tries to process it again. __reg_process_hint driver() will run again, and: - checks if the last request's initiator was the core -> no, it was the driver (1st WiFi module) - checks, if the previous initiator was the driver -> yes - checks if the regulator domain changed -> yes, it was '00' (set by core, and crda call did not return yet), and should be changed to 'US' ------> __reg_process_hint_driver calls an intersect Besides, the reg_process_hint call with the last request is meaningless since the crda call has a timeout work. If that timeout expires, the first module's request will lost. Cc: stable@vger.kernel.org Fixes: 96cce12ff6e0 ("cfg80211: fix processing world regdomain when non modular") Signed-off-by: Robert Hodaszi Link: https://lore.kernel.org/r/20190614131600.GA13897@a1-hr Signed-off-by: Johannes Berg --- net/wireless/reg.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/wireless/reg.c b/net/wireless/reg.c index 4831ad745f91..327479ce69f5 100644 --- a/net/wireless/reg.c +++ b/net/wireless/reg.c @@ -2788,7 +2788,7 @@ static void reg_process_pending_hints(void) /* When last_request->processed becomes true this will be rescheduled */ if (lr && !lr->processed) { - reg_process_hint(lr); + pr_debug("Pending regulatory request, waiting for it to be processed...\n"); return; } -- cgit v1.2.3 From f17f7648a49aa6728649ddf79bdbcac4f1970ce4 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 20 Aug 2019 10:19:47 +0800 Subject: ipv6/addrconf: allow adding multicast addr if IFA_F_MCAUTOJOIN is set In commit 93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on") we added a new flag IFA_F_MCAUTOJOIN to make user able to add multicast address on ethernet interface. This works for IPv4, but not for IPv6. See the inet6_addr_add code. static int inet6_addr_add() { ... if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, true...) } ifp = ipv6_add_addr(idev, cfg, true, extack); <- always fail with maddr if (!IS_ERR(ifp)) { ... } else if (cfg->ifa_flags & IFA_F_MCAUTOJOIN) { ipv6_mc_config(net->ipv6.mc_autojoin_sk, false...) } } But in ipv6_add_addr() it will check the address type and reject multicast address directly. So this feature is never worked for IPv6. We should not remove the multicast address check totally in ipv6_add_addr(), but could accept multicast address only when IFA_F_MCAUTOJOIN flag supplied. v2: update commit description Fixes: 93a714d6b53d ("multicast: Extend ip address command to enable multicast group join/leave on") Reported-by: Jianlin Shi Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index dc73888c7859..ced995f3fec4 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -1045,7 +1045,8 @@ ipv6_add_addr(struct inet6_dev *idev, struct ifa6_config *cfg, int err = 0; if (addr_type == IPV6_ADDR_ANY || - addr_type & IPV6_ADDR_MULTICAST || + (addr_type & IPV6_ADDR_MULTICAST && + !(cfg->ifa_flags & IFA_F_MCAUTOJOIN)) || (!(idev->dev->flags & IFF_LOOPBACK) && !netif_is_l3_master(idev->dev) && addr_type & IPV6_ADDR_LOOPBACK)) -- cgit v1.2.3 From 0f404bbdaf1624f4d25dd67da7ff85eab005beac Mon Sep 17 00:00:00 2001 From: Li RongQing Date: Tue, 20 Aug 2019 10:46:00 +0800 Subject: net: fix icmp_socket_deliver argument 2 input it expects a unsigned int, but got a __be32 Signed-off-by: Li RongQing Signed-off-by: Zhang Yu Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index 1510e951f451..bf7b5d45de99 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -902,7 +902,7 @@ static bool icmp_redirect(struct sk_buff *skb) return false; } - icmp_socket_deliver(skb, icmp_hdr(skb)->un.gateway); + icmp_socket_deliver(skb, ntohl(icmp_hdr(skb)->un.gateway)); return true; } -- cgit v1.2.3 From f6edbf2d616435cda7823942c20005ce198e97c8 Mon Sep 17 00:00:00 2001 From: "Justin.Lee1@Dell.com" Date: Wed, 21 Aug 2019 21:24:52 +0000 Subject: net/ncsi: Fix the payload copying for the request coming from Netlink The request coming from Netlink should use the OEM generic handler. The standard command handler expects payload in bytes/words/dwords but the actual payload is stored in data if the request is coming from Netlink. Signed-off-by: Justin Lee Reviewed-by: Vijay Khemka Signed-off-by: David S. Miller --- net/ncsi/ncsi-cmd.c | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ncsi/ncsi-cmd.c b/net/ncsi/ncsi-cmd.c index eab4346b0a39..0187e65176c0 100644 --- a/net/ncsi/ncsi-cmd.c +++ b/net/ncsi/ncsi-cmd.c @@ -309,14 +309,21 @@ static struct ncsi_request *ncsi_alloc_command(struct ncsi_cmd_arg *nca) int ncsi_xmit_cmd(struct ncsi_cmd_arg *nca) { + struct ncsi_cmd_handler *nch = NULL; struct ncsi_request *nr; + unsigned char type; struct ethhdr *eh; - struct ncsi_cmd_handler *nch = NULL; int i, ret; + /* Use OEM generic handler for Netlink request */ + if (nca->req_flags == NCSI_REQ_FLAG_NETLINK_DRIVEN) + type = NCSI_PKT_CMD_OEM; + else + type = nca->type; + /* Search for the handler */ for (i = 0; i < ARRAY_SIZE(ncsi_cmd_handlers); i++) { - if (ncsi_cmd_handlers[i].type == nca->type) { + if (ncsi_cmd_handlers[i].type == type) { if (ncsi_cmd_handlers[i].handler) nch = &ncsi_cmd_handlers[i]; else -- cgit v1.2.3 From db0b99f59ae4d934a0af1a5670706d7c2a4b58ea Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 23 Aug 2019 15:44:36 +0200 Subject: ipv6: propagate ipv6_add_dev's error returns out of ipv6_find_idev Currently, ipv6_find_idev returns NULL when ipv6_add_dev fails, ignoring the specific error value. This results in addrconf_add_dev returning ENOBUFS in all cases, which is unfortunate in cases such as: # ip link add dummyX type dummy # ip link set dummyX mtu 1200 up # ip addr add 2000::/64 dev dummyX RTNETLINK answers: No buffer space available Commit a317a2f19da7 ("ipv6: fail early when creating netdev named all or default") introduced error returns in ipv6_add_dev. Before that, that function would simply return NULL for all failures. Signed-off-by: Sabrina Dubroca Signed-off-by: David S. Miller --- net/ipv6/addrconf.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index ced995f3fec4..6a576ff92c39 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -478,7 +478,7 @@ static struct inet6_dev *ipv6_find_idev(struct net_device *dev) if (!idev) { idev = ipv6_add_dev(dev); if (IS_ERR(idev)) - return NULL; + return idev; } if (dev->flags&IFF_UP) @@ -2466,8 +2466,8 @@ static struct inet6_dev *addrconf_add_dev(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (!idev) - return ERR_PTR(-ENOBUFS); + if (IS_ERR(idev)) + return idev; if (idev->cnf.disable_ipv6) return ERR_PTR(-EACCES); @@ -3159,7 +3159,7 @@ static void init_loopback(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (!idev) { + if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3374,7 +3374,7 @@ static void addrconf_sit_config(struct net_device *dev) */ idev = ipv6_find_idev(dev); - if (!idev) { + if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -3399,7 +3399,7 @@ static void addrconf_gre_config(struct net_device *dev) ASSERT_RTNL(); idev = ipv6_find_idev(dev); - if (!idev) { + if (IS_ERR(idev)) { pr_debug("%s: add_dev failed\n", __func__); return; } @@ -4773,8 +4773,8 @@ inet6_rtm_newaddr(struct sk_buff *skb, struct nlmsghdr *nlh, IFA_F_MCAUTOJOIN | IFA_F_OPTIMISTIC; idev = ipv6_find_idev(dev); - if (!idev) - return -ENOBUFS; + if (IS_ERR(idev)) + return PTR_ERR(idev); if (!ipv6_allow_optimistic_dad(net, idev)) cfg.ifa_flags &= ~IFA_F_OPTIMISTIC; -- cgit v1.2.3 From db38de39684dda2bf307f41797db2831deba64e9 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Wed, 21 Aug 2019 14:17:20 +0200 Subject: flow_dissector: Fix potential use-after-free on BPF_PROG_DETACH Call to bpf_prog_put(), with help of call_rcu(), queues an RCU-callback to free the program once a grace period has elapsed. The callback can run together with new RCU readers that started after the last grace period. New RCU readers can potentially see the "old" to-be-freed or already-freed pointer to the program object before the RCU update-side NULLs it. Reorder the operations so that the RCU update-side resets the protected pointer before the end of the grace period after which the program will be freed. Fixes: d58e468b1112 ("flow_dissector: implements flow dissector BPF hook") Reported-by: Lorenz Bauer Signed-off-by: Jakub Sitnicki Acked-by: Petar Penkov Signed-off-by: Daniel Borkmann --- net/core/flow_dissector.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c index 3e6fedb57bc1..2470b4b404e6 100644 --- a/net/core/flow_dissector.c +++ b/net/core/flow_dissector.c @@ -142,8 +142,8 @@ int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr) mutex_unlock(&flow_dissector_mutex); return -ENOENT; } - bpf_prog_put(attached); RCU_INIT_POINTER(net->flow_dissector_prog, NULL); + bpf_prog_put(attached); mutex_unlock(&flow_dissector_mutex); return 0; } -- cgit v1.2.3 From 2c238177bd7f4b14bdf7447cc1cd9bb791f147e6 Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Tue, 20 Aug 2019 17:50:25 +0200 Subject: bpf: allow narrow loads of some sk_reuseport_md fields with offset > 0 test_select_reuseport fails on s390 due to verifier rejecting test_select_reuseport_kern.o with the following message: ; data_check.eth_protocol = reuse_md->eth_protocol; 18: (69) r1 = *(u16 *)(r6 +22) invalid bpf_context access off=22 size=2 This is because on big-endian machines casts from __u32 to __u16 are generated by referencing the respective variable as __u16 with an offset of 2 (as opposed to 0 on little-endian machines). The verifier already has all the infrastructure in place to allow such accesses, it's just that they are not explicitly enabled for eth_protocol field. Enable them for eth_protocol field by using bpf_ctx_range instead of offsetof. Ditto for ip_protocol, bind_inany and len, since they already allow narrowing, and the same problem can arise when working with them. Fixes: 2dbb9b9e6df6 ("bpf: Introduce BPF_PROG_TYPE_SK_REUSEPORT") Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann --- net/core/filter.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/core/filter.c b/net/core/filter.c index 7878f918b8c0..4c6a252d4212 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -8757,13 +8757,13 @@ sk_reuseport_is_valid_access(int off, int size, return size == size_default; /* Fields that allow narrowing */ - case offsetof(struct sk_reuseport_md, eth_protocol): + case bpf_ctx_range(struct sk_reuseport_md, eth_protocol): if (size < FIELD_SIZEOF(struct sk_buff, protocol)) return false; /* fall through */ - case offsetof(struct sk_reuseport_md, ip_protocol): - case offsetof(struct sk_reuseport_md, bind_inany): - case offsetof(struct sk_reuseport_md, len): + case bpf_ctx_range(struct sk_reuseport_md, ip_protocol): + case bpf_ctx_range(struct sk_reuseport_md, bind_inany): + case bpf_ctx_range(struct sk_reuseport_md, len): bpf_ctx_record_field_size(info, size_default); return bpf_ctx_narrow_access_ok(off, size, size_default); -- cgit v1.2.3 From 12c6bc38f99bb168b7f16bdb5e855a51a23ee9ec Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Wed, 21 Aug 2019 17:16:10 -0700 Subject: openvswitch: Fix log message in ovs conntrack Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Signed-off-by: Yi-Hung Wei Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index 848c6eb55064..a1852e035ebb 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1565,7 +1565,7 @@ static int parse_ct(const struct nlattr *attr, struct ovs_conntrack_info *info, case OVS_CT_ATTR_TIMEOUT: memcpy(info->timeout, nla_data(a), nla_len(a)); if (!memchr(info->timeout, '\0', nla_len(a))) { - OVS_NLERR(log, "Invalid conntrack helper"); + OVS_NLERR(log, "Invalid conntrack timeout"); return -EINVAL; } break; -- cgit v1.2.3 From e2c693934194fd3b4e795635934883354c06ebc9 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 22 Aug 2019 22:19:48 +0800 Subject: ipv4/icmp: fix rt dst dev null pointer dereference In __icmp_send() there is a possibility that the rt->dst.dev is NULL, e,g, with tunnel collect_md mode, which will cause kernel crash. Here is what the code path looks like, for GRE: - ip6gre_tunnel_xmit - ip6gre_xmit_ipv4 - __gre6_xmit - ip6_tnl_xmit - if skb->len - t->tun_hlen - eth_hlen > mtu; return -EMSGSIZE - icmp_send - net = dev_net(rt->dst.dev); <-- here The reason is __metadata_dst_init() init dst->dev to NULL by default. We could not fix it in __metadata_dst_init() as there is no dev supplied. On the other hand, the reason we need rt->dst.dev is to get the net. So we can just try get it from skb->dev when rt->dst.dev is NULL. v4: Julian Anastasov remind skb->dev also could be NULL. We'd better still use dst.dev and do a check to avoid crash. v3: No changes. v2: fix the issue in __icmp_send() instead of updating shared dst dev in {ip_md, ip6}_tunnel_xmit. Fixes: c8b34e680a09 ("ip_tunnel: Add tnl_update_pmtu in ip_md_tunnel_xmit") Signed-off-by: Hangbin Liu Reviewed-by: Julian Anastasov Acked-by: Jonathan Lemon Signed-off-by: David S. Miller --- net/ipv4/icmp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c index bf7b5d45de99..4298aae74e0e 100644 --- a/net/ipv4/icmp.c +++ b/net/ipv4/icmp.c @@ -582,7 +582,13 @@ void __icmp_send(struct sk_buff *skb_in, int type, int code, __be32 info, if (!rt) goto out; - net = dev_net(rt->dst.dev); + + if (rt->dst.dev) + net = dev_net(rt->dst.dev); + else if (skb_in->dev) + net = dev_net(skb_in->dev); + else + goto out; /* * Find the original header. It is expected to be valid, of course. -- cgit v1.2.3 From c3b4c3a47e05d5fecf7354d75824a9d1b37f3e84 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Thu, 22 Aug 2019 22:19:49 +0800 Subject: xfrm/xfrm_policy: fix dst dev null pointer dereference in collect_md mode In decode_session{4,6} there is a possibility that the skb dst dev is NULL, e,g, with tunnel collect_md mode, which will cause kernel crash. Here is what the code path looks like, for GRE: - ip6gre_tunnel_xmit - ip6gre_xmit_ipv6 - __gre6_xmit - ip6_tnl_xmit - if skb->len - t->tun_hlen - eth_hlen > mtu; return -EMSGSIZE - icmpv6_send - icmpv6_route_lookup - xfrm_decode_session_reverse - decode_session4 - oif = skb_dst(skb)->dev->ifindex; <-- here - decode_session6 - oif = skb_dst(skb)->dev->ifindex; <-- here The reason is __metadata_dst_init() init dst->dev to NULL by default. We could not fix it in __metadata_dst_init() as there is no dev supplied. On the other hand, the skb_dst(skb)->dev is actually not needed as we called decode_session{4,6} via xfrm_decode_session_reverse(), so oif is not used by: fl4->flowi4_oif = reverse ? skb->skb_iif : oif; So make a dst dev check here should be clean and safe. v4: No changes. v3: No changes. v2: fix the issue in decode_session{4,6} instead of updating shared dst dev in {ip_md, ip6}_tunnel_xmit. Fixes: 8d79266bc48c ("ip6_tunnel: add collect_md mode to IPv6 tunnels") Signed-off-by: Hangbin Liu Tested-by: Jonathan Lemon Signed-off-by: David S. Miller --- net/xfrm/xfrm_policy.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index 8ca637a72697..ec94f5795ea4 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -3269,7 +3269,7 @@ decode_session4(struct sk_buff *skb, struct flowi *fl, bool reverse) struct flowi4 *fl4 = &fl->u.ip4; int oif = 0; - if (skb_dst(skb)) + if (skb_dst(skb) && skb_dst(skb)->dev) oif = skb_dst(skb)->dev->ifindex; memset(fl4, 0, sizeof(struct flowi4)); @@ -3387,7 +3387,7 @@ decode_session6(struct sk_buff *skb, struct flowi *fl, bool reverse) nexthdr = nh[nhoff]; - if (skb_dst(skb)) + if (skb_dst(skb) && skb_dst(skb)->dev) oif = skb_dst(skb)->dev->ifindex; memset(fl6, 0, sizeof(struct flowi6)); -- cgit v1.2.3 From b45ce32135d1c82a5bf12aa56957c3fd27956057 Mon Sep 17 00:00:00 2001 From: zhanglin Date: Fri, 23 Aug 2019 09:14:11 +0800 Subject: sock: fix potential memory leak in proto_register() If protocols registered exceeded PROTO_INUSE_NR, prot will be added to proto_list, but no available bit left for prot in proto_inuse_idx. Changes since v2: * Propagate the error code properly Signed-off-by: zhanglin Signed-off-by: David S. Miller --- net/core/sock.c | 31 +++++++++++++++++++++---------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/core/sock.c b/net/core/sock.c index 6d08553f885c..545fac19a711 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3287,16 +3287,17 @@ static __init int net_inuse_init(void) core_initcall(net_inuse_init); -static void assign_proto_idx(struct proto *prot) +static int assign_proto_idx(struct proto *prot) { prot->inuse_idx = find_first_zero_bit(proto_inuse_idx, PROTO_INUSE_NR); if (unlikely(prot->inuse_idx == PROTO_INUSE_NR - 1)) { pr_err("PROTO_INUSE_NR exhausted\n"); - return; + return -ENOSPC; } set_bit(prot->inuse_idx, proto_inuse_idx); + return 0; } static void release_proto_idx(struct proto *prot) @@ -3305,8 +3306,9 @@ static void release_proto_idx(struct proto *prot) clear_bit(prot->inuse_idx, proto_inuse_idx); } #else -static inline void assign_proto_idx(struct proto *prot) +static inline int assign_proto_idx(struct proto *prot) { + return 0; } static inline void release_proto_idx(struct proto *prot) @@ -3355,6 +3357,8 @@ static int req_prot_init(const struct proto *prot) int proto_register(struct proto *prot, int alloc_slab) { + int ret = -ENOBUFS; + if (alloc_slab) { prot->slab = kmem_cache_create_usercopy(prot->name, prot->obj_size, 0, @@ -3391,20 +3395,27 @@ int proto_register(struct proto *prot, int alloc_slab) } mutex_lock(&proto_list_mutex); + ret = assign_proto_idx(prot); + if (ret) { + mutex_unlock(&proto_list_mutex); + goto out_free_timewait_sock_slab_name; + } list_add(&prot->node, &proto_list); - assign_proto_idx(prot); mutex_unlock(&proto_list_mutex); - return 0; + return ret; out_free_timewait_sock_slab_name: - kfree(prot->twsk_prot->twsk_slab_name); + if (alloc_slab && prot->twsk_prot) + kfree(prot->twsk_prot->twsk_slab_name); out_free_request_sock_slab: - req_prot_cleanup(prot->rsk_prot); + if (alloc_slab) { + req_prot_cleanup(prot->rsk_prot); - kmem_cache_destroy(prot->slab); - prot->slab = NULL; + kmem_cache_destroy(prot->slab); + prot->slab = NULL; + } out: - return -ENOBUFS; + return ret; } EXPORT_SYMBOL(proto_register); -- cgit v1.2.3 From e93fb3e9521abffadb8f965c591a290cdd92b56c Mon Sep 17 00:00:00 2001 From: John Fastabend Date: Fri, 23 Aug 2019 17:11:38 -0700 Subject: net: route dump netlink NLM_F_MULTI flag missing An excerpt from netlink(7) man page, In multipart messages (multiple nlmsghdr headers with associated payload in one byte stream) the first and all following headers have the NLM_F_MULTI flag set, except for the last header which has the type NLMSG_DONE. but, after (ee28906) there is a missing NLM_F_MULTI flag in the middle of a FIB dump. The result is user space applications following above man page excerpt may get confused and may stop parsing msg believing something went wrong. In the golang netlink lib [0] the library logic stops parsing believing the message is not a multipart message. Found this running Cilium[1] against net-next while adding a feature to auto-detect routes. I noticed with multiple route tables we no longer could detect the default routes on net tree kernels because the library logic was not returning them. Fix this by handling the fib_dump_info_fnhe() case the same way the fib_dump_info() handles it by passing the flags argument through the call chain and adding a flags argument to rt_fill_info(). Tested with Cilium stack and auto-detection of routes works again. Also annotated libs to dump netlink msgs and inspected NLM_F_MULTI and NLMSG_DONE flags look correct after this. Note: In inet_rtm_getroute() pass rt_fill_info() '0' for flags the same as is done for fib_dump_info() so this looks correct to me. [0] https://github.com/vishvananda/netlink/ [1] https://github.com/cilium/ Fixes: ee28906fd7a14 ("ipv4: Dump route exceptions if requested") Signed-off-by: John Fastabend Reviewed-by: Stefano Brivio Signed-off-by: David S. Miller --- include/net/route.h | 2 +- net/ipv4/fib_trie.c | 2 +- net/ipv4/route.c | 17 ++++++++++------- 3 files changed, 12 insertions(+), 9 deletions(-) (limited to 'net') diff --git a/include/net/route.h b/include/net/route.h index 630a0493f1f3..dfce19c9fa96 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -233,7 +233,7 @@ void rt_del_uncached_list(struct rtable *rt); int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, - int *fa_index, int fa_start); + int *fa_index, int fa_start, unsigned int flags); static inline void ip_rt_put(struct rtable *rt) { diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c index 2b2b3d291ab0..1ab2fb6bb37d 100644 --- a/net/ipv4/fib_trie.c +++ b/net/ipv4/fib_trie.c @@ -2145,7 +2145,7 @@ static int fn_trie_dump_leaf(struct key_vector *l, struct fib_table *tb, if (filter->dump_exceptions) { err = fib_dump_info_fnhe(skb, cb, tb->tb_id, fi, - &i_fa, s_fa); + &i_fa, s_fa, flags); if (err < 0) goto stop; } diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 517300d587a7..b6a6f18c3dd1 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -2728,7 +2728,8 @@ EXPORT_SYMBOL_GPL(ip_route_output_flow); /* called with rcu_read_lock held */ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, struct rtable *rt, u32 table_id, struct flowi4 *fl4, - struct sk_buff *skb, u32 portid, u32 seq) + struct sk_buff *skb, u32 portid, u32 seq, + unsigned int flags) { struct rtmsg *r; struct nlmsghdr *nlh; @@ -2736,7 +2737,7 @@ static int rt_fill_info(struct net *net, __be32 dst, __be32 src, u32 error; u32 metrics[RTAX_MAX]; - nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), 0); + nlh = nlmsg_put(skb, portid, seq, RTM_NEWROUTE, sizeof(*r), flags); if (!nlh) return -EMSGSIZE; @@ -2860,7 +2861,7 @@ nla_put_failure: static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fnhe_hash_bucket *bucket, int genid, - int *fa_index, int fa_start) + int *fa_index, int fa_start, unsigned int flags) { int i; @@ -2891,7 +2892,7 @@ static int fnhe_dump_bucket(struct net *net, struct sk_buff *skb, err = rt_fill_info(net, fnhe->fnhe_daddr, 0, rt, table_id, NULL, skb, NETLINK_CB(cb->skb).portid, - cb->nlh->nlmsg_seq); + cb->nlh->nlmsg_seq, flags); if (err) return err; next: @@ -2904,7 +2905,7 @@ next: int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, u32 table_id, struct fib_info *fi, - int *fa_index, int fa_start) + int *fa_index, int fa_start, unsigned int flags) { struct net *net = sock_net(cb->skb->sk); int nhsel, genid = fnhe_genid(net); @@ -2922,7 +2923,8 @@ int fib_dump_info_fnhe(struct sk_buff *skb, struct netlink_callback *cb, err = 0; if (bucket) err = fnhe_dump_bucket(net, skb, cb, table_id, bucket, - genid, fa_index, fa_start); + genid, fa_index, fa_start, + flags); rcu_read_unlock(); if (err) return err; @@ -3183,7 +3185,8 @@ static int inet_rtm_getroute(struct sk_buff *in_skb, struct nlmsghdr *nlh, fl4.flowi4_tos, res.fi, 0); } else { err = rt_fill_info(net, dst, src, rt, table_id, &fl4, skb, - NETLINK_CB(in_skb).portid, nlh->nlmsg_seq); + NETLINK_CB(in_skb).portid, + nlh->nlmsg_seq, 0); } if (err < 0) goto errout_rcu; -- cgit v1.2.3 From e0e6d062822529dbe9be21939359b0d1e065bb0f Mon Sep 17 00:00:00 2001 From: Zhu Yanjun Date: Fri, 23 Aug 2019 21:04:16 -0400 Subject: net: rds: add service level support in rds-info >From IB specific 7.6.5 SERVICE LEVEL, Service Level (SL) is used to identify different flows within an IBA subnet. It is carried in the local route header of the packet. Before this commit, run "rds-info -I". The outputs are as below: " RDS IB Connections: LocalAddr RemoteAddr Tos SL LocalDev RemoteDev 192.2.95.3 192.2.95.1 2 0 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 1 0 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 0 0 fe80::21:28:1a:39 fe80::21:28:10:b9 " After this commit, the output is as below: " RDS IB Connections: LocalAddr RemoteAddr Tos SL LocalDev RemoteDev 192.2.95.3 192.2.95.1 2 2 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 1 1 fe80::21:28:1a:39 fe80::21:28:10:b9 192.2.95.3 192.2.95.1 0 0 fe80::21:28:1a:39 fe80::21:28:10:b9 " The commit fe3475af3bdf ("net: rds: add per rds connection cache statistics") adds cache_allocs in struct rds_info_rdma_connection as below: struct rds_info_rdma_connection { ... __u32 rdma_mr_max; __u32 rdma_mr_size; __u8 tos; __u32 cache_allocs; }; The peer struct in rds-tools of struct rds_info_rdma_connection is as below: struct rds_info_rdma_connection { ... uint32_t rdma_mr_max; uint32_t rdma_mr_size; uint8_t tos; uint8_t sl; uint32_t cache_allocs; }; The difference between userspace and kernel is the member variable sl. In the kernel struct, the member variable sl is missing. This will introduce risks. So it is necessary to use this commit to avoid this risk. Fixes: fe3475af3bdf ("net: rds: add per rds connection cache statistics") CC: Joe Jin CC: JUNXIAO_BI Suggested-by: Gerd Rausch Signed-off-by: Zhu Yanjun Acked-by: Santosh Shilimkar Signed-off-by: David S. Miller --- include/uapi/linux/rds.h | 2 ++ net/rds/ib.c | 16 ++++++++++------ net/rds/ib.h | 1 + net/rds/ib_cm.c | 3 +++ net/rds/rdma_transport.c | 10 ++++++++-- 5 files changed, 24 insertions(+), 8 deletions(-) (limited to 'net') diff --git a/include/uapi/linux/rds.h b/include/uapi/linux/rds.h index fd6b5f66e2c5..cba368e55863 100644 --- a/include/uapi/linux/rds.h +++ b/include/uapi/linux/rds.h @@ -250,6 +250,7 @@ struct rds_info_rdma_connection { __u32 rdma_mr_max; __u32 rdma_mr_size; __u8 tos; + __u8 sl; __u32 cache_allocs; }; @@ -265,6 +266,7 @@ struct rds6_info_rdma_connection { __u32 rdma_mr_max; __u32 rdma_mr_size; __u8 tos; + __u8 sl; __u32 cache_allocs; }; diff --git a/net/rds/ib.c b/net/rds/ib.c index ec05d91aa9a2..45acab2de0cf 100644 --- a/net/rds/ib.c +++ b/net/rds/ib.c @@ -291,7 +291,7 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds_info_rdma_connection *iinfo = buffer; - struct rds_ib_connection *ic; + struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) @@ -301,15 +301,16 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn, iinfo->src_addr = conn->c_laddr.s6_addr32[3]; iinfo->dst_addr = conn->c_faddr.s6_addr32[3]; - iinfo->tos = conn->c_tos; + if (ic) { + iinfo->tos = conn->c_tos; + iinfo->sl = ic->i_sl; + } memset(&iinfo->src_gid, 0, sizeof(iinfo->src_gid)); memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid)); if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; - ic = conn->c_transport_data; - rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid, (union ib_gid *)&iinfo->dst_gid); @@ -329,7 +330,7 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, void *buffer) { struct rds6_info_rdma_connection *iinfo6 = buffer; - struct rds_ib_connection *ic; + struct rds_ib_connection *ic = conn->c_transport_data; /* We will only ever look at IB transports */ if (conn->c_trans != &rds_ib_transport) @@ -337,6 +338,10 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, iinfo6->src_addr = conn->c_laddr; iinfo6->dst_addr = conn->c_faddr; + if (ic) { + iinfo6->tos = conn->c_tos; + iinfo6->sl = ic->i_sl; + } memset(&iinfo6->src_gid, 0, sizeof(iinfo6->src_gid)); memset(&iinfo6->dst_gid, 0, sizeof(iinfo6->dst_gid)); @@ -344,7 +349,6 @@ static int rds6_ib_conn_info_visitor(struct rds_connection *conn, if (rds_conn_state(conn) == RDS_CONN_UP) { struct rds_ib_device *rds_ibdev; - ic = conn->c_transport_data; rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo6->src_gid, (union ib_gid *)&iinfo6->dst_gid); rds_ibdev = ic->rds_ibdev; diff --git a/net/rds/ib.h b/net/rds/ib.h index 303c6ee8bdb7..f2b558e8b5ea 100644 --- a/net/rds/ib.h +++ b/net/rds/ib.h @@ -220,6 +220,7 @@ struct rds_ib_connection { /* Send/Recv vectors */ int i_scq_vector; int i_rcq_vector; + u8 i_sl; }; /* This assumes that atomic_t is at least 32 bits */ diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c index fddaa09f7b0d..233f1368162b 100644 --- a/net/rds/ib_cm.c +++ b/net/rds/ib_cm.c @@ -152,6 +152,9 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even RDS_PROTOCOL_MINOR(conn->c_version), ic->i_flowctl ? ", flow control" : ""); + /* receive sl from the peer */ + ic->i_sl = ic->i_cm_id->route.path_rec->sl; + atomic_set(&ic->i_cq_quiesce, 0); /* Init rings and fill recv. this needs to wait until protocol diff --git a/net/rds/rdma_transport.c b/net/rds/rdma_transport.c index 9986d6065c4d..5f741e51b4ba 100644 --- a/net/rds/rdma_transport.c +++ b/net/rds/rdma_transport.c @@ -43,6 +43,9 @@ static struct rdma_cm_id *rds_rdma_listen_id; static struct rdma_cm_id *rds6_rdma_listen_id; #endif +/* Per IB specification 7.7.3, service level is a 4-bit field. */ +#define TOS_TO_SL(tos) ((tos) & 0xF) + static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rdma_cm_event *event, bool isv6) @@ -97,10 +100,13 @@ static int rds_rdma_cm_event_handler_cmn(struct rdma_cm_id *cm_id, struct rds_ib_connection *ibic; ibic = conn->c_transport_data; - if (ibic && ibic->i_cm_id == cm_id) + if (ibic && ibic->i_cm_id == cm_id) { + cm_id->route.path_rec[0].sl = + TOS_TO_SL(conn->c_tos); ret = trans->cm_initiate_connect(cm_id, isv6); - else + } else { rds_conn_drop(conn); + } } break; -- cgit v1.2.3 From 803f3e22ae10003a83c781498c0ac34cfe3463ff Mon Sep 17 00:00:00 2001 From: Alexey Kodanev Date: Fri, 23 Aug 2019 20:51:43 +0300 Subject: ipv4: mpls: fix mpls_xmit for iptunnel When using mpls over gre/gre6 setup, rt->rt_gw4 address is not set, the same for rt->rt_gw_family. Therefore, when rt->rt_gw_family is checked in mpls_xmit(), neigh_xmit() call is skipped. As a result, such setup doesn't work anymore. This issue was found with LTP mpls03 tests. Fixes: 1550c171935d ("ipv4: Prepare rtable for IPv6 gateway") Signed-off-by: Alexey Kodanev Reviewed-by: David Ahern Signed-off-by: David S. Miller --- net/mpls/mpls_iptunnel.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net') diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index d25e91d7bdc1..44b675016393 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -133,12 +133,12 @@ static int mpls_xmit(struct sk_buff *skb) mpls_stats_inc_outucastpkts(out_dev, skb); if (rt) { - if (rt->rt_gw_family == AF_INET) - err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4, - skb); - else if (rt->rt_gw_family == AF_INET6) + if (rt->rt_gw_family == AF_INET6) err = neigh_xmit(NEIGH_ND_TABLE, out_dev, &rt->rt_gw6, skb); + else + err = neigh_xmit(NEIGH_ARP_TABLE, out_dev, &rt->rt_gw4, + skb); } else if (rt6) { if (ipv6_addr_v4mapped(&rt6->rt6i_gateway)) { /* 6PE (RFC 4798) */ -- cgit v1.2.3 From 7177895154e6a35179d332f4a584d396c50d0612 Mon Sep 17 00:00:00 2001 From: Yi-Hung Wei Date: Thu, 22 Aug 2019 13:17:50 -0700 Subject: openvswitch: Fix conntrack cache with timeout This patch addresses a conntrack cache issue with timeout policy. Currently, we do not check if the timeout extension is set properly in the cached conntrack entry. Thus, after packet recirculate from conntrack action, the timeout policy is not applied properly. This patch fixes the aforementioned issue. Fixes: 06bd2bdf19d2 ("openvswitch: Add timeout support to ct action") Reported-by: kbuild test robot Signed-off-by: Yi-Hung Wei Acked-by: Pravin B Shelar Signed-off-by: David S. Miller --- net/openvswitch/conntrack.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'net') diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index a1852e035ebb..d8da6477d6be 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -67,6 +67,7 @@ struct ovs_conntrack_info { struct md_mark mark; struct md_labels labels; char timeout[CTNL_TIMEOUT_NAME_MAX]; + struct nf_ct_timeout *nf_ct_timeout; #if IS_ENABLED(CONFIG_NF_NAT) struct nf_nat_range2 range; /* Only present for SRC NAT and DST NAT. */ #endif @@ -697,6 +698,14 @@ static bool skb_nfct_cached(struct net *net, if (help && rcu_access_pointer(help->helper) != info->helper) return false; } + if (info->nf_ct_timeout) { + struct nf_conn_timeout *timeout_ext; + + timeout_ext = nf_ct_timeout_find(ct); + if (!timeout_ext || info->nf_ct_timeout != + rcu_dereference(timeout_ext->timeout)) + return false; + } /* Force conntrack entry direction to the current packet? */ if (info->force && CTINFO2DIR(ctinfo) != IP_CT_DIR_ORIGINAL) { /* Delete the conntrack entry if confirmed, else just release @@ -1657,6 +1666,10 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr, ct_info.timeout)) pr_info_ratelimited("Failed to associated timeout " "policy `%s'\n", ct_info.timeout); + else + ct_info.nf_ct_timeout = rcu_dereference( + nf_ct_timeout_find(ct_info.ct)->timeout); + } if (helper) { -- cgit v1.2.3