From efb796f5571f030743e1d9c662cdebdad724f8c5 Mon Sep 17 00:00:00 2001 From: Michal Kubecek Date: Mon, 14 Dec 2020 14:25:01 +0100 Subject: ethtool: fix string set id check Syzbot reported a shift of a u32 by more than 31 in strset_parse_request() which is undefined behavior. This is caused by range check of string set id using variable ret (which is always 0 at this point) instead of id (string set id from request). Fixes: 71921690f974 ("ethtool: provide string sets with STRSET_GET request") Reported-by: syzbot+96523fb438937cd01220@syzkaller.appspotmail.com Signed-off-by: Michal Kubecek Link: https://lore.kernel.org/r/b54ed5c5fd972a59afea3e1badfb36d86df68799.1607952208.git.mkubecek@suse.cz Signed-off-by: Jakub Kicinski --- net/ethtool/strset.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/ethtool/strset.c b/net/ethtool/strset.c index 0baad0ce1832..c3a5489964cd 100644 --- a/net/ethtool/strset.c +++ b/net/ethtool/strset.c @@ -182,7 +182,7 @@ static int strset_parse_request(struct ethnl_req_info *req_base, ret = strset_get_id(attr, &id, extack); if (ret < 0) return ret; - if (ret >= ETH_SS_COUNT) { + if (id >= ETH_SS_COUNT) { NL_SET_ERR_MSG_ATTR(extack, attr, "unknown string set id"); return -EOPNOTSUPP; -- cgit v1.2.3 From 7061eb8cfa902daa1ec71d23b5cddb8b4391e72b Mon Sep 17 00:00:00 2001 From: Lijun Pan Date: Mon, 14 Dec 2020 15:19:28 -0600 Subject: net: core: introduce __netdev_notify_peers There are some use cases for netdev_notify_peers in the context when rtnl lock is already held. Introduce lockless version of netdev_notify_peers call to save the extra code to call call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); After that, convert netdev_notify_peers to call the new helper. Suggested-by: Nathan Lynch Signed-off-by: Lijun Pan Signed-off-by: Jakub Kicinski --- include/linux/netdevice.h | 1 + net/core/dev.c | 22 ++++++++++++++++++++-- 2 files changed, 21 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7bf167993c05..259be67644e3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4547,6 +4547,7 @@ void __dev_set_rx_mode(struct net_device *dev); int dev_set_promiscuity(struct net_device *dev, int inc); int dev_set_allmulti(struct net_device *dev, int inc); void netdev_state_change(struct net_device *dev); +void __netdev_notify_peers(struct net_device *dev); void netdev_notify_peers(struct net_device *dev); void netdev_features_change(struct net_device *dev); /* Load a device via the kmod */ diff --git a/net/core/dev.c b/net/core/dev.c index a46334906c94..8fa739259041 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -1456,6 +1456,25 @@ void netdev_state_change(struct net_device *dev) } EXPORT_SYMBOL(netdev_state_change); +/** + * __netdev_notify_peers - notify network peers about existence of @dev, + * to be called when rtnl lock is already held. + * @dev: network device + * + * Generate traffic such that interested network peers are aware of + * @dev, such as by generating a gratuitous ARP. This may be used when + * a device wants to inform the rest of the network about some sort of + * reconfiguration such as a failover event or virtual machine + * migration. + */ +void __netdev_notify_peers(struct net_device *dev) +{ + ASSERT_RTNL(); + call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); + call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); +} +EXPORT_SYMBOL(__netdev_notify_peers); + /** * netdev_notify_peers - notify network peers about existence of @dev * @dev: network device @@ -1469,8 +1488,7 @@ EXPORT_SYMBOL(netdev_state_change); void netdev_notify_peers(struct net_device *dev) { rtnl_lock(); - call_netdevice_notifiers(NETDEV_NOTIFY_PEERS, dev); - call_netdevice_notifiers(NETDEV_RESEND_IGMP, dev); + __netdev_notify_peers(dev); rtnl_unlock(); } EXPORT_SYMBOL(netdev_notify_peers); -- cgit v1.2.3 From c32c928d29deb2636e5889f59305cc15b004909f Mon Sep 17 00:00:00 2001 From: Hoang Le Date: Tue, 15 Dec 2020 10:31:51 +0700 Subject: tipc: do sanity check payload of a netlink message When we initialize nlmsghdr with no payload inside tipc_nl_compat_dumpit() the parsing function returns -EINVAL. We fix it by making the parsing call conditional. Acked-by: Jon Maloy Signed-off-by: Hoang Le Link: https://lore.kernel.org/r/20201215033151.76139-1-hoang.h.le@dektech.com.au Signed-off-by: Jakub Kicinski --- net/tipc/netlink_compat.c | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'net') diff --git a/net/tipc/netlink_compat.c b/net/tipc/netlink_compat.c index 82f154989418..5a1ce64039f7 100644 --- a/net/tipc/netlink_compat.c +++ b/net/tipc/netlink_compat.c @@ -213,12 +213,14 @@ static int __tipc_nl_compat_dumpit(struct tipc_nl_compat_cmd_dump *cmd, } info.attrs = attrbuf; - err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf, - tipc_genl_family.maxattr, - tipc_genl_family.policy, NULL); - if (err) - goto err_out; + if (nlmsg_len(cb.nlh) > 0) { + err = nlmsg_parse_deprecated(cb.nlh, GENL_HDRLEN, attrbuf, + tipc_genl_family.maxattr, + tipc_genl_family.policy, NULL); + if (err) + goto err_out; + } do { int rem; -- cgit v1.2.3 From ef72cd3c5ce168829c6684ecb2cae047d3493690 Mon Sep 17 00:00:00 2001 From: Ivan Vecera Date: Tue, 15 Dec 2020 10:08:10 +0100 Subject: ethtool: fix error paths in ethnl_set_channels() Fix two error paths in ethnl_set_channels() to avoid lock-up caused but unreleased RTNL. Fixes: e19c591eafad ("ethtool: set device channel counts with CHANNELS_SET request") Reported-by: LiLiang Signed-off-by: Ivan Vecera Reviewed-by: Michal Kubecek Link: https://lore.kernel.org/r/20201215090810.801777-1-ivecera@redhat.com Signed-off-by: Jakub Kicinski --- net/ethtool/channels.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'net') diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c index 5635604cb9ba..25a9e566ef5c 100644 --- a/net/ethtool/channels.c +++ b/net/ethtool/channels.c @@ -194,8 +194,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) if (netif_is_rxfh_configured(dev) && !ethtool_get_max_rxfh_channel(dev, &max_rx_in_use) && (channels.combined_count + channels.rx_count) <= max_rx_in_use) { + ret = -EINVAL; GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing indirection table settings"); - return -EINVAL; + goto out_ops; } /* Disabling channels, query zero-copy AF_XDP sockets */ @@ -203,8 +204,9 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info) min(channels.rx_count, channels.tx_count); for (i = from_channel; i < old_total; i++) if (xsk_get_pool_from_qid(dev, i)) { + ret = -EINVAL; GENL_SET_ERR_MSG(info, "requested channel counts are too low for existing zerocopy AF_XDP sockets"); - return -EINVAL; + goto out_ops; } ret = dev->ethtool_ops->set_channels(dev, &channels); -- cgit v1.2.3 From 995433b795cec0a4ef6c8603e7642903c621943a Mon Sep 17 00:00:00 2001 From: Karsten Graul Date: Tue, 15 Dec 2020 10:10:58 +0100 Subject: net/smc: fix access to parent of an ib device The parent of an ib device is used to retrieve the PCI device attributes. It turns out that there are possible cases when an ib device has no parent set in the device structure, which may lead to page faults when trying to access this memory. Fix that by checking the parent pointer and consolidate the pci device specific processing in a new function. Fixes: a3db10efcc4c ("net/smc: Add support for obtaining SMCR device list") Reported-by: syzbot+600fef7c414ee7e2d71b@syzkaller.appspotmail.com Signed-off-by: Karsten Graul Link: https://lore.kernel.org/r/20201215091058.49354-2-kgraul@linux.ibm.com Signed-off-by: Jakub Kicinski --- net/smc/smc_ib.c | 36 +++++++++++++++++++++++------------- 1 file changed, 23 insertions(+), 13 deletions(-) (limited to 'net') diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c index 89ea10675a7d..ddd7fac98b1d 100644 --- a/net/smc/smc_ib.c +++ b/net/smc/smc_ib.c @@ -394,6 +394,22 @@ errout: return -EMSGSIZE; } +static bool smc_nl_handle_pci_values(const struct smc_pci_dev *smc_pci_dev, + struct sk_buff *skb) +{ + if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev->pci_fid)) + return false; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev->pci_pchid)) + return false; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev->pci_vendor)) + return false; + if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev->pci_device)) + return false; + if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev->pci_id)) + return false; + return true; +} + static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, struct sk_buff *skb, struct netlink_callback *cb) @@ -417,19 +433,13 @@ static int smc_nl_handle_smcr_dev(struct smc_ib_device *smcibdev, is_crit = smcr_diag_is_dev_critical(&smc_lgr_list, smcibdev); if (nla_put_u8(skb, SMC_NLA_DEV_IS_CRIT, is_crit)) goto errattr; - memset(&smc_pci_dev, 0, sizeof(smc_pci_dev)); - pci_dev = to_pci_dev(smcibdev->ibdev->dev.parent); - smc_set_pci_values(pci_dev, &smc_pci_dev); - if (nla_put_u32(skb, SMC_NLA_DEV_PCI_FID, smc_pci_dev.pci_fid)) - goto errattr; - if (nla_put_u16(skb, SMC_NLA_DEV_PCI_CHID, smc_pci_dev.pci_pchid)) - goto errattr; - if (nla_put_u16(skb, SMC_NLA_DEV_PCI_VENDOR, smc_pci_dev.pci_vendor)) - goto errattr; - if (nla_put_u16(skb, SMC_NLA_DEV_PCI_DEVICE, smc_pci_dev.pci_device)) - goto errattr; - if (nla_put_string(skb, SMC_NLA_DEV_PCI_ID, smc_pci_dev.pci_id)) - goto errattr; + if (smcibdev->ibdev->dev.parent) { + memset(&smc_pci_dev, 0, sizeof(smc_pci_dev)); + pci_dev = to_pci_dev(smcibdev->ibdev->dev.parent); + smc_set_pci_values(pci_dev, &smc_pci_dev); + if (!smc_nl_handle_pci_values(&smc_pci_dev, skb)) + goto errattr; + } snprintf(smc_ibname, sizeof(smc_ibname), "%s", smcibdev->ibdev->name); if (nla_put_string(skb, SMC_NLA_DEV_IB_NAME, smc_ibname)) goto errattr; -- cgit v1.2.3 From 3ae32c07815a24ae12de2e7838d9d429ba31e5e0 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 15 Dec 2020 17:56:51 +0800 Subject: mptcp: clear use_ack and use_map when dropping other suboptions This patch cleared use_ack and use_map when dropping other suboptions to fix the following syzkaller BUG: [ 15.223006] BUG: unable to handle page fault for address: 0000000000223b10 [ 15.223700] #PF: supervisor read access in kernel mode [ 15.224209] #PF: error_code(0x0000) - not-present page [ 15.224724] PGD b8d5067 P4D b8d5067 PUD c0a5067 PMD 0 [ 15.225237] Oops: 0000 [#1] SMP [ 15.225556] CPU: 0 PID: 7747 Comm: syz-executor Not tainted 5.10.0-rc6+ #24 [ 15.226281] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 15.227292] RIP: 0010:skb_release_data+0x89/0x1e0 [ 15.227816] Code: 5b 5d 41 5c 41 5d 41 5e 41 5f e9 02 06 8a ff e8 fd 05 8a ff 45 31 ed 80 7d 02 00 4c 8d 65 30 74 55 e8 eb 05 8a ff 49 8b 1c 24 <4c> 8b 7b 08 41 f6 c7 01 0f 85 18 01 00 00 e8 d4 05 8a ff 8b 43 34 [ 15.229669] RSP: 0018:ffffc900019c7c08 EFLAGS: 00010293 [ 15.230188] RAX: ffff88800daad900 RBX: 0000000000223b08 RCX: 0000000000000006 [ 15.230895] RDX: 0000000000000000 RSI: ffffffff818e06c5 RDI: ffff88807f6dc700 [ 15.231593] RBP: ffff88807f71a4c0 R08: 0000000000000001 R09: 0000000000000001 [ 15.232299] R10: ffffc900019c7c18 R11: 0000000000000000 R12: ffff88807f71a4f0 [ 15.233007] R13: 0000000000000000 R14: ffff88807f6dc700 R15: 0000000000000002 [ 15.233714] FS: 00007f65d9b5f700(0000) GS:ffff88807c400000(0000) knlGS:0000000000000000 [ 15.234509] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.235081] CR2: 0000000000223b10 CR3: 000000000b883000 CR4: 00000000000006f0 [ 15.235788] Call Trace: [ 15.236042] skb_release_all+0x28/0x30 [ 15.236419] __kfree_skb+0x11/0x20 [ 15.236768] tcp_data_queue+0x270/0x1240 [ 15.237161] ? tcp_urg+0x50/0x2a0 [ 15.237496] tcp_rcv_established+0x39a/0x890 [ 15.237997] ? mark_held_locks+0x49/0x70 [ 15.238467] tcp_v4_do_rcv+0xb9/0x270 [ 15.238915] __release_sock+0x8a/0x160 [ 15.239365] release_sock+0x32/0xd0 [ 15.239793] __inet_stream_connect+0x1d2/0x400 [ 15.240313] ? do_wait_intr_irq+0x80/0x80 [ 15.240791] inet_stream_connect+0x36/0x50 [ 15.241275] mptcp_stream_connect+0x69/0x1b0 [ 15.241787] __sys_connect+0x122/0x140 [ 15.242236] ? syscall_enter_from_user_mode+0x17/0x50 [ 15.242836] ? lockdep_hardirqs_on_prepare+0xd4/0x170 [ 15.243436] __x64_sys_connect+0x1a/0x20 [ 15.243924] do_syscall_64+0x33/0x40 [ 15.244313] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 15.244821] RIP: 0033:0x7f65d946e469 [ 15.245183] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ff 49 2b 00 f7 d8 64 89 01 48 [ 15.247019] RSP: 002b:00007f65d9b5eda8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a [ 15.247770] RAX: ffffffffffffffda RBX: 000000000049bf00 RCX: 00007f65d946e469 [ 15.248471] RDX: 0000000000000010 RSI: 00000000200000c0 RDI: 0000000000000005 [ 15.249205] RBP: 000000000049bf00 R08: 0000000000000000 R09: 0000000000000000 [ 15.249908] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000049bf0c [ 15.250603] R13: 00007fffe8a25cef R14: 00007f65d9b3f000 R15: 0000000000000003 [ 15.251312] Modules linked in: [ 15.251626] CR2: 0000000000223b10 [ 15.251965] BUG: kernel NULL pointer dereference, address: 0000000000000048 [ 15.252005] ---[ end trace f5c51fe19123c773 ]--- [ 15.252822] #PF: supervisor read access in kernel mode [ 15.252823] #PF: error_code(0x0000) - not-present page [ 15.252825] PGD c6c6067 P4D c6c6067 PUD c0d8067 [ 15.253294] RIP: 0010:skb_release_data+0x89/0x1e0 [ 15.253910] PMD 0 [ 15.253914] Oops: 0000 [#2] SMP [ 15.253917] CPU: 1 PID: 7746 Comm: syz-executor Tainted: G D 5.10.0-rc6+ #24 [ 15.253920] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 [ 15.254435] Code: 5b 5d 41 5c 41 5d 41 5e 41 5f e9 02 06 8a ff e8 fd 05 8a ff 45 31 ed 80 7d 02 00 4c 8d 65 30 74 55 e8 eb 05 8a ff 49 8b 1c 24 <4c> 8b 7b 08 41 f6 c7 01 0f 85 18 01 00 00 e8 d4 05 8a ff 8b 43 34 [ 15.254899] RIP: 0010:skb_release_data+0x89/0x1e0 [ 15.254902] Code: 5b 5d 41 5c 41 5d 41 5e 41 5f e9 02 06 8a ff e8 fd 05 8a ff 45 31 ed 80 7d 02 00 4c 8d 65 30 74 55 e8 eb 05 8a ff 49 8b 1c 24 <4c> 8b 7b 08 41 f6 c7 01 0f 85 18 01 00 00 e8 d4 05 8a ff 8b 43 34 [ 15.254905] RSP: 0018:ffffc900019bfc08 EFLAGS: 00010293 [ 15.255376] RSP: 0018:ffffc900019c7c08 EFLAGS: 00010293 [ 15.255580] [ 15.255583] RAX: ffff888004a7ac80 RBX: 0000000000000040 RCX: 0000000000000000 [ 15.255912] [ 15.256724] RDX: 0000000000000000 RSI: ffffffff818e06c5 RDI: ffff88807f6ddd00 [ 15.257620] RAX: ffff88800daad900 RBX: 0000000000223b08 RCX: 0000000000000006 [ 15.259817] RBP: ffff88800e9006c0 R08: 0000000000000000 R09: 0000000000000000 [ 15.259818] R10: 0000000000000000 R11: 0000000000000000 R12: ffff88800e9006f0 [ 15.259820] R13: 0000000000000000 R14: ffff88807f6ddd00 R15: 0000000000000002 [ 15.259822] FS: 00007fae4a60a700(0000) GS:ffff88807c500000(0000) knlGS:0000000000000000 [ 15.259826] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.260296] RDX: 0000000000000000 RSI: ffffffff818e06c5 RDI: ffff88807f6dc700 [ 15.262514] CR2: 0000000000000048 CR3: 000000000b89c000 CR4: 00000000000006e0 [ 15.262515] Call Trace: [ 15.262519] skb_release_all+0x28/0x30 [ 15.262523] __kfree_skb+0x11/0x20 [ 15.263054] RBP: ffff88807f71a4c0 R08: 0000000000000001 R09: 0000000000000001 [ 15.263680] tcp_data_queue+0x270/0x1240 [ 15.263843] R10: ffffc900019c7c18 R11: 0000000000000000 R12: ffff88807f71a4f0 [ 15.264693] ? tcp_urg+0x50/0x2a0 [ 15.264856] R13: 0000000000000000 R14: ffff88807f6dc700 R15: 0000000000000002 [ 15.265720] tcp_rcv_established+0x39a/0x890 [ 15.266438] FS: 00007f65d9b5f700(0000) GS:ffff88807c400000(0000) knlGS:0000000000000000 [ 15.267283] ? __schedule+0x3fa/0x880 [ 15.267287] tcp_v4_do_rcv+0xb9/0x270 [ 15.267290] __release_sock+0x8a/0x160 [ 15.268049] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [ 15.268788] release_sock+0x32/0xd0 [ 15.268791] __inet_stream_connect+0x1d2/0x400 [ 15.268795] ? do_wait_intr_irq+0x80/0x80 [ 15.269593] CR2: 0000000000223b10 CR3: 000000000b883000 CR4: 00000000000006f0 [ 15.270246] inet_stream_connect+0x36/0x50 [ 15.270250] mptcp_stream_connect+0x69/0x1b0 [ 15.270253] __sys_connect+0x122/0x140 [ 15.271097] Kernel panic - not syncing: Fatal exception [ 15.271820] ? syscall_enter_from_user_mode+0x17/0x50 [ 15.283542] ? lockdep_hardirqs_on_prepare+0xd4/0x170 [ 15.284275] __x64_sys_connect+0x1a/0x20 [ 15.284853] do_syscall_64+0x33/0x40 [ 15.285369] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 15.286105] RIP: 0033:0x7fae49f19469 [ 15.286638] Code: 00 f3 c3 66 2e 0f 1f 84 00 00 00 00 00 0f 1f 40 00 48 89 f8 48 89 f7 48 89 d6 48 89 ca 4d 89 c2 4d 89 c8 4c 8b 4c 24 08 0f 05 <48> 3d 01 f0 ff ff 73 01 c3 48 8b 0d ff 49 2b 00 f7 d8 64 89 01 48 [ 15.289295] RSP: 002b:00007fae4a609da8 EFLAGS: 00000246 ORIG_RAX: 000000000000002a [ 15.290375] RAX: ffffffffffffffda RBX: 000000000049bf00 RCX: 00007fae49f19469 [ 15.291403] RDX: 0000000000000010 RSI: 00000000200000c0 RDI: 0000000000000005 [ 15.292437] RBP: 000000000049bf00 R08: 0000000000000000 R09: 0000000000000000 [ 15.293456] R10: 0000000000000000 R11: 0000000000000246 R12: 000000000049bf0c [ 15.294473] R13: 00007fff0004b6bf R14: 00007fae4a5ea000 R15: 0000000000000003 [ 15.295492] Modules linked in: [ 15.295944] CR2: 0000000000000048 [ 15.296567] Kernel Offset: disabled [ 15.296941] ---[ end Kernel panic - not syncing: Fatal exception ]--- Reported-by: Christoph Paasch Fixes: 84dfe3677a6f (mptcp: send out dedicated ADD_ADDR packet) Signed-off-by: Geliang Tang Reviewed-by: Mat Martineau Link: https://lore.kernel.org/r/ccca4e8f01457a1b495c5d612ed16c5f7a585706.1608010058.git.geliangtang@gmail.com Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index c5328f407aab..e8a1adf299d8 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -606,6 +606,8 @@ static bool mptcp_established_options_add_addr(struct sock *sk, struct sk_buff * skb && skb_is_tcp_pure_ack(skb)) { pr_debug("drop other suboptions"); opts->suboptions = 0; + opts->ext_copy.use_ack = 0; + opts->ext_copy.use_map = 0; remaining += opt_size; drop_other_suboptions = true; } -- cgit v1.2.3 From 0c14846032f2c0a3b63234e1fc2759f4155b6067 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:32 +0100 Subject: mptcp: fix security context on server socket Currently MPTCP is not propagating the security context from the ingress request socket to newly created msk at clone time. Address the issue invoking the missing security helper. Fixes: cf7da0d66cc1 ("mptcp: Create SUBFLOW socket for incoming connections") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b812aaae8044..d24243a28fce 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2699,6 +2699,8 @@ struct sock *mptcp_sk_clone(const struct sock *sk, sock_reset_flag(nsk, SOCK_RCU_FREE); /* will be fully established after successful MPC subflow creation */ inet_sk_state_store(nsk, TCP_SYN_RECV); + + security_inet_csk_clone(nsk, req); bh_unlock_sock(nsk); /* keep a single reference */ -- cgit v1.2.3 From 3f8b2667f257c21a992bda33bfb919ee164a429c Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:33 +0100 Subject: mptcp: properly annotate nested lock MPTCP closes the subflows while holding the msk-level lock. While acquiring the subflow socket lock we need to use the correct nested annotation, or we can hit a lockdep splat at runtime. Reported-and-tested-by: Geliang Tang Fixes: e16163b6e2b7 ("mptcp: refactor shutdown and close") Signed-off-by: Paolo Abeni Reviewed-by: Mat Martineau Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index d24243a28fce..64c0c54c80e8 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2119,7 +2119,7 @@ void __mptcp_close_ssk(struct sock *sk, struct sock *ssk, list_del(&subflow->node); - lock_sock(ssk); + lock_sock_nested(ssk, SINGLE_DEPTH_NESTING); /* if we are invoked by the msk cleanup code, the subflow is * already orphaned -- cgit v1.2.3 From 219d04992b689e0498ece02d2a451f2b6e2563a9 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:34 +0100 Subject: mptcp: push pending frames when subflow has free space When multiple subflows are active, we can receive a window update on subflow with no write space available. MPTCP will try to push frames on such subflow and will fail. Pending frames will be pushed only after receiving a window update on a subflow with some wspace available. Overall the above could lead to suboptimal aggregate bandwidth usage. Instead, we should try to push pending frames as soon as the subflow reaches both conditions mentioned above. We can finally enable self-tests with asymmetric links, as the above makes them finally pass. Fixes: 6f8a612a33e4 ("mptcp: keep track of advertised windows right edge") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/options.c | 13 ++++++++----- net/mptcp/protocol.c | 2 +- net/mptcp/protocol.h | 2 +- tools/testing/selftests/net/mptcp/simult_flows.sh | 6 +++--- 4 files changed, 13 insertions(+), 10 deletions(-) (limited to 'net') diff --git a/net/mptcp/options.c b/net/mptcp/options.c index e8a1adf299d8..e0d21c0607e5 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -875,10 +875,13 @@ static void ack_update_msk(struct mptcp_sock *msk, new_wnd_end = new_snd_una + tcp_sk(ssk)->snd_wnd; - if (after64(new_wnd_end, msk->wnd_end)) { + if (after64(new_wnd_end, msk->wnd_end)) msk->wnd_end = new_wnd_end; - __mptcp_wnd_updated(sk, ssk); - } + + /* this assumes mptcp_incoming_options() is invoked after tcp_ack() */ + if (after64(msk->wnd_end, READ_ONCE(msk->snd_nxt)) && + sk_stream_memory_free(ssk)) + __mptcp_check_push(sk, ssk); if (after64(new_snd_una, old_snd_una)) { msk->snd_una = new_snd_una; @@ -944,8 +947,8 @@ void mptcp_incoming_options(struct sock *sk, struct sk_buff *skb) * helpers are cheap. */ mptcp_data_lock(subflow->conn); - if (mptcp_send_head(subflow->conn)) - __mptcp_wnd_updated(subflow->conn, sk); + if (sk_stream_memory_free(sk)) + __mptcp_check_push(subflow->conn, sk); __mptcp_data_acked(subflow->conn); mptcp_data_unlock(subflow->conn); return; diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index 64c0c54c80e8..b53a91801a6c 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -2915,7 +2915,7 @@ void __mptcp_data_acked(struct sock *sk) mptcp_schedule_work(sk); } -void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk) +void __mptcp_check_push(struct sock *sk, struct sock *ssk) { if (!mptcp_send_head(sk)) return; diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h index 7cf9d110b85f..d67de793d363 100644 --- a/net/mptcp/protocol.h +++ b/net/mptcp/protocol.h @@ -503,7 +503,7 @@ void mptcp_rcv_space_init(struct mptcp_sock *msk, const struct sock *ssk); void mptcp_data_ready(struct sock *sk, struct sock *ssk); bool mptcp_finish_join(struct sock *sk); bool mptcp_schedule_work(struct sock *sk); -void __mptcp_wnd_updated(struct sock *sk, struct sock *ssk); +void __mptcp_check_push(struct sock *sk, struct sock *ssk); void __mptcp_data_acked(struct sock *sk); void mptcp_subflow_eof(struct sock *sk); bool mptcp_update_rcv_data_fin(struct mptcp_sock *msk, u64 data_fin_seq, bool use_64bit); diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh index 2f649b431456..f039ee57eb3c 100755 --- a/tools/testing/selftests/net/mptcp/simult_flows.sh +++ b/tools/testing/selftests/net/mptcp/simult_flows.sh @@ -287,7 +287,7 @@ run_test 10 10 0 0 "balanced bwidth" run_test 10 10 1 50 "balanced bwidth with unbalanced delay" # we still need some additional infrastructure to pass the following test-cases -# run_test 30 10 0 0 "unbalanced bwidth" -# run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" -# run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" +run_test 30 10 0 0 "unbalanced bwidth" +run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay" +run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay" exit $ret -- cgit v1.2.3 From 13e1603739e58e94e7a3c24191fa2dcd1a8a5df3 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 16 Dec 2020 12:48:35 +0100 Subject: mptcp: fix pending data accounting When sendmsg() needs to wait for memory, the pending data is not updated. That causes a drift in forward memory allocation, leading to stall and/or warnings at socket close time. This change addresses the above issue moving the pending data counter update inside the sendmsg() main loop. Fixes: 6e628cd3a8f7 ("mptcp: use mptcp release_cb for delayed tasks") Reviewed-by: Mat Martineau Signed-off-by: Paolo Abeni Signed-off-by: Jakub Kicinski --- net/mptcp/protocol.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'net') diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c index b53a91801a6c..09b19aa2f205 100644 --- a/net/mptcp/protocol.c +++ b/net/mptcp/protocol.c @@ -1658,6 +1658,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) frag_truesize += psize; pfrag->offset += frag_truesize; WRITE_ONCE(msk->write_seq, msk->write_seq + psize); + msk->tx_pending_data += psize; /* charge data on mptcp pending queue to the msk socket * Note: we charge such data both to sk and ssk @@ -1683,10 +1684,8 @@ wait_for_memory: goto out; } - if (copied) { - msk->tx_pending_data += copied; + if (copied) mptcp_push_pending(sk, msg->msg_flags); - } out: release_sock(sk); -- cgit v1.2.3 From 44d4775ca51805b376a8db5b34f650434a08e556 Mon Sep 17 00:00:00 2001 From: Davide Caratti Date: Wed, 16 Dec 2020 19:33:29 +0100 Subject: net/sched: sch_taprio: reset child qdiscs before freeing them syzkaller shows that packets can still be dequeued while taprio_destroy() is running. Let sch_taprio use the reset() function to cancel the advance timer and drop all skbs from the child qdiscs. Fixes: 5a781ccbd19e ("tc: Add support for configuring the taprio scheduler") Link: https://syzkaller.appspot.com/bug?id=f362872379bf8f0017fb667c1ab158f2d1e764ae Reported-by: syzbot+8971da381fb5a31f542d@syzkaller.appspotmail.com Signed-off-by: Davide Caratti Acked-by: Vinicius Costa Gomes Link: https://lore.kernel.org/r/63b6d79b0e830ebb0283e020db4df3cdfdfb2b94.1608142843.git.dcaratti@redhat.com Signed-off-by: Jakub Kicinski --- net/sched/sch_taprio.c | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'net') diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 26fb8a62996b..c74817ec9964 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1597,6 +1597,21 @@ free_sched: return err; } +static void taprio_reset(struct Qdisc *sch) +{ + struct taprio_sched *q = qdisc_priv(sch); + struct net_device *dev = qdisc_dev(sch); + int i; + + hrtimer_cancel(&q->advance_timer); + if (q->qdiscs) { + for (i = 0; i < dev->num_tx_queues && q->qdiscs[i]; i++) + qdisc_reset(q->qdiscs[i]); + } + sch->qstats.backlog = 0; + sch->q.qlen = 0; +} + static void taprio_destroy(struct Qdisc *sch) { struct taprio_sched *q = qdisc_priv(sch); @@ -1607,7 +1622,6 @@ static void taprio_destroy(struct Qdisc *sch) list_del(&q->taprio_list); spin_unlock(&taprio_list_lock); - hrtimer_cancel(&q->advance_timer); taprio_disable_offload(dev, q, NULL); @@ -1954,6 +1968,7 @@ static struct Qdisc_ops taprio_qdisc_ops __read_mostly = { .init = taprio_init, .change = taprio_change, .destroy = taprio_destroy, + .reset = taprio_reset, .peek = taprio_peek, .dequeue = taprio_dequeue, .enqueue = taprio_enqueue, -- cgit v1.2.3