From a1d5ff5651ea592c67054233b14b30bf4452999c Mon Sep 17 00:00:00 2001 From: Mathy Vanhoef Date: Tue, 11 May 2021 20:02:44 +0200 Subject: mac80211: properly handle A-MSDUs that start with an RFC 1042 header Properly parse A-MSDUs whose first 6 bytes happen to equal a rfc1042 header. This can occur in practice when the destination MAC address equals AA:AA:03:00:00:00. More importantly, this simplifies the next patch to mitigate A-MSDU injection attacks. Cc: stable@vger.kernel.org Signed-off-by: Mathy Vanhoef Link: https://lore.kernel.org/r/20210511200110.0b2b886492f0.I23dd5d685fe16d3b0ec8106e8f01b59f499dffed@changeid Signed-off-by: Johannes Berg --- include/net/cfg80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/net') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 5224f885a99a..58c2cd417e89 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -5760,7 +5760,7 @@ unsigned int ieee80211_get_mesh_hdrlen(struct ieee80211s_hdr *meshhdr); */ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, const u8 *addr, enum nl80211_iftype iftype, - u8 data_offset); + u8 data_offset, bool is_amsdu); /** * ieee80211_data_to_8023 - convert an 802.11 data frame to 802.3 @@ -5772,7 +5772,7 @@ int ieee80211_data_to_8023_exthdr(struct sk_buff *skb, struct ethhdr *ehdr, static inline int ieee80211_data_to_8023(struct sk_buff *skb, const u8 *addr, enum nl80211_iftype iftype) { - return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0); + return ieee80211_data_to_8023_exthdr(skb, NULL, addr, iftype, 0, false); } /** -- cgit v1.2.3 From 098116e7e640ba677d9e345cbee83d253c13d556 Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Tue, 11 May 2021 10:35:21 +0200 Subject: net: really orphan skbs tied to closing sk If the owing socket is shutting down - e.g. the sock reference count already dropped to 0 and only sk_wmem_alloc is keeping the sock alive, skb_orphan_partial() becomes a no-op. When forwarding packets over veth with GRO enabled, the above causes refcount errors. This change addresses the issue with a plain skb_orphan() call in the critical scenario. Fixes: 9adc89af724f ("net: let skb_orphan_partial wake-up waiters.") Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/net/sock.h | 4 +++- net/core/sock.c | 8 ++++---- 2 files changed, 7 insertions(+), 5 deletions(-) (limited to 'include/net') diff --git a/include/net/sock.h b/include/net/sock.h index 42bc5e1a627f..0e962d8bc73b 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -2231,13 +2231,15 @@ static inline void skb_set_owner_r(struct sk_buff *skb, struct sock *sk) sk_mem_charge(sk, skb->truesize); } -static inline void skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) +static inline __must_check bool skb_set_owner_sk_safe(struct sk_buff *skb, struct sock *sk) { if (sk && refcount_inc_not_zero(&sk->sk_refcnt)) { skb_orphan(skb); skb->destructor = sock_efree; skb->sk = sk; + return true; } + return false; } void sk_reset_timer(struct sock *sk, struct timer_list *timer, diff --git a/net/core/sock.c b/net/core/sock.c index c761c4a0b66b..958614ea16ed 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -2132,10 +2132,10 @@ void skb_orphan_partial(struct sk_buff *skb) if (skb_is_tcp_pure_ack(skb)) return; - if (can_skb_orphan_partial(skb)) - skb_set_owner_sk_safe(skb, skb->sk); - else - skb_orphan(skb); + if (can_skb_orphan_partial(skb) && skb_set_owner_sk_safe(skb, skb->sk)) + return; + + skb_orphan(skb); } EXPORT_SYMBOL(skb_orphan_partial); -- cgit v1.2.3 From c07531c01d8284aedaf95708ea90e76d11af0e21 Mon Sep 17 00:00:00 2001 From: Roi Dayan Date: Mon, 10 May 2021 14:50:24 +0300 Subject: netfilter: flowtable: Remove redundant hw refresh bit Offloading conns could fail for multiple reasons and a hw refresh bit is set to try to reoffload it in next sw packet. But it could be in some cases and future points that the hw refresh bit is not set but a refresh could succeed. Remove the hw refresh bit and do offload refresh if requested. There won't be a new work entry if a work is already pending anyway as there is the hw pending bit. Fixes: 8b3646d6e0c4 ("net/sched: act_ct: Support refreshing the flow table entries") Signed-off-by: Roi Dayan Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_flow_table.h | 1 - net/netfilter/nf_flow_table_core.c | 3 +-- net/netfilter/nf_flow_table_offload.c | 7 ++++--- 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/net') diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h index 51d8eb99764d..48ef7460ff30 100644 --- a/include/net/netfilter/nf_flow_table.h +++ b/include/net/netfilter/nf_flow_table.h @@ -157,7 +157,6 @@ enum nf_flow_flags { NF_FLOW_HW, NF_FLOW_HW_DYING, NF_FLOW_HW_DEAD, - NF_FLOW_HW_REFRESH, NF_FLOW_HW_PENDING, }; diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c index 39c02d1aeedf..1d02650dd715 100644 --- a/net/netfilter/nf_flow_table_core.c +++ b/net/netfilter/nf_flow_table_core.c @@ -306,8 +306,7 @@ void flow_offload_refresh(struct nf_flowtable *flow_table, { flow->timeout = nf_flowtable_time_stamp + NF_FLOW_TIMEOUT; - if (likely(!nf_flowtable_hw_offload(flow_table) || - !test_and_clear_bit(NF_FLOW_HW_REFRESH, &flow->flags))) + if (likely(!nf_flowtable_hw_offload(flow_table))) return; nf_flow_offload_add(flow_table, flow); diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c index 2af7bdb38407..528b2f172684 100644 --- a/net/netfilter/nf_flow_table_offload.c +++ b/net/netfilter/nf_flow_table_offload.c @@ -902,10 +902,11 @@ static void flow_offload_work_add(struct flow_offload_work *offload) err = flow_offload_rule_add(offload, flow_rule); if (err < 0) - set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags); - else - set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); + goto out; + + set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status); +out: nf_flow_offload_destroy(flow_rule); } -- cgit v1.2.3 From a90c57f2cedd52a511f739fb55e6244e22e1a2fb Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 14 May 2021 11:16:59 +0800 Subject: net: sched: fix packet stuck problem for lockless qdisc Lockless qdisc has below concurrent problem: cpu0 cpu1 . . q->enqueue . . . qdisc_run_begin() . . . dequeue_skb() . . . sch_direct_xmit() . . . . q->enqueue . qdisc_run_begin() . return and do nothing . . qdisc_run_end() . cpu1 enqueue a skb without calling __qdisc_run() because cpu0 has not released the lock yet and spin_trylock() return false for cpu1 in qdisc_run_begin(), and cpu0 do not see the skb enqueued by cpu1 when calling dequeue_skb() because cpu1 may enqueue the skb after cpu0 calling dequeue_skb() and before cpu0 calling qdisc_run_end(). Lockless qdisc has below another concurrent problem when tx_action is involved: cpu0(serving tx_action) cpu1 cpu2 . . . . q->enqueue . . qdisc_run_begin() . . dequeue_skb() . . . q->enqueue . . . . sch_direct_xmit() . . . qdisc_run_begin() . . return and do nothing . . . clear __QDISC_STATE_SCHED . . qdisc_run_begin() . . return and do nothing . . . . . . qdisc_run_end() . This patch fixes the above data race by: 1. If the first spin_trylock() return false and STATE_MISSED is not set, set STATE_MISSED and retry another spin_trylock() in case other CPU may not see STATE_MISSED after it releases the lock. 2. reschedule if STATE_MISSED is set after the lock is released at the end of qdisc_run_end(). For tx_action case, STATE_MISSED is also set when cpu1 is at the end if qdisc_run_end(), so tx_action will be rescheduled again to dequeue the skb enqueued by cpu2. Clear STATE_MISSED before retrying a dequeuing when dequeuing returns NULL in order to reduce the overhead of the second spin_trylock() and __netif_schedule() calling. Also clear the STATE_MISSED before calling __netif_schedule() at the end of qdisc_run_end() to avoid doing another round of dequeuing in the pfifo_fast_dequeue(). The performance impact of this patch, tested using pktgen and dummy netdev with pfifo_fast qdisc attached: threads without+this_patch with+this_patch delta 1 2.61Mpps 2.60Mpps -0.3% 2 3.97Mpps 3.82Mpps -3.7% 4 5.62Mpps 5.59Mpps -0.5% 8 2.78Mpps 2.77Mpps -0.3% 16 2.22Mpps 2.22Mpps -0.0% Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Acked-by: Jakub Kicinski Tested-by: Juergen Gross Signed-off-by: Yunsheng Lin Signed-off-by: David S. Miller --- include/net/sch_generic.h | 35 ++++++++++++++++++++++++++++++++++- net/sched/sch_generic.c | 19 +++++++++++++++++++ 2 files changed, 53 insertions(+), 1 deletion(-) (limited to 'include/net') diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h index f7a6e14491fb..1e625519ae96 100644 --- a/include/net/sch_generic.h +++ b/include/net/sch_generic.h @@ -36,6 +36,7 @@ struct qdisc_rate_table { enum qdisc_state_t { __QDISC_STATE_SCHED, __QDISC_STATE_DEACTIVATED, + __QDISC_STATE_MISSED, }; struct qdisc_size_table { @@ -159,8 +160,33 @@ static inline bool qdisc_is_empty(const struct Qdisc *qdisc) static inline bool qdisc_run_begin(struct Qdisc *qdisc) { if (qdisc->flags & TCQ_F_NOLOCK) { + if (spin_trylock(&qdisc->seqlock)) + goto nolock_empty; + + /* If the MISSED flag is set, it means other thread has + * set the MISSED flag before second spin_trylock(), so + * we can return false here to avoid multi cpus doing + * the set_bit() and second spin_trylock() concurrently. + */ + if (test_bit(__QDISC_STATE_MISSED, &qdisc->state)) + return false; + + /* Set the MISSED flag before the second spin_trylock(), + * if the second spin_trylock() return false, it means + * other cpu holding the lock will do dequeuing for us + * or it will see the MISSED flag set after releasing + * lock and reschedule the net_tx_action() to do the + * dequeuing. + */ + set_bit(__QDISC_STATE_MISSED, &qdisc->state); + + /* Retry again in case other CPU may not see the new flag + * after it releases the lock at the end of qdisc_run_end(). + */ if (!spin_trylock(&qdisc->seqlock)) return false; + +nolock_empty: WRITE_ONCE(qdisc->empty, false); } else if (qdisc_is_running(qdisc)) { return false; @@ -176,8 +202,15 @@ static inline bool qdisc_run_begin(struct Qdisc *qdisc) static inline void qdisc_run_end(struct Qdisc *qdisc) { write_seqcount_end(&qdisc->running); - if (qdisc->flags & TCQ_F_NOLOCK) + if (qdisc->flags & TCQ_F_NOLOCK) { spin_unlock(&qdisc->seqlock); + + if (unlikely(test_bit(__QDISC_STATE_MISSED, + &qdisc->state))) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + __netif_schedule(qdisc); + } + } } static inline bool qdisc_may_bulk(const struct Qdisc *qdisc) diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 44991ea726fc..795d986e7030 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -640,8 +640,10 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) { struct pfifo_fast_priv *priv = qdisc_priv(qdisc); struct sk_buff *skb = NULL; + bool need_retry = true; int band; +retry: for (band = 0; band < PFIFO_FAST_BANDS && !skb; band++) { struct skb_array *q = band2list(priv, band); @@ -652,6 +654,23 @@ static struct sk_buff *pfifo_fast_dequeue(struct Qdisc *qdisc) } if (likely(skb)) { qdisc_update_stats_at_dequeue(qdisc, skb); + } else if (need_retry && + test_bit(__QDISC_STATE_MISSED, &qdisc->state)) { + /* Delay clearing the STATE_MISSED here to reduce + * the overhead of the second spin_trylock() in + * qdisc_run_begin() and __netif_schedule() calling + * in qdisc_run_end(). + */ + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); + + /* Make sure dequeuing happens after clearing + * STATE_MISSED. + */ + smp_mb__after_atomic(); + + need_retry = false; + + goto retry; } else { WRITE_ONCE(qdisc->empty, true); } -- cgit v1.2.3 From 102b55ee92f9fda4dde7a45d2b20538e6e3e3d1e Mon Sep 17 00:00:00 2001 From: Yunsheng Lin Date: Fri, 14 May 2021 11:17:00 +0800 Subject: net: sched: fix tx action rescheduling issue during deactivation Currently qdisc_run() checks the STATE_DEACTIVATED of lockless qdisc before calling __qdisc_run(), which ultimately clear the STATE_MISSED when all the skb is dequeued. If STATE_DEACTIVATED is set before clearing STATE_MISSED, there may be rescheduling of net_tx_action() at the end of qdisc_run_end(), see below: CPU0(net_tx_atcion) CPU1(__dev_xmit_skb) CPU2(dev_deactivate) . . . . set STATE_MISSED . . __netif_schedule() . . . set STATE_DEACTIVATED . . qdisc_reset() . . . .<--------------- . synchronize_net() clear __QDISC_STATE_SCHED | . . . | . . . | . some_qdisc_is_busy() . | . return *false* . | . . test STATE_DEACTIVATED | . . __qdisc_run() *not* called | . . . | . . test STATE_MISS | . . __netif_schedule()--------| . . . . . . . . __qdisc_run() is not called by net_tx_atcion() in CPU0 because CPU2 has set STATE_DEACTIVATED flag during dev_deactivate(), and STATE_MISSED is only cleared in __qdisc_run(), __netif_schedule is called at the end of qdisc_run_end(), causing tx action rescheduling problem. qdisc_run() called by net_tx_action() runs in the softirq context, which should has the same semantic as the qdisc_run() called by __dev_xmit_skb() protected by rcu_read_lock_bh(). And there is a synchronize_net() between STATE_DEACTIVATED flag being set and qdisc_reset()/some_qdisc_is_busy in dev_deactivate(), we can safely bail out for the deactived lockless qdisc in net_tx_action(), and qdisc_reset() will reset all skb not dequeued yet. So add the rcu_read_lock() explicitly to protect the qdisc_run() and do the STATE_DEACTIVATED checking in net_tx_action() before calling qdisc_run_begin(). Another option is to do the checking in the qdisc_run_end(), but it will add unnecessary overhead for non-tx_action case, because __dev_queue_xmit() will not see qdisc with STATE_DEACTIVATED after synchronize_net(), the qdisc with STATE_DEACTIVATED can only be seen by net_tx_action() because of __netif_schedule(). The STATE_DEACTIVATED checking in qdisc_run() is to avoid race between net_tx_action() and qdisc_reset(), see: commit d518d2ed8640 ("net/sched: fix race between deactivation and dequeue for NOLOCK qdisc"). As the bailout added above for deactived lockless qdisc in net_tx_action() provides better protection for the race without calling qdisc_run() at all, so remove the STATE_DEACTIVATED checking in qdisc_run(). After qdisc_reset(), there is no skb in qdisc to be dequeued, so clear the STATE_MISSED in dev_reset_queue() too. Fixes: 6b3ba9146fe6 ("net: sched: allow qdiscs to handle locking") Acked-by: Jakub Kicinski Signed-off-by: Yunsheng Lin V8: Clearing STATE_MISSED before calling __netif_schedule() has avoid the endless rescheduling problem, but there may still be a unnecessary rescheduling, so adjust the commit log. Signed-off-by: David S. Miller --- include/net/pkt_sched.h | 7 +------ net/core/dev.c | 26 ++++++++++++++++++++++---- net/sched/sch_generic.c | 4 +++- 3 files changed, 26 insertions(+), 11 deletions(-) (limited to 'include/net') diff --git a/include/net/pkt_sched.h b/include/net/pkt_sched.h index f5c1bee0cd6a..6d7b12cba015 100644 --- a/include/net/pkt_sched.h +++ b/include/net/pkt_sched.h @@ -128,12 +128,7 @@ void __qdisc_run(struct Qdisc *q); static inline void qdisc_run(struct Qdisc *q) { if (qdisc_run_begin(q)) { - /* NOLOCK qdisc must check 'state' under the qdisc seqlock - * to avoid racing with dev_qdisc_reset() - */ - if (!(q->flags & TCQ_F_NOLOCK) || - likely(!test_bit(__QDISC_STATE_DEACTIVATED, &q->state))) - __qdisc_run(q); + __qdisc_run(q); qdisc_run_end(q); } } diff --git a/net/core/dev.c b/net/core/dev.c index 222b1d322c96..d596cd746353 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -5025,25 +5025,43 @@ static __latent_entropy void net_tx_action(struct softirq_action *h) sd->output_queue_tailp = &sd->output_queue; local_irq_enable(); + rcu_read_lock(); + while (head) { struct Qdisc *q = head; spinlock_t *root_lock = NULL; head = head->next_sched; - if (!(q->flags & TCQ_F_NOLOCK)) { - root_lock = qdisc_lock(q); - spin_lock(root_lock); - } /* We need to make sure head->next_sched is read * before clearing __QDISC_STATE_SCHED */ smp_mb__before_atomic(); + + if (!(q->flags & TCQ_F_NOLOCK)) { + root_lock = qdisc_lock(q); + spin_lock(root_lock); + } else if (unlikely(test_bit(__QDISC_STATE_DEACTIVATED, + &q->state))) { + /* There is a synchronize_net() between + * STATE_DEACTIVATED flag being set and + * qdisc_reset()/some_qdisc_is_busy() in + * dev_deactivate(), so we can safely bail out + * early here to avoid data race between + * qdisc_deactivate() and some_qdisc_is_busy() + * for lockless qdisc. + */ + clear_bit(__QDISC_STATE_SCHED, &q->state); + continue; + } + clear_bit(__QDISC_STATE_SCHED, &q->state); qdisc_run(q); if (root_lock) spin_unlock(root_lock); } + + rcu_read_unlock(); } xfrm_dev_backlog(sd); diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c index 795d986e7030..d86c4cca2cab 100644 --- a/net/sched/sch_generic.c +++ b/net/sched/sch_generic.c @@ -1177,8 +1177,10 @@ static void dev_reset_queue(struct net_device *dev, qdisc_reset(qdisc); spin_unlock_bh(qdisc_lock(qdisc)); - if (nolock) + if (nolock) { + clear_bit(__QDISC_STATE_MISSED, &qdisc->state); spin_unlock_bh(&qdisc->seqlock); + } } static bool some_qdisc_is_busy(struct net_device *dev) -- cgit v1.2.3 From e0652f8bb44d6294eeeac06d703185357f25d50b Mon Sep 17 00:00:00 2001 From: Dongliang Mu Date: Sat, 15 May 2021 07:29:06 +0800 Subject: NFC: nci: fix memory leak in nci_allocate_device nfcmrvl_disconnect fails to free the hci_dev field in struct nci_dev. Fix this by freeing hci_dev in nci_free_device. BUG: memory leak unreferenced object 0xffff888111ea6800 (size 1024): comm "kworker/1:0", pid 19, jiffies 4294942308 (age 13.580s) hex dump (first 32 bytes): 00 00 00 00 00 00 00 00 00 60 fd 0c 81 88 ff ff .........`...... 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 00 ................ backtrace: [<000000004bc25d43>] kmalloc include/linux/slab.h:552 [inline] [<000000004bc25d43>] kzalloc include/linux/slab.h:682 [inline] [<000000004bc25d43>] nci_hci_allocate+0x21/0xd0 net/nfc/nci/hci.c:784 [<00000000c59cff92>] nci_allocate_device net/nfc/nci/core.c:1170 [inline] [<00000000c59cff92>] nci_allocate_device+0x10b/0x160 net/nfc/nci/core.c:1132 [<00000000006e0a8e>] nfcmrvl_nci_register_dev+0x10a/0x1c0 drivers/nfc/nfcmrvl/main.c:153 [<000000004da1b57e>] nfcmrvl_probe+0x223/0x290 drivers/nfc/nfcmrvl/usb.c:345 [<00000000d506aed9>] usb_probe_interface+0x177/0x370 drivers/usb/core/driver.c:396 [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554 [<00000000f5009125>] driver_probe_device+0x84/0x100 drivers/base/dd.c:740 [<000000000ce658ca>] __device_attach_driver+0xee/0x110 drivers/base/dd.c:846 [<000000007067d05f>] bus_for_each_drv+0xb7/0x100 drivers/base/bus.c:431 [<00000000f8e13372>] __device_attach+0x122/0x250 drivers/base/dd.c:914 [<000000009cf68860>] bus_probe_device+0xc6/0xe0 drivers/base/bus.c:491 [<00000000359c965a>] device_add+0x5be/0xc30 drivers/base/core.c:3109 [<00000000086e4bd3>] usb_set_configuration+0x9d9/0xb90 drivers/usb/core/message.c:2164 [<00000000ca036872>] usb_generic_driver_probe+0x8c/0xc0 drivers/usb/core/generic.c:238 [<00000000d40d36f6>] usb_probe_device+0x5c/0x140 drivers/usb/core/driver.c:293 [<00000000bc632c92>] really_probe+0x159/0x4a0 drivers/base/dd.c:554 Reported-by: syzbot+19bcfc64a8df1318d1c3@syzkaller.appspotmail.com Fixes: 11f54f228643 ("NFC: nci: Add HCI over NCI protocol support") Signed-off-by: Dongliang Mu Signed-off-by: David S. Miller --- include/net/nfc/nci_core.h | 1 + net/nfc/nci/core.c | 1 + net/nfc/nci/hci.c | 5 +++++ 3 files changed, 7 insertions(+) (limited to 'include/net') diff --git a/include/net/nfc/nci_core.h b/include/net/nfc/nci_core.h index bd76e8e082c0..1df0f8074c9d 100644 --- a/include/net/nfc/nci_core.h +++ b/include/net/nfc/nci_core.h @@ -298,6 +298,7 @@ int nci_nfcc_loopback(struct nci_dev *ndev, void *data, size_t data_len, struct sk_buff **resp); struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev); +void nci_hci_deallocate(struct nci_dev *ndev); int nci_hci_send_event(struct nci_dev *ndev, u8 gate, u8 event, const u8 *param, size_t param_len); int nci_hci_send_cmd(struct nci_dev *ndev, u8 gate, diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c index 9a585332ea84..da7fe9db1b00 100644 --- a/net/nfc/nci/core.c +++ b/net/nfc/nci/core.c @@ -1191,6 +1191,7 @@ EXPORT_SYMBOL(nci_allocate_device); void nci_free_device(struct nci_dev *ndev) { nfc_free_device(ndev->nfc_dev); + nci_hci_deallocate(ndev); kfree(ndev); } EXPORT_SYMBOL(nci_free_device); diff --git a/net/nfc/nci/hci.c b/net/nfc/nci/hci.c index 6b275a387a92..96865142104f 100644 --- a/net/nfc/nci/hci.c +++ b/net/nfc/nci/hci.c @@ -792,3 +792,8 @@ struct nci_hci_dev *nci_hci_allocate(struct nci_dev *ndev) return hdev; } + +void nci_hci_deallocate(struct nci_dev *ndev) +{ + kfree(ndev->hci_dev); +} -- cgit v1.2.3 From 9453d45ecb6c2199d72e73c993e9d98677a2801b Mon Sep 17 00:00:00 2001 From: Vlad Buslov Date: Tue, 25 May 2021 16:21:52 +0300 Subject: net: zero-initialize tc skb extension on allocation Function skb_ext_add() doesn't initialize created skb extension with any value and leaves it up to the user. However, since extension of type TC_SKB_EXT originally contained only single value tc_skb_ext->chain its users used to just assign the chain value without setting whole extension memory to zero first. This assumption changed when TC_SKB_EXT extension was extended with additional fields but not all users were updated to initialize the new fields which leads to use of uninitialized memory afterwards. UBSAN log: [ 778.299821] UBSAN: invalid-load in net/openvswitch/flow.c:899:28 [ 778.301495] load of value 107 is not a valid value for type '_Bool' [ 778.303215] CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.12.0-rc7+ #2 [ 778.304933] Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS rel-1.13.0-0-gf21b5a4aeb02-prebuilt.qemu.org 04/01/2014 [ 778.307901] Call Trace: [ 778.308680] [ 778.309358] dump_stack+0xbb/0x107 [ 778.310307] ubsan_epilogue+0x5/0x40 [ 778.311167] __ubsan_handle_load_invalid_value.cold+0x43/0x48 [ 778.312454] ? memset+0x20/0x40 [ 778.313230] ovs_flow_key_extract.cold+0xf/0x14 [openvswitch] [ 778.314532] ovs_vport_receive+0x19e/0x2e0 [openvswitch] [ 778.315749] ? ovs_vport_find_upcall_portid+0x330/0x330 [openvswitch] [ 778.317188] ? create_prof_cpu_mask+0x20/0x20 [ 778.318220] ? arch_stack_walk+0x82/0xf0 [ 778.319153] ? secondary_startup_64_no_verify+0xb0/0xbb [ 778.320399] ? stack_trace_save+0x91/0xc0 [ 778.321362] ? stack_trace_consume_entry+0x160/0x160 [ 778.322517] ? lock_release+0x52e/0x760 [ 778.323444] netdev_frame_hook+0x323/0x610 [openvswitch] [ 778.324668] ? ovs_netdev_get_vport+0xe0/0xe0 [openvswitch] [ 778.325950] __netif_receive_skb_core+0x771/0x2db0 [ 778.327067] ? lock_downgrade+0x6e0/0x6f0 [ 778.328021] ? lock_acquire+0x565/0x720 [ 778.328940] ? generic_xdp_tx+0x4f0/0x4f0 [ 778.329902] ? inet_gro_receive+0x2a7/0x10a0 [ 778.330914] ? lock_downgrade+0x6f0/0x6f0 [ 778.331867] ? udp4_gro_receive+0x4c4/0x13e0 [ 778.332876] ? lock_release+0x52e/0x760 [ 778.333808] ? dev_gro_receive+0xcc8/0x2380 [ 778.334810] ? lock_downgrade+0x6f0/0x6f0 [ 778.335769] __netif_receive_skb_list_core+0x295/0x820 [ 778.336955] ? process_backlog+0x780/0x780 [ 778.337941] ? mlx5e_rep_tc_netdevice_event_unregister+0x20/0x20 [mlx5_core] [ 778.339613] ? seqcount_lockdep_reader_access.constprop.0+0xa7/0xc0 [ 778.341033] ? kvm_clock_get_cycles+0x14/0x20 [ 778.342072] netif_receive_skb_list_internal+0x5f5/0xcb0 [ 778.343288] ? __kasan_kmalloc+0x7a/0x90 [ 778.344234] ? mlx5e_handle_rx_cqe_mpwrq+0x9e0/0x9e0 [mlx5_core] [ 778.345676] ? mlx5e_xmit_xdp_frame_mpwqe+0x14d0/0x14d0 [mlx5_core] [ 778.347140] ? __netif_receive_skb_list_core+0x820/0x820 [ 778.348351] ? mlx5e_post_rx_mpwqes+0xa6/0x25d0 [mlx5_core] [ 778.349688] ? napi_gro_flush+0x26c/0x3c0 [ 778.350641] napi_complete_done+0x188/0x6b0 [ 778.351627] mlx5e_napi_poll+0x373/0x1b80 [mlx5_core] [ 778.352853] __napi_poll+0x9f/0x510 [ 778.353704] ? mlx5_flow_namespace_set_mode+0x260/0x260 [mlx5_core] [ 778.355158] net_rx_action+0x34c/0xa40 [ 778.356060] ? napi_threaded_poll+0x3d0/0x3d0 [ 778.357083] ? sched_clock_cpu+0x18/0x190 [ 778.358041] ? __common_interrupt+0x8e/0x1a0 [ 778.359045] __do_softirq+0x1ce/0x984 [ 778.359938] __irq_exit_rcu+0x137/0x1d0 [ 778.360865] irq_exit_rcu+0xa/0x20 [ 778.361708] common_interrupt+0x80/0xa0 [ 778.362640] [ 778.363212] asm_common_interrupt+0x1e/0x40 [ 778.364204] RIP: 0010:native_safe_halt+0xe/0x10 [ 778.365273] Code: 4f ff ff ff 4c 89 e7 e8 50 3f 40 fe e9 dc fe ff ff 48 89 df e8 43 3f 40 fe eb 90 cc e9 07 00 00 00 0f 00 2d 74 05 62 00 fb f4 90 e9 07 00 00 00 0f 00 2d 64 05 62 00 f4 c3 cc cc 0f 1f 44 00 [ 778.369355] RSP: 0018:ffffffff84407e48 EFLAGS: 00000246 [ 778.370570] RAX: ffff88842de46a80 RBX: ffffffff84425840 RCX: ffffffff83418468 [ 778.372143] RDX: 000000000026f1da RSI: 0000000000000004 RDI: ffffffff8343af5e [ 778.373722] RBP: fffffbfff0884b08 R08: 0000000000000000 R09: ffff88842de46bcb [ 778.375292] R10: ffffed1085bc8d79 R11: 0000000000000001 R12: 0000000000000000 [ 778.376860] R13: ffffffff851124a0 R14: 0000000000000000 R15: dffffc0000000000 [ 778.378491] ? rcu_eqs_enter.constprop.0+0xb8/0xe0 [ 778.379606] ? default_idle_call+0x5e/0xe0 [ 778.380578] default_idle+0xa/0x10 [ 778.381406] default_idle_call+0x96/0xe0 [ 778.382350] do_idle+0x3d4/0x550 [ 778.383153] ? arch_cpu_idle_exit+0x40/0x40 [ 778.384143] cpu_startup_entry+0x19/0x20 [ 778.385078] start_kernel+0x3c7/0x3e5 [ 778.385978] secondary_startup_64_no_verify+0xb0/0xbb Fix the issue by providing new function tc_skb_ext_alloc() that allocates tc skb extension and initializes its memory to 0 before returning it to the caller. Change all existing users to use new API instead of calling skb_ext_add() directly. Fixes: 038ebb1a713d ("net/sched: act_ct: fix miss set mru for ovs after defrag in act_ct") Fixes: d29334c15d33 ("net/sched: act_api: fix miss set post_ct for ovs after do conntrack in act_ct") Signed-off-by: Vlad Buslov Acked-by: Cong Wang Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +- include/net/pkt_cls.h | 11 +++++++++++ net/sched/cls_api.c | 2 +- 4 files changed, 14 insertions(+), 3 deletions(-) (limited to 'include/net') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c index 6cdc52d50a48..311382261840 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c @@ -626,7 +626,7 @@ static bool mlx5e_restore_skb(struct sk_buff *skb, u32 chain, u32 reg_c1, struct mlx5_eswitch *esw; u32 zone_restore_id; - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + tc_skb_ext = tc_skb_ext_alloc(skb); if (!tc_skb_ext) { WARN_ON(1); return false; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c index bccdb43a880b..2c776e7a7692 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c @@ -5090,7 +5090,7 @@ bool mlx5e_tc_update_skb(struct mlx5_cqe64 *cqe, if (mapped_obj.type == MLX5_MAPPED_OBJ_CHAIN) { chain = mapped_obj.chain; - tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + tc_skb_ext = tc_skb_ext_alloc(skb); if (WARN_ON(!tc_skb_ext)) return false; diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h index 255e4f4b521f..ec7823921bd2 100644 --- a/include/net/pkt_cls.h +++ b/include/net/pkt_cls.h @@ -709,6 +709,17 @@ tc_cls_common_offload_init(struct flow_cls_common_offload *cls_common, cls_common->extack = extack; } +#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT) +static inline struct tc_skb_ext *tc_skb_ext_alloc(struct sk_buff *skb) +{ + struct tc_skb_ext *tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT); + + if (tc_skb_ext) + memset(tc_skb_ext, 0, sizeof(*tc_skb_ext)); + return tc_skb_ext; +} +#endif + enum tc_matchall_command { TC_CLSMATCHALL_REPLACE, TC_CLSMATCHALL_DESTROY, diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 40fbea626dfd..279f9e2a2319 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -1624,7 +1624,7 @@ int tcf_classify_ingress(struct sk_buff *skb, /* If we missed on some chain */ if (ret == TC_ACT_UNSPEC && last_executed_chain) { - ext = skb_ext_add(skb, TC_SKB_EXT); + ext = tc_skb_ext_alloc(skb); if (WARN_ON_ONCE(!ext)) return TC_ACT_SHOT; ext->chain = last_executed_chain; -- cgit v1.2.3