diff options
38 files changed, 535 insertions, 400 deletions
@@ -2489,6 +2489,13 @@ D: XF86_Mach8 D: XF86_8514 D: cfdisk (curses based disk partitioning program) +N: Mat Martineau +E: mat@martineau.name +D: MPTCP subsystem co-maintainer 2020-2023 +D: Keyctl restricted keyring and Diffie-Hellman UAPI +D: Bluetooth L2CAP ERTM mode and AMP +S: USA + N: John S. Marvin E: jsm@fc.hp.com D: PA-RISC port diff --git a/Documentation/networking/bridge.rst b/Documentation/networking/bridge.rst index 4aef9cddde2f..c859f3c1636e 100644 --- a/Documentation/networking/bridge.rst +++ b/Documentation/networking/bridge.rst @@ -8,7 +8,7 @@ In order to use the Ethernet bridging functionality, you'll need the userspace tools. Documentation for Linux bridging is on: - http://www.linuxfoundation.org/collaborate/workgroups/networking/bridge + https://wiki.linuxfoundation.org/networking/bridge The bridge-utilities are maintained at: git://git.kernel.org/pub/scm/linux/kernel/git/shemminger/bridge-utils.git diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst index 49db1d11d7c4..8b1045c3b59e 100644 --- a/Documentation/networking/nf_conntrack-sysctl.rst +++ b/Documentation/networking/nf_conntrack-sysctl.rst @@ -173,7 +173,9 @@ nf_conntrack_sctp_timeout_cookie_echoed - INTEGER (seconds) default 3 nf_conntrack_sctp_timeout_established - INTEGER (seconds) - default 432000 (5 days) + default 210 + + Default is set to (hb_interval * path_max_retrans + rto_max) nf_conntrack_sctp_timeout_shutdown_sent - INTEGER (seconds) default 0.3 @@ -190,12 +192,6 @@ nf_conntrack_sctp_timeout_heartbeat_sent - INTEGER (seconds) This timeout is used to setup conntrack entry on secondary paths. Default is set to hb_interval. -nf_conntrack_sctp_timeout_heartbeat_acked - INTEGER (seconds) - default 210 - - This timeout is used to setup conntrack entry on secondary paths. - Default is set to (hb_interval * path_max_retrans + rto_max) - nf_conntrack_udp_timeout - INTEGER (seconds) default 30 diff --git a/MAINTAINERS b/MAINTAINERS index 5b74014994f5..7c884556eb8e 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -14632,7 +14632,6 @@ F: net/netfilter/xt_SECMARK.c F: net/netlabel/ NETWORKING [MPTCP] -M: Mat Martineau <mathew.j.martineau@linux.intel.com> M: Matthieu Baerts <matthieu.baerts@tessares.net> L: netdev@vger.kernel.org L: mptcp@lists.linux.dev diff --git a/drivers/net/dsa/microchip/ksz9477_i2c.c b/drivers/net/dsa/microchip/ksz9477_i2c.c index c1a633ca1e6d..e315f669ec06 100644 --- a/drivers/net/dsa/microchip/ksz9477_i2c.c +++ b/drivers/net/dsa/microchip/ksz9477_i2c.c @@ -104,7 +104,7 @@ static const struct of_device_id ksz9477_dt_ids[] = { }, { .compatible = "microchip,ksz8563", - .data = &ksz_switch_chips[KSZ9893] + .data = &ksz_switch_chips[KSZ8563] }, { .compatible = "microchip,ksz9567", diff --git a/drivers/net/ethernet/adi/adin1110.c b/drivers/net/ethernet/adi/adin1110.c index 0805f249fff2..c26b8597945b 100644 --- a/drivers/net/ethernet/adi/adin1110.c +++ b/drivers/net/ethernet/adi/adin1110.c @@ -356,7 +356,7 @@ static int adin1110_read_fifo(struct adin1110_port_priv *port_priv) if ((port_priv->flags & IFF_ALLMULTI && rxb->pkt_type == PACKET_MULTICAST) || (port_priv->flags & IFF_BROADCAST && rxb->pkt_type == PACKET_BROADCAST)) - rxb->offload_fwd_mark = 1; + rxb->offload_fwd_mark = port_priv->priv->forwarding; netif_rx(rxb); diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index 59debdc344a5..58747292521d 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -11166,7 +11166,7 @@ static void tg3_reset_task(struct work_struct *work) rtnl_lock(); tg3_full_lock(tp, 0); - if (!netif_running(tp->dev)) { + if (tp->pcierr_recovery || !netif_running(tp->dev)) { tg3_flag_clear(tp, RESET_TASK_PENDING); tg3_full_unlock(tp); rtnl_unlock(); @@ -18101,6 +18101,9 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, netdev_info(netdev, "PCI I/O error detected\n"); + /* Want to make sure that the reset task doesn't run */ + tg3_reset_task_cancel(tp); + rtnl_lock(); /* Could be second call or maybe we don't have netdev yet */ @@ -18117,9 +18120,6 @@ static pci_ers_result_t tg3_io_error_detected(struct pci_dev *pdev, tg3_timer_stop(tp); - /* Want to make sure that the reset task doesn't run */ - tg3_reset_task_cancel(tp); - netif_device_detach(netdev); /* Clean up software state, even if MMIO is blocked */ diff --git a/drivers/net/ethernet/engleder/tsnep_main.c b/drivers/net/ethernet/engleder/tsnep_main.c index bf0190e1d2ea..00e2108f2ca4 100644 --- a/drivers/net/ethernet/engleder/tsnep_main.c +++ b/drivers/net/ethernet/engleder/tsnep_main.c @@ -450,7 +450,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, /* ring full, shall not happen because queue is stopped if full * below */ - netif_stop_queue(tx->adapter->netdev); + netif_stop_subqueue(tx->adapter->netdev, tx->queue_index); spin_unlock_irqrestore(&tx->lock, flags); @@ -493,7 +493,7 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, if (tsnep_tx_desc_available(tx) < (MAX_SKB_FRAGS + 1)) { /* ring can get full with next frame */ - netif_stop_queue(tx->adapter->netdev); + netif_stop_subqueue(tx->adapter->netdev, tx->queue_index); } spin_unlock_irqrestore(&tx->lock, flags); @@ -503,11 +503,14 @@ static netdev_tx_t tsnep_xmit_frame_ring(struct sk_buff *skb, static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) { + struct tsnep_tx_entry *entry; + struct netdev_queue *nq; unsigned long flags; int budget = 128; - struct tsnep_tx_entry *entry; - int count; int length; + int count; + + nq = netdev_get_tx_queue(tx->adapter->netdev, tx->queue_index); spin_lock_irqsave(&tx->lock, flags); @@ -564,8 +567,8 @@ static bool tsnep_tx_poll(struct tsnep_tx *tx, int napi_budget) } while (likely(budget)); if ((tsnep_tx_desc_available(tx) >= ((MAX_SKB_FRAGS + 1) * 2)) && - netif_queue_stopped(tx->adapter->netdev)) { - netif_wake_queue(tx->adapter->netdev); + netif_tx_queue_stopped(nq)) { + netif_tx_wake_queue(nq); } spin_unlock_irqrestore(&tx->lock, flags); diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c index 644f3c963730..2341597408d1 100644 --- a/drivers/net/ethernet/freescale/fec_main.c +++ b/drivers/net/ethernet/freescale/fec_main.c @@ -3191,7 +3191,7 @@ static void fec_enet_free_buffers(struct net_device *ndev) for (q = 0; q < fep->num_rx_queues; q++) { rxq = fep->rx_queue[q]; for (i = 0; i < rxq->bd.ring_size; i++) - page_pool_release_page(rxq->page_pool, rxq->rx_skb_info[i].page); + page_pool_put_full_page(rxq->page_pool, rxq->rx_skb_info[i].page, false); for (i = 0; i < XDP_STATS_TOTAL; i++) rxq->stats[i] = 0; diff --git a/drivers/net/ethernet/intel/iavf/iavf.h b/drivers/net/ethernet/intel/iavf/iavf.h index 0d1bab4ac1b0..2a9f1eeeb701 100644 --- a/drivers/net/ethernet/intel/iavf/iavf.h +++ b/drivers/net/ethernet/intel/iavf/iavf.h @@ -249,6 +249,7 @@ struct iavf_cloud_filter { /* board specific private data structure */ struct iavf_adapter { + struct workqueue_struct *wq; struct work_struct reset_task; struct work_struct adminq_task; struct delayed_work client_task; @@ -459,7 +460,6 @@ struct iavf_device { /* needed by iavf_ethtool.c */ extern char iavf_driver_name[]; -extern struct workqueue_struct *iavf_wq; static inline const char *iavf_state_str(enum iavf_state_t state) { diff --git a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c index d79ead5e8d0c..6f171d1d85b7 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_ethtool.c +++ b/drivers/net/ethernet/intel/iavf/iavf_ethtool.c @@ -532,7 +532,7 @@ static int iavf_set_priv_flags(struct net_device *netdev, u32 flags) if (changed_flags & IAVF_FLAG_LEGACY_RX) { if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); } } @@ -672,7 +672,7 @@ static int iavf_set_ringparam(struct net_device *netdev, if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); } return 0; @@ -1433,7 +1433,7 @@ static int iavf_add_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx adapter->aq_required |= IAVF_FLAG_AQ_ADD_FDIR_FILTER; spin_unlock_bh(&adapter->fdir_fltr_lock); - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); ret: if (err && fltr) @@ -1474,7 +1474,7 @@ static int iavf_del_fdir_ethtool(struct iavf_adapter *adapter, struct ethtool_rx spin_unlock_bh(&adapter->fdir_fltr_lock); if (fltr && fltr->state == IAVF_FDIR_FLTR_DEL_REQUEST) - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); return err; } @@ -1658,7 +1658,7 @@ iavf_set_adv_rss_hash_opt(struct iavf_adapter *adapter, spin_unlock_bh(&adapter->adv_rss_lock); if (!err) - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); mutex_unlock(&adapter->crit_lock); diff --git a/drivers/net/ethernet/intel/iavf/iavf_main.c b/drivers/net/ethernet/intel/iavf/iavf_main.c index adc02adef83a..4b09785d2147 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_main.c +++ b/drivers/net/ethernet/intel/iavf/iavf_main.c @@ -49,7 +49,6 @@ MODULE_DESCRIPTION("Intel(R) Ethernet Adaptive Virtual Function Network Driver") MODULE_LICENSE("GPL v2"); static const struct net_device_ops iavf_netdev_ops; -struct workqueue_struct *iavf_wq; int iavf_status_to_errno(enum iavf_status status) { @@ -277,7 +276,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) if (!(adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED))) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); } } @@ -291,7 +290,7 @@ void iavf_schedule_reset(struct iavf_adapter *adapter) void iavf_schedule_request_stats(struct iavf_adapter *adapter) { adapter->aq_required |= IAVF_FLAG_AQ_REQUEST_STATS; - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } /** @@ -411,7 +410,7 @@ static irqreturn_t iavf_msix_aq(int irq, void *data) if (adapter->state != __IAVF_REMOVE) /* schedule work on the private workqueue */ - queue_work(iavf_wq, &adapter->adminq_task); + queue_work(adapter->wq, &adapter->adminq_task); return IRQ_HANDLED; } @@ -1034,7 +1033,7 @@ int iavf_replace_primary_mac(struct iavf_adapter *adapter, /* schedule the watchdog task to immediately process the request */ if (f) { - queue_work(iavf_wq, &adapter->watchdog_task.work); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); return 0; } return -ENOMEM; @@ -1257,7 +1256,7 @@ static void iavf_up_complete(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_ENABLE_QUEUES; if (CLIENT_ENABLED(adapter)) adapter->flags |= IAVF_FLAG_CLIENT_NEEDS_OPEN; - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } /** @@ -1414,7 +1413,7 @@ void iavf_down(struct iavf_adapter *adapter) adapter->aq_required |= IAVF_FLAG_AQ_DISABLE_QUEUES; } - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } /** @@ -2248,7 +2247,7 @@ iavf_set_vlan_offload_features(struct iavf_adapter *adapter, if (aq_required) { adapter->aq_required |= aq_required; - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 0); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 0); } } @@ -2693,6 +2692,15 @@ static void iavf_watchdog_task(struct work_struct *work) goto restart_watchdog; } + if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES) && + adapter->netdev_registered && + !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section) && + rtnl_trylock()) { + netdev_update_features(adapter->netdev); + rtnl_unlock(); + adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; + } + if (adapter->flags & IAVF_FLAG_PF_COMMS_FAILED) iavf_change_state(adapter, __IAVF_COMM_FAILED); @@ -2700,7 +2708,7 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; mutex_unlock(&adapter->crit_lock); - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); return; } @@ -2708,31 +2716,31 @@ static void iavf_watchdog_task(struct work_struct *work) case __IAVF_STARTUP: iavf_startup(adapter); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(30)); return; case __IAVF_INIT_VERSION_CHECK: iavf_init_version_check(adapter); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(30)); return; case __IAVF_INIT_GET_RESOURCES: iavf_init_get_resources(adapter); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(1)); return; case __IAVF_INIT_EXTENDED_CAPS: iavf_init_process_extended_caps(adapter); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(1)); return; case __IAVF_INIT_CONFIG_ADAPTER: iavf_init_config_adapter(adapter); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(1)); return; case __IAVF_INIT_FAILED: @@ -2751,14 +2759,14 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->flags |= IAVF_FLAG_PF_COMMS_FAILED; iavf_shutdown_adminq(hw); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, (5 * HZ)); return; } /* Try again from failed step*/ iavf_change_state(adapter, adapter->last_state); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ); return; case __IAVF_COMM_FAILED: if (test_bit(__IAVF_IN_REMOVE_TASK, @@ -2789,13 +2797,14 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(10)); return; case __IAVF_RESETTING: mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + HZ * 2); return; case __IAVF_DOWN: case __IAVF_DOWN_PENDING: @@ -2834,9 +2843,9 @@ static void iavf_watchdog_task(struct work_struct *work) adapter->aq_required = 0; adapter->current_op = VIRTCHNL_OP_UNKNOWN; dev_err(&adapter->pdev->dev, "Hardware reset detected\n"); - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); mutex_unlock(&adapter->crit_lock); - queue_delayed_work(iavf_wq, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, HZ * 2); return; } @@ -2845,12 +2854,13 @@ static void iavf_watchdog_task(struct work_struct *work) mutex_unlock(&adapter->crit_lock); restart_watchdog: if (adapter->state >= __IAVF_DOWN) - queue_work(iavf_wq, &adapter->adminq_task); + queue_work(adapter->wq, &adapter->adminq_task); if (adapter->aq_required) - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(20)); else - queue_delayed_work(iavf_wq, &adapter->watchdog_task, HZ * 2); + queue_delayed_work(adapter->wq, &adapter->watchdog_task, + HZ * 2); } /** @@ -2952,7 +2962,7 @@ static void iavf_reset_task(struct work_struct *work) */ if (!mutex_trylock(&adapter->crit_lock)) { if (adapter->state != __IAVF_REMOVE) - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); goto reset_finish; } @@ -3116,7 +3126,7 @@ continue_reset: bitmap_clear(adapter->vsi.active_cvlans, 0, VLAN_N_VID); bitmap_clear(adapter->vsi.active_svlans, 0, VLAN_N_VID); - mod_delayed_work(iavf_wq, &adapter->watchdog_task, 2); + mod_delayed_work(adapter->wq, &adapter->watchdog_task, 2); /* We were running when the reset started, so we need to restore some * state here. @@ -3208,7 +3218,7 @@ static void iavf_adminq_task(struct work_struct *work) if (adapter->state == __IAVF_REMOVE) return; - queue_work(iavf_wq, &adapter->adminq_task); + queue_work(adapter->wq, &adapter->adminq_task); goto out; } @@ -3232,24 +3242,6 @@ static void iavf_adminq_task(struct work_struct *work) } while (pending); mutex_unlock(&adapter->crit_lock); - if ((adapter->flags & IAVF_FLAG_SETUP_NETDEV_FEATURES)) { - if (adapter->netdev_registered || - !test_bit(__IAVF_IN_REMOVE_TASK, &adapter->crit_section)) { - struct net_device *netdev = adapter->netdev; - - rtnl_lock(); - netdev_update_features(netdev); - rtnl_unlock(); - /* Request VLAN offload settings */ - if (VLAN_V2_ALLOWED(adapter)) - iavf_set_vlan_offload_features - (adapter, 0, netdev->features); - - iavf_set_queue_vlan_tag_loc(adapter); - } - - adapter->flags &= ~IAVF_FLAG_SETUP_NETDEV_FEATURES; - } if ((adapter->flags & (IAVF_FLAG_RESET_PENDING | IAVF_FLAG_RESET_NEEDED)) || adapter->state == __IAVF_RESETTING) @@ -4349,7 +4341,7 @@ static int iavf_change_mtu(struct net_device *netdev, int new_mtu) if (netif_running(netdev)) { adapter->flags |= IAVF_FLAG_RESET_NEEDED; - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); } return 0; @@ -4898,6 +4890,13 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) hw = &adapter->hw; hw->back = adapter; + adapter->wq = alloc_ordered_workqueue("%s", WQ_MEM_RECLAIM, + iavf_driver_name); + if (!adapter->wq) { + err = -ENOMEM; + goto err_alloc_wq; + } + adapter->msg_enable = BIT(DEFAULT_DEBUG_LEVEL_SHIFT) - 1; iavf_change_state(adapter, __IAVF_STARTUP); @@ -4942,7 +4941,7 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) INIT_WORK(&adapter->adminq_task, iavf_adminq_task); INIT_DELAYED_WORK(&adapter->watchdog_task, iavf_watchdog_task); INIT_DELAYED_WORK(&adapter->client_task, iavf_client_task); - queue_delayed_work(iavf_wq, &adapter->watchdog_task, + queue_delayed_work(adapter->wq, &adapter->watchdog_task, msecs_to_jiffies(5 * (pdev->devfn & 0x07))); /* Setup the wait queue for indicating transition to down status */ @@ -4954,6 +4953,8 @@ static int iavf_probe(struct pci_dev *pdev, const struct pci_device_id *ent) return 0; err_ioremap: + destroy_workqueue(adapter->wq); +err_alloc_wq: free_netdev(netdev); err_alloc_etherdev: pci_disable_pcie_error_reporting(pdev); @@ -5023,7 +5024,7 @@ static int __maybe_unused iavf_resume(struct device *dev_d) return err; } - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); netif_device_attach(adapter->netdev); @@ -5170,6 +5171,8 @@ static void iavf_remove(struct pci_dev *pdev) } spin_unlock_bh(&adapter->adv_rss_lock); + destroy_workqueue(adapter->wq); + free_netdev(netdev); pci_disable_pcie_error_reporting(pdev); @@ -5196,24 +5199,11 @@ static struct pci_driver iavf_driver = { **/ static int __init iavf_init_module(void) { - int ret; - pr_info("iavf: %s\n", iavf_driver_string); pr_info("%s\n", iavf_copyright); - iavf_wq = alloc_workqueue("%s", WQ_UNBOUND | WQ_MEM_RECLAIM, 1, - iavf_driver_name); - if (!iavf_wq) { - pr_err("%s: Failed to create workqueue\n", iavf_driver_name); - return -ENOMEM; - } - - ret = pci_register_driver(&iavf_driver); - if (ret) - destroy_workqueue(iavf_wq); - - return ret; + return pci_register_driver(&iavf_driver); } module_init(iavf_init_module); @@ -5227,7 +5217,6 @@ module_init(iavf_init_module); static void __exit iavf_exit_module(void) { pci_unregister_driver(&iavf_driver); - destroy_workqueue(iavf_wq); } module_exit(iavf_exit_module); diff --git a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c index 24a701fd140e..365ca0c710c4 100644 --- a/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c +++ b/drivers/net/ethernet/intel/iavf/iavf_virtchnl.c @@ -1952,7 +1952,7 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, if (!(adapter->flags & IAVF_FLAG_RESET_PENDING)) { adapter->flags |= IAVF_FLAG_RESET_PENDING; dev_info(&adapter->pdev->dev, "Scheduling reset task\n"); - queue_work(iavf_wq, &adapter->reset_task); + queue_work(adapter->wq, &adapter->reset_task); } break; default: @@ -2226,6 +2226,14 @@ void iavf_virtchnl_completion(struct iavf_adapter *adapter, iavf_process_config(adapter); adapter->flags |= IAVF_FLAG_SETUP_NETDEV_FEATURES; + + /* Request VLAN offload settings */ + if (VLAN_V2_ALLOWED(adapter)) + iavf_set_vlan_offload_features(adapter, 0, + netdev->features); + + iavf_set_queue_vlan_tag_loc(adapter); + was_mac_changed = !ether_addr_equal(netdev->dev_addr, adapter->hw.mac.addr); diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c index 94aa834cd9a6..a596e07b3ce9 100644 --- a/drivers/net/ethernet/intel/ice/ice_lib.c +++ b/drivers/net/ethernet/intel/ice/ice_lib.c @@ -3235,9 +3235,6 @@ int ice_vsi_release(struct ice_vsi *vsi) } } - if (vsi->type == ICE_VSI_PF) - ice_devlink_destroy_pf_port(pf); - if (vsi->type == ICE_VSI_VF && vsi->agg_node && vsi->agg_node->valid) vsi->agg_node->num_vsis--; diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c index a9a7f8b52140..237ede2cffb0 100644 --- a/drivers/net/ethernet/intel/ice/ice_main.c +++ b/drivers/net/ethernet/intel/ice/ice_main.c @@ -4590,7 +4590,7 @@ static void ice_print_wake_reason(struct ice_pf *pf) } /** - * ice_register_netdev - register netdev and devlink port + * ice_register_netdev - register netdev * @pf: pointer to the PF struct */ static int ice_register_netdev(struct ice_pf *pf) @@ -4602,11 +4602,6 @@ static int ice_register_netdev(struct ice_pf *pf) if (!vsi || !vsi->netdev) return -EIO; - err = ice_devlink_create_pf_port(pf); - if (err) - goto err_devlink_create; - - SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); err = register_netdev(vsi->netdev); if (err) goto err_register_netdev; @@ -4617,8 +4612,6 @@ static int ice_register_netdev(struct ice_pf *pf) return 0; err_register_netdev: - ice_devlink_destroy_pf_port(pf); -err_devlink_create: free_netdev(vsi->netdev); vsi->netdev = NULL; clear_bit(ICE_VSI_NETDEV_ALLOCD, vsi->state); @@ -4636,6 +4629,7 @@ static int ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) { struct device *dev = &pdev->dev; + struct ice_vsi *vsi; struct ice_pf *pf; struct ice_hw *hw; int i, err; @@ -4918,6 +4912,18 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent) pcie_print_link_status(pf->pdev); probe_done: + err = ice_devlink_create_pf_port(pf); + if (err) + goto err_create_pf_port; + + vsi = ice_get_main_vsi(pf); + if (!vsi || !vsi->netdev) { + err = -EINVAL; + goto err_netdev_reg; + } + + SET_NETDEV_DEVLINK_PORT(vsi->netdev, &pf->devlink_port); + err = ice_register_netdev(pf); if (err) goto err_netdev_reg; @@ -4955,6 +4961,8 @@ err_init_aux_unroll: err_devlink_reg_param: ice_devlink_unregister_params(pf); err_netdev_reg: + ice_devlink_destroy_pf_port(pf); +err_create_pf_port: err_send_version_unroll: ice_vsi_release_all(pf); err_alloc_sw_unroll: @@ -5083,6 +5091,7 @@ static void ice_remove(struct pci_dev *pdev) ice_setup_mc_magic_wake(pf); ice_vsi_release_all(pf); mutex_destroy(&(&pf->hw)->fdir_fltr_lock); + ice_devlink_destroy_pf_port(pf); ice_set_wake(pf); ice_free_irq_msix_misc(pf); ice_for_each_vsi(pf, i) { diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index e708c2d04983..b144f2237748 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1259,13 +1259,20 @@ static int mana_gd_setup_irqs(struct pci_dev *pdev) gic->handler = NULL; gic->arg = NULL; + if (!i) + snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_hwc@pci:%s", + pci_name(pdev)); + else + snprintf(gic->name, MANA_IRQ_NAME_SZ, "mana_q%d@pci:%s", + i - 1, pci_name(pdev)); + irq = pci_irq_vector(pdev, i); if (irq < 0) { err = irq; goto free_mask; } - err = request_irq(irq, mana_gd_intr, 0, "mana_intr", gic); + err = request_irq(irq, mana_gd_intr, 0, gic->name, gic); if (err) goto free_mask; irq_set_affinity_and_hint(irq, req_mask); diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c index b4e0fc7f65bd..0f54849a3823 100644 --- a/drivers/net/ethernet/renesas/ravb_main.c +++ b/drivers/net/ethernet/renesas/ravb_main.c @@ -1101,14 +1101,14 @@ static void ravb_error_interrupt(struct net_device *ndev) ravb_write(ndev, ~(EIS_QFS | EIS_RESERVED), EIS); if (eis & EIS_QFS) { ris2 = ravb_read(ndev, RIS2); - ravb_write(ndev, ~(RIS2_QFF0 | RIS2_RFFF | RIS2_RESERVED), + ravb_write(ndev, ~(RIS2_QFF0 | RIS2_QFF1 | RIS2_RFFF | RIS2_RESERVED), RIS2); /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF0) priv->stats[RAVB_BE].rx_over_errors++; - /* Receive Descriptor Empty int */ + /* Receive Descriptor Empty int */ if (ris2 & RIS2_QFF1) priv->stats[RAVB_NC].rx_over_errors++; @@ -2973,6 +2973,9 @@ static int __maybe_unused ravb_suspend(struct device *dev) else ret = ravb_close(ndev); + if (priv->info->ccc_gac) + ravb_ptp_stop(ndev); + return ret; } @@ -3011,6 +3014,9 @@ static int __maybe_unused ravb_resume(struct device *dev) /* Restore descriptor base address table */ ravb_write(ndev, priv->desc_bat_dma, DBAT); + if (priv->info->ccc_gac) + ravb_ptp_init(ndev, priv->pdev); + if (netif_running(ndev)) { if (priv->wol_enabled) { ret = ravb_wol_restore(ndev); diff --git a/drivers/net/ethernet/renesas/rswitch.c b/drivers/net/ethernet/renesas/rswitch.c index 6441892636db..2370c7797a0a 100644 --- a/drivers/net/ethernet/renesas/rswitch.c +++ b/drivers/net/ethernet/renesas/rswitch.c @@ -1074,8 +1074,11 @@ static struct device_node *rswitch_get_port_node(struct rswitch_device *rdev) port = NULL; goto out; } - if (index == rdev->etha->index) + if (index == rdev->etha->index) { + if (!of_device_is_available(port)) + port = NULL; break; + } } out: @@ -1106,7 +1109,7 @@ static int rswitch_etha_get_params(struct rswitch_device *rdev) port = rswitch_get_port_node(rdev); if (!port) - return -ENODEV; + return 0; /* ignored */ err = of_get_phy_mode(port, &rdev->etha->phy_interface); of_node_put(port); @@ -1324,13 +1327,13 @@ static int rswitch_ether_port_init_all(struct rswitch_private *priv) { int i, err; - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + rswitch_for_each_enabled_port(priv, i) { err = rswitch_ether_port_init_one(priv->rdev[i]); if (err) goto err_init_one; } - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + rswitch_for_each_enabled_port(priv, i) { err = rswitch_serdes_init(priv->rdev[i]); if (err) goto err_serdes; @@ -1339,12 +1342,12 @@ static int rswitch_ether_port_init_all(struct rswitch_private *priv) return 0; err_serdes: - for (i--; i >= 0; i--) + rswitch_for_each_enabled_port_continue_reverse(priv, i) rswitch_serdes_deinit(priv->rdev[i]); i = RSWITCH_NUM_PORTS; err_init_one: - for (i--; i >= 0; i--) + rswitch_for_each_enabled_port_continue_reverse(priv, i) rswitch_ether_port_deinit_one(priv->rdev[i]); return err; @@ -1608,6 +1611,7 @@ static int rswitch_device_alloc(struct rswitch_private *priv, int index) netif_napi_add(ndev, &rdev->napi, rswitch_poll); port = rswitch_get_port_node(rdev); + rdev->disabled = !port; err = of_get_ethdev_address(port, ndev); of_node_put(port); if (err) { @@ -1707,16 +1711,16 @@ static int rswitch_init(struct rswitch_private *priv) if (err) goto err_ether_port_init_all; - for (i = 0; i < RSWITCH_NUM_PORTS; i++) { + rswitch_for_each_enabled_port(priv, i) { err = register_netdev(priv->rdev[i]->ndev); if (err) { - for (i--; i >= 0; i--) + rswitch_for_each_enabled_port_continue_reverse(priv, i) unregister_netdev(priv->rdev[i]->ndev); goto err_register_netdev; } } - for (i = 0; i < RSWITCH_NUM_PORTS; i++) + rswitch_for_each_enabled_port(priv, i) netdev_info(priv->rdev[i]->ndev, "MAC address %pM\n", priv->rdev[i]->ndev->dev_addr); diff --git a/drivers/net/ethernet/renesas/rswitch.h b/drivers/net/ethernet/renesas/rswitch.h index edbdd1b98d3d..49efb0f31c77 100644 --- a/drivers/net/ethernet/renesas/rswitch.h +++ b/drivers/net/ethernet/renesas/rswitch.h @@ -13,6 +13,17 @@ #define RSWITCH_MAX_NUM_QUEUES 128 #define RSWITCH_NUM_PORTS 3 +#define rswitch_for_each_enabled_port(priv, i) \ + for (i = 0; i < RSWITCH_NUM_PORTS; i++) \ + if (priv->rdev[i]->disabled) \ + continue; \ + else + +#define rswitch_for_each_enabled_port_continue_reverse(priv, i) \ + for (i--; i >= 0; i--) \ + if (priv->rdev[i]->disabled) \ + continue; \ + else #define TX_RING_SIZE 1024 #define RX_RING_SIZE 1024 @@ -938,6 +949,7 @@ struct rswitch_device { struct rswitch_gwca_queue *tx_queue; struct rswitch_gwca_queue *rx_queue; u8 ts_tag; + bool disabled; int port; struct rswitch_etha *etha; diff --git a/drivers/net/mdio/mdio-mux-meson-g12a.c b/drivers/net/mdio/mdio-mux-meson-g12a.c index 4a2e94faf57e..c4542ecf5623 100644 --- a/drivers/net/mdio/mdio-mux-meson-g12a.c +++ b/drivers/net/mdio/mdio-mux-meson-g12a.c @@ -4,6 +4,7 @@ */ #include <linux/bitfield.h> +#include <linux/delay.h> #include <linux/clk.h> #include <linux/clk-provider.h> #include <linux/device.h> @@ -150,6 +151,7 @@ static const struct clk_ops g12a_ephy_pll_ops = { static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv) { + u32 value; int ret; /* Enable the phy clock */ @@ -163,18 +165,25 @@ static int g12a_enable_internal_mdio(struct g12a_mdio_mux *priv) /* Initialize ephy control */ writel(EPHY_G12A_ID, priv->regs + ETH_PHY_CNTL0); - writel(FIELD_PREP(PHY_CNTL1_ST_MODE, 3) | - FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) | - FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) | - PHY_CNTL1_CLK_EN | - PHY_CNTL1_CLKFREQ | - PHY_CNTL1_PHY_ENB, - priv->regs + ETH_PHY_CNTL1); + + /* Make sure we get a 0 -> 1 transition on the enable bit */ + value = FIELD_PREP(PHY_CNTL1_ST_MODE, 3) | + FIELD_PREP(PHY_CNTL1_ST_PHYADD, EPHY_DFLT_ADD) | + FIELD_PREP(PHY_CNTL1_MII_MODE, EPHY_MODE_RMII) | + PHY_CNTL1_CLK_EN | + PHY_CNTL1_CLKFREQ; + writel(value, priv->regs + ETH_PHY_CNTL1); writel(PHY_CNTL2_USE_INTERNAL | PHY_CNTL2_SMI_SRC_MAC | PHY_CNTL2_RX_CLK_EPHY, priv->regs + ETH_PHY_CNTL2); + value |= PHY_CNTL1_PHY_ENB; + writel(value, priv->regs + ETH_PHY_CNTL1); + + /* The phy needs a bit of time to power up */ + mdelay(10); + return 0; } diff --git a/include/net/mana/gdma.h b/include/net/mana/gdma.h index b3ba04615caa..56189e4252da 100644 --- a/include/net/mana/gdma.h +++ b/include/net/mana/gdma.h @@ -336,9 +336,12 @@ struct gdma_queue_spec { }; }; +#define MANA_IRQ_NAME_SZ 32 + struct gdma_irq_context { void (*handler)(void *arg); void *arg; + char name[MANA_IRQ_NAME_SZ]; }; struct gdma_context { diff --git a/include/uapi/linux/netfilter/nf_conntrack_sctp.h b/include/uapi/linux/netfilter/nf_conntrack_sctp.h index c742469afe21..2d6f80d75ae7 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_sctp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_sctp.h @@ -15,8 +15,7 @@ enum sctp_conntrack { SCTP_CONNTRACK_SHUTDOWN_RECD, SCTP_CONNTRACK_SHUTDOWN_ACK_SENT, SCTP_CONNTRACK_HEARTBEAT_SENT, - SCTP_CONNTRACK_HEARTBEAT_ACKED, - SCTP_CONNTRACK_DATA_SENT, + SCTP_CONNTRACK_HEARTBEAT_ACKED, /* no longer used */ SCTP_CONNTRACK_MAX }; diff --git a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h index 94e74034706d..aa805e6d4e28 100644 --- a/include/uapi/linux/netfilter/nfnetlink_cttimeout.h +++ b/include/uapi/linux/netfilter/nfnetlink_cttimeout.h @@ -94,8 +94,7 @@ enum ctattr_timeout_sctp { CTA_TIMEOUT_SCTP_SHUTDOWN_RECD, CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, CTA_TIMEOUT_SCTP_HEARTBEAT_SENT, - CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, - CTA_TIMEOUT_SCTP_DATA_SENT, + CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED, /* no longer used */ __CTA_TIMEOUT_SCTP_MAX }; #define CTA_TIMEOUT_SCTP_MAX (__CTA_TIMEOUT_SCTP_MAX - 1) diff --git a/lib/nlattr.c b/lib/nlattr.c index 9055e8b4d144..489e15bde5c1 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -10,6 +10,7 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> +#include <linux/nospec.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> @@ -381,6 +382,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (type <= 0 || type > maxtype) return 0; + type = array_index_nospec(type, maxtype + 1); pt = &policy[type]; BUG_ON(pt->type > NLA_TYPE_MAX); @@ -596,6 +598,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype, } continue; } + type = array_index_nospec(type, maxtype + 1); if (policy) { int err = validate_nla(nla, maxtype, policy, validate, extack, depth); diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index 5581d22cc191..078a0a420c8a 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -137,12 +137,12 @@ static int ops_init(const struct pernet_operations *ops, struct net *net) return 0; if (ops->id && ops->size) { -cleanup: ng = rcu_dereference_protected(net->gen, lockdep_is_held(&pernet_ops_rwsem)); ng->ptr[*ops->id] = NULL; } +cleanup: kfree(data); out: diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c index ce9ff3c62e84..3bb890a40ed7 100644 --- a/net/ipv4/fib_semantics.c +++ b/net/ipv4/fib_semantics.c @@ -30,6 +30,7 @@ #include <linux/slab.h> #include <linux/netlink.h> #include <linux/hash.h> +#include <linux/nospec.h> #include <net/arp.h> #include <net/inet_dscp.h> @@ -1022,6 +1023,7 @@ bool fib_metrics_match(struct fib_config *cfg, struct fib_info *fi) if (type > RTAX_MAX) return false; + type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; bool ecn_ca = false; diff --git a/net/ipv4/metrics.c b/net/ipv4/metrics.c index 7fcfdfd8f9de..0e3ee1532848 100644 --- a/net/ipv4/metrics.c +++ b/net/ipv4/metrics.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0-only #include <linux/netlink.h> +#include <linux/nospec.h> #include <linux/rtnetlink.h> #include <linux/types.h> #include <net/ip.h> @@ -25,6 +26,7 @@ static int ip_metrics_convert(struct net *net, struct nlattr *fc_mx, return -EINVAL; } + type = array_index_nospec(type, RTAX_MAX + 1); if (type == RTAX_CC_ALGO) { char tmp[TCP_CA_NAME_MAX]; diff --git a/net/ipv6/ip6_output.c b/net/ipv6/ip6_output.c index 60fd91bb5171..c314fdde0097 100644 --- a/net/ipv6/ip6_output.c +++ b/net/ipv6/ip6_output.c @@ -547,7 +547,20 @@ int ip6_forward(struct sk_buff *skb) pneigh_lookup(&nd_tbl, net, &hdr->daddr, skb->dev, 0)) { int proxied = ip6_forward_proxy_check(skb); if (proxied > 0) { - hdr->hop_limit--; + /* It's tempting to decrease the hop limit + * here by 1, as we do at the end of the + * function too. + * + * But that would be incorrect, as proxying is + * not forwarding. The ip6_input function + * will handle this packet locally, and it + * depends on the hop limit being unchanged. + * + * One example is the NDP hop limit, that + * always has to stay 255, but other would be + * similar checks around RA packets, where the + * user can even change the desired limit. + */ return ip6_input(skb); } else if (proxied < 0) { __IP6_INC_STATS(net, idev, IPSTATS_MIB_INDISCARDS); diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index fc9e728b6333..45bbe3e54cc2 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -544,9 +544,6 @@ static int mctp_sk_init(struct sock *sk) static void mctp_sk_close(struct sock *sk, long timeout) { - struct mctp_sock *msk = container_of(sk, struct mctp_sock, sk); - - del_timer_sync(&msk->key_expiry); sk_common_release(sk); } @@ -580,7 +577,14 @@ static void mctp_sk_unhash(struct sock *sk) spin_lock_irqsave(&key->lock, fl2); __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_CLOSED); } + sock_set_flag(sk, SOCK_DEAD); spin_unlock_irqrestore(&net->mctp.keys_lock, flags); + + /* Since there are no more tag allocations (we have removed all of the + * keys), stop any pending expiry events. the timer cannot be re-queued + * as the sk is no longer observable + */ + del_timer_sync(&msk->key_expiry); } static struct proto mctp_proto = { diff --git a/net/mctp/route.c b/net/mctp/route.c index f9a80b82dc51..f51a05ec7162 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -147,6 +147,7 @@ static struct mctp_sk_key *mctp_key_alloc(struct mctp_sock *msk, key->valid = true; spin_lock_init(&key->lock); refcount_set(&key->refs, 1); + sock_hold(key->sk); return key; } @@ -165,6 +166,7 @@ void mctp_key_unref(struct mctp_sk_key *key) mctp_dev_release_key(key->dev, key); spin_unlock_irqrestore(&key->lock, flags); + sock_put(key->sk); kfree(key); } @@ -177,6 +179,11 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) spin_lock_irqsave(&net->mctp.keys_lock, flags); + if (sock_flag(&msk->sk, SOCK_DEAD)) { + rc = -EINVAL; + goto out_unlock; + } + hlist_for_each_entry(tmp, &net->mctp.keys, hlist) { if (mctp_key_match(tmp, key->local_addr, key->peer_addr, key->tag)) { @@ -198,6 +205,7 @@ static int mctp_key_add(struct mctp_sk_key *key, struct mctp_sock *msk) hlist_add_head(&key->sklist, &msk->keys); } +out_unlock: spin_unlock_irqrestore(&net->mctp.keys_lock, flags); return rc; @@ -315,8 +323,8 @@ static int mctp_frag_queue(struct mctp_sk_key *key, struct sk_buff *skb) static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) { + struct mctp_sk_key *key, *any_key = NULL; struct net *net = dev_net(skb->dev); - struct mctp_sk_key *key; struct mctp_sock *msk; struct mctp_hdr *mh; unsigned long f; @@ -361,13 +369,11 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * key for reassembly - we'll create a more specific * one for future packets if required (ie, !EOM). */ - key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); - if (key) { - msk = container_of(key->sk, + any_key = mctp_lookup_key(net, skb, MCTP_ADDR_ANY, &f); + if (any_key) { + msk = container_of(any_key->sk, struct mctp_sock, sk); - spin_unlock_irqrestore(&key->lock, f); - mctp_key_unref(key); - key = NULL; + spin_unlock_irqrestore(&any_key->lock, f); } } @@ -419,14 +425,14 @@ static int mctp_route_input(struct mctp_route *route, struct sk_buff *skb) * this function. */ rc = mctp_key_add(key, msk); - if (rc) { - kfree(key); - } else { + if (!rc) trace_mctp_key_acquire(key); - /* we don't need to release key->lock on exit */ - mctp_key_unref(key); - } + /* we don't need to release key->lock on exit, so + * clean up here and suppress the unlock via + * setting to NULL + */ + mctp_key_unref(key); key = NULL; } else { @@ -473,6 +479,8 @@ out_unlock: spin_unlock_irqrestore(&key->lock, f); mctp_key_unref(key); } + if (any_key) + mctp_key_unref(any_key); out: if (rc) kfree_skb(skb); diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c index d88b92a8ffca..945dd40e7077 100644 --- a/net/netfilter/nf_conntrack_proto_sctp.c +++ b/net/netfilter/nf_conntrack_proto_sctp.c @@ -27,22 +27,16 @@ #include <net/netfilter/nf_conntrack_ecache.h> #include <net/netfilter/nf_conntrack_timeout.h> -/* FIXME: Examine ipfilter's timeouts and conntrack transitions more - closely. They're more complex. --RR - - And so for me for SCTP :D -Kiran */ - static const char *const sctp_conntrack_names[] = { - "NONE", - "CLOSED", - "COOKIE_WAIT", - "COOKIE_ECHOED", - "ESTABLISHED", - "SHUTDOWN_SENT", - "SHUTDOWN_RECD", - "SHUTDOWN_ACK_SENT", - "HEARTBEAT_SENT", - "HEARTBEAT_ACKED", + [SCTP_CONNTRACK_NONE] = "NONE", + [SCTP_CONNTRACK_CLOSED] = "CLOSED", + [SCTP_CONNTRACK_COOKIE_WAIT] = "COOKIE_WAIT", + [SCTP_CONNTRACK_COOKIE_ECHOED] = "COOKIE_ECHOED", + [SCTP_CONNTRACK_ESTABLISHED] = "ESTABLISHED", + [SCTP_CONNTRACK_SHUTDOWN_SENT] = "SHUTDOWN_SENT", + [SCTP_CONNTRACK_SHUTDOWN_RECD] = "SHUTDOWN_RECD", + [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = "SHUTDOWN_ACK_SENT", + [SCTP_CONNTRACK_HEARTBEAT_SENT] = "HEARTBEAT_SENT", }; #define SECS * HZ @@ -54,13 +48,11 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { [SCTP_CONNTRACK_CLOSED] = 10 SECS, [SCTP_CONNTRACK_COOKIE_WAIT] = 3 SECS, [SCTP_CONNTRACK_COOKIE_ECHOED] = 3 SECS, - [SCTP_CONNTRACK_ESTABLISHED] = 5 DAYS, + [SCTP_CONNTRACK_ESTABLISHED] = 210 SECS, [SCTP_CONNTRACK_SHUTDOWN_SENT] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_RECD] = 300 SECS / 1000, [SCTP_CONNTRACK_SHUTDOWN_ACK_SENT] = 3 SECS, [SCTP_CONNTRACK_HEARTBEAT_SENT] = 30 SECS, - [SCTP_CONNTRACK_HEARTBEAT_ACKED] = 210 SECS, - [SCTP_CONNTRACK_DATA_SENT] = 30 SECS, }; #define SCTP_FLAG_HEARTBEAT_VTAG_FAILED 1 @@ -74,8 +66,6 @@ static const unsigned int sctp_timeouts[SCTP_CONNTRACK_MAX] = { #define sSR SCTP_CONNTRACK_SHUTDOWN_RECD #define sSA SCTP_CONNTRACK_SHUTDOWN_ACK_SENT #define sHS SCTP_CONNTRACK_HEARTBEAT_SENT -#define sHA SCTP_CONNTRACK_HEARTBEAT_ACKED -#define sDS SCTP_CONNTRACK_DATA_SENT #define sIV SCTP_CONNTRACK_MAX /* @@ -98,10 +88,6 @@ SHUTDOWN_ACK_SENT - We have seen a SHUTDOWN_ACK chunk in the direction opposite CLOSED - We have seen a SHUTDOWN_COMPLETE chunk in the direction of the SHUTDOWN chunk. Connection is closed. HEARTBEAT_SENT - We have seen a HEARTBEAT in a new flow. -HEARTBEAT_ACKED - We have seen a HEARTBEAT-ACK/DATA/SACK in the direction - opposite to that of the HEARTBEAT/DATA chunk. Secondary connection - is established. -DATA_SENT - We have seen a DATA/SACK in a new flow. */ /* TODO @@ -115,38 +101,36 @@ cookie echoed to closed. */ /* SCTP conntrack state transitions */ -static const u8 sctp_conntracks[2][12][SCTP_CONNTRACK_MAX] = { +static const u8 sctp_conntracks[2][11][SCTP_CONNTRACK_MAX] = { { /* ORIGINAL */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */ -/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW, sHA, sCW}, -/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL}, -/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, -/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL, sSS, sCL}, -/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA, sHA, sSA}, -/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't have Stale cookie*/ -/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* 5.2.4 - Big TODO */ -/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL, sHA, sCL},/* Can't come in orig dir */ -/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL, sHA, sCL}, -/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}, -/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS}, -/* data/sack */ {sDS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS} +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCW}, +/* init_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL}, +/* abort */ {sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sCL}, +/* shutdown */ {sCL, sCL, sCW, sCE, sSS, sSS, sSR, sSA, sCL}, +/* shutdown_ack */ {sSA, sCL, sCW, sCE, sES, sSA, sSA, sSA, sSA}, +/* error */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't have Stale cookie*/ +/* cookie_echo */ {sCL, sCL, sCE, sCE, sES, sSS, sSR, sSA, sCL},/* 5.2.4 - Big TODO */ +/* cookie_ack */ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sCL},/* Can't come in orig dir */ +/* shutdown_comp*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sCL, sCL}, +/* heartbeat */ {sHS, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sCL, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, }, { /* REPLY */ -/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sDS */ -/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* INIT in sCL Big TODO */ -/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV}, -/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV, sCL, sIV}, -/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV, sSR, sIV}, -/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV, sHA, sIV}, -/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV, sHA, sIV}, -/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV, sHA, sIV},/* Can't come in reply dir */ -/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV, sHA, sIV}, -/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV, sHA, sIV}, -/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS, sHA, sHA}, -/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA}, -/* data/sack */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHA, sHA, sHA}, +/* sNO, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS */ +/* init */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* INIT in sCL Big TODO */ +/* init_ack */ {sIV, sCW, sCW, sCE, sES, sSS, sSR, sSA, sIV}, +/* abort */ {sIV, sCL, sCL, sCL, sCL, sCL, sCL, sCL, sIV}, +/* shutdown */ {sIV, sCL, sCW, sCE, sSR, sSS, sSR, sSA, sIV}, +/* shutdown_ack */ {sIV, sCL, sCW, sCE, sES, sSA, sSA, sSA, sIV}, +/* error */ {sIV, sCL, sCW, sCL, sES, sSS, sSR, sSA, sIV}, +/* cookie_echo */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sIV},/* Can't come in reply dir */ +/* cookie_ack */ {sIV, sCL, sCW, sES, sES, sSS, sSR, sSA, sIV}, +/* shutdown_comp*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sCL, sIV}, +/* heartbeat */ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sHS}, +/* heartbeat_ack*/ {sIV, sCL, sCW, sCE, sES, sSS, sSR, sSA, sES}, } }; @@ -160,8 +144,8 @@ static void sctp_print_conntrack(struct seq_file *s, struct nf_conn *ct) #define for_each_sctp_chunk(skb, sch, _sch, offset, dataoff, count) \ for ((offset) = (dataoff) + sizeof(struct sctphdr), (count) = 0; \ - (offset) < (skb)->len && \ - ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))); \ + ((sch) = skb_header_pointer((skb), (offset), sizeof(_sch), &(_sch))) && \ + (sch)->length; \ (offset) += (ntohs((sch)->length) + 3) & ~3, (count)++) /* Some validity checks to make sure the chunks are fine */ @@ -258,11 +242,6 @@ static int sctp_new_state(enum ip_conntrack_dir dir, pr_debug("SCTP_CID_HEARTBEAT_ACK"); i = 10; break; - case SCTP_CID_DATA: - case SCTP_CID_SACK: - pr_debug("SCTP_CID_DATA/SACK"); - i = 11; - break; default: /* Other chunks like DATA or SACK do not change the state */ pr_debug("Unknown chunk type, Will stay in %s\n", @@ -316,9 +295,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb, ih->init_tag); ct->proto.sctp.vtag[IP_CT_DIR_REPLY] = ih->init_tag; - } else if (sch->type == SCTP_CID_HEARTBEAT || - sch->type == SCTP_CID_DATA || - sch->type == SCTP_CID_SACK) { + } else if (sch->type == SCTP_CID_HEARTBEAT) { pr_debug("Setting vtag %x for secondary conntrack\n", sh->vtag); ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag; @@ -404,19 +381,19 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, if (!sctp_new(ct, skb, sh, dataoff)) return -NF_ACCEPT; - } else { - /* Check the verification tag (Sec 8.5) */ - if (!test_bit(SCTP_CID_INIT, map) && - !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) && - !test_bit(SCTP_CID_COOKIE_ECHO, map) && - !test_bit(SCTP_CID_ABORT, map) && - !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && - !test_bit(SCTP_CID_HEARTBEAT, map) && - !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && - sh->vtag != ct->proto.sctp.vtag[dir]) { - pr_debug("Verification tag check failed\n"); - goto out; - } + } + + /* Check the verification tag (Sec 8.5) */ + if (!test_bit(SCTP_CID_INIT, map) && + !test_bit(SCTP_CID_SHUTDOWN_COMPLETE, map) && + !test_bit(SCTP_CID_COOKIE_ECHO, map) && + !test_bit(SCTP_CID_ABORT, map) && + !test_bit(SCTP_CID_SHUTDOWN_ACK, map) && + !test_bit(SCTP_CID_HEARTBEAT, map) && + !test_bit(SCTP_CID_HEARTBEAT_ACK, map) && + sh->vtag != ct->proto.sctp.vtag[dir]) { + pr_debug("Verification tag check failed\n"); + goto out; } old_state = new_state = SCTP_CONNTRACK_NONE; @@ -424,22 +401,29 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, for_each_sctp_chunk (skb, sch, _sch, offset, dataoff, count) { /* Special cases of Verification tag check (Sec 8.5.1) */ if (sch->type == SCTP_CID_INIT) { - /* Sec 8.5.1 (A) */ + /* (A) vtag MUST be zero */ if (sh->vtag != 0) goto out_unlock; } else if (sch->type == SCTP_CID_ABORT) { - /* Sec 8.5.1 (B) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir]) + /* (B) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_SHUTDOWN_COMPLETE) { - /* Sec 8.5.1 (C) */ - if (sh->vtag != ct->proto.sctp.vtag[dir] && - sh->vtag != ct->proto.sctp.vtag[!dir] && - sch->flags & SCTP_CHUNK_FLAG_T) + /* (C) vtag MUST match own vtag if T flag is unset OR + * MUST match peer's vtag if T flag is set + */ + if ((!(sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[dir]) || + ((sch->flags & SCTP_CHUNK_FLAG_T) && + sh->vtag != ct->proto.sctp.vtag[!dir])) goto out_unlock; } else if (sch->type == SCTP_CID_COOKIE_ECHO) { - /* Sec 8.5.1 (D) */ + /* (D) vtag must be same as init_vtag as found in INIT_ACK */ if (sh->vtag != ct->proto.sctp.vtag[dir]) goto out_unlock; } else if (sch->type == SCTP_CID_HEARTBEAT) { @@ -476,11 +460,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } else if (ct->proto.sctp.flags & SCTP_FLAG_HEARTBEAT_VTAG_FAILED) { ct->proto.sctp.flags &= ~SCTP_FLAG_HEARTBEAT_VTAG_FAILED; } - } else if (sch->type == SCTP_CID_DATA || sch->type == SCTP_CID_SACK) { - if (ct->proto.sctp.vtag[dir] == 0) { - pr_debug("Setting vtag %x for dir %d\n", sh->vtag, dir); - ct->proto.sctp.vtag[dir] = sh->vtag; - } } old_state = ct->proto.sctp.state; @@ -518,8 +497,12 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, } ct->proto.sctp.state = new_state; - if (old_state != new_state) + if (old_state != new_state) { nf_conntrack_event_cache(IPCT_PROTOINFO, ct); + if (new_state == SCTP_CONNTRACK_ESTABLISHED && + !test_and_set_bit(IPS_ASSURED_BIT, &ct->status)) + nf_conntrack_event_cache(IPCT_ASSURED, ct); + } } spin_unlock_bh(&ct->lock); @@ -533,14 +516,6 @@ int nf_conntrack_sctp_packet(struct nf_conn *ct, nf_ct_refresh_acct(ct, ctinfo, skb, timeouts[new_state]); - if (old_state == SCTP_CONNTRACK_COOKIE_ECHOED && - dir == IP_CT_DIR_REPLY && - new_state == SCTP_CONNTRACK_ESTABLISHED) { - pr_debug("Setting assured bit\n"); - set_bit(IPS_ASSURED_BIT, &ct->status); - nf_conntrack_event_cache(IPCT_ASSURED, ct); - } - return NF_ACCEPT; out_unlock: @@ -701,7 +676,6 @@ sctp_timeout_nla_policy[CTA_TIMEOUT_SCTP_MAX+1] = { [CTA_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_SENT] = { .type = NLA_U32 }, [CTA_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { .type = NLA_U32 }, - [CTA_TIMEOUT_SCTP_DATA_SENT] = { .type = NLA_U32 }, }; #endif /* CONFIG_NF_CONNTRACK_TIMEOUT */ diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 0250725e38a4..460294bd4b60 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -601,8 +601,6 @@ enum nf_ct_sysctl_index { NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_RECD, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_SHUTDOWN_ACK_SENT, NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_SENT, - NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED, - NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST, @@ -887,18 +885,6 @@ static struct ctl_table nf_ct_sysctl_table[] = { .mode = 0644, .proc_handler = proc_dointvec_jiffies, }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_HEARTBEAT_ACKED] = { - .procname = "nf_conntrack_sctp_timeout_heartbeat_acked", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, - [NF_SYSCTL_CT_PROTO_TIMEOUT_SCTP_DATA_SENT] = { - .procname = "nf_conntrack_sctp_timeout_data_sent", - .maxlen = sizeof(unsigned int), - .mode = 0644, - .proc_handler = proc_dointvec_jiffies, - }, #endif #ifdef CONFIG_NF_CT_PROTO_DCCP [NF_SYSCTL_CT_PROTO_TIMEOUT_DCCP_REQUEST] = { @@ -1042,8 +1028,6 @@ static void nf_conntrack_standalone_init_sctp_sysctl(struct net *net, XASSIGN(SHUTDOWN_RECD, sn); XASSIGN(SHUTDOWN_ACK_SENT, sn); XASSIGN(HEARTBEAT_SENT, sn); - XASSIGN(HEARTBEAT_ACKED, sn); - XASSIGN(DATA_SENT, sn); #undef XASSIGN #endif } diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c index 7325bee7d144..19ea4d3c3553 100644 --- a/net/netfilter/nft_set_rbtree.c +++ b/net/netfilter/nft_set_rbtree.c @@ -38,10 +38,12 @@ static bool nft_rbtree_interval_start(const struct nft_rbtree_elem *rbe) return !nft_rbtree_interval_end(rbe); } -static bool nft_rbtree_equal(const struct nft_set *set, const void *this, - const struct nft_rbtree_elem *interval) +static int nft_rbtree_cmp(const struct nft_set *set, + const struct nft_rbtree_elem *e1, + const struct nft_rbtree_elem *e2) { - return memcmp(this, nft_set_ext_key(&interval->ext), set->klen) == 0; + return memcmp(nft_set_ext_key(&e1->ext), nft_set_ext_key(&e2->ext), + set->klen); } static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set, @@ -52,7 +54,6 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set const struct nft_rbtree_elem *rbe, *interval = NULL; u8 genmask = nft_genmask_cur(net); const struct rb_node *parent; - const void *this; int d; parent = rcu_dereference_raw(priv->root.rb_node); @@ -62,12 +63,11 @@ static bool __nft_rbtree_lookup(const struct net *net, const struct nft_set *set rbe = rb_entry(parent, struct nft_rbtree_elem, node); - this = nft_set_ext_key(&rbe->ext); - d = memcmp(this, key, set->klen); + d = memcmp(nft_set_ext_key(&rbe->ext), key, set->klen); if (d < 0) { parent = rcu_dereference_raw(parent->rb_left); if (interval && - nft_rbtree_equal(set, this, interval) && + !nft_rbtree_cmp(set, rbe, interval) && nft_rbtree_interval_end(rbe) && nft_rbtree_interval_start(interval)) continue; @@ -215,154 +215,216 @@ static void *nft_rbtree_get(const struct net *net, const struct nft_set *set, return rbe; } +static int nft_rbtree_gc_elem(const struct nft_set *__set, + struct nft_rbtree *priv, + struct nft_rbtree_elem *rbe) +{ + struct nft_set *set = (struct nft_set *)__set; + struct rb_node *prev = rb_prev(&rbe->node); + struct nft_rbtree_elem *rbe_prev; + struct nft_set_gc_batch *gcb; + + gcb = nft_set_gc_batch_check(set, NULL, GFP_ATOMIC); + if (!gcb) + return -ENOMEM; + + /* search for expired end interval coming before this element. */ + do { + rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node); + if (nft_rbtree_interval_end(rbe_prev)) + break; + + prev = rb_prev(prev); + } while (prev != NULL); + + rb_erase(&rbe_prev->node, &priv->root); + rb_erase(&rbe->node, &priv->root); + atomic_sub(2, &set->nelems); + + nft_set_gc_batch_add(gcb, rbe); + nft_set_gc_batch_complete(gcb); + + return 0; +} + +static bool nft_rbtree_update_first(const struct nft_set *set, + struct nft_rbtree_elem *rbe, + struct rb_node *first) +{ + struct nft_rbtree_elem *first_elem; + + first_elem = rb_entry(first, struct nft_rbtree_elem, node); + /* this element is closest to where the new element is to be inserted: + * update the first element for the node list path. + */ + if (nft_rbtree_cmp(set, rbe, first_elem) < 0) + return true; + + return false; +} + static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set, struct nft_rbtree_elem *new, struct nft_set_ext **ext) { - bool overlap = false, dup_end_left = false, dup_end_right = false; + struct nft_rbtree_elem *rbe, *rbe_le = NULL, *rbe_ge = NULL; + struct rb_node *node, *parent, **p, *first = NULL; struct nft_rbtree *priv = nft_set_priv(set); u8 genmask = nft_genmask_next(net); - struct nft_rbtree_elem *rbe; - struct rb_node *parent, **p; - int d; + int d, err; - /* Detect overlaps as we descend the tree. Set the flag in these cases: - * - * a1. _ _ __>| ?_ _ __| (insert end before existing end) - * a2. _ _ ___| ?_ _ _>| (insert end after existing end) - * a3. _ _ ___? >|_ _ __| (insert start before existing end) - * - * and clear it later on, as we eventually reach the points indicated by - * '?' above, in the cases described below. We'll always meet these - * later, locally, due to tree ordering, and overlaps for the intervals - * that are the closest together are always evaluated last. - * - * b1. _ _ __>| !_ _ __| (insert end before existing start) - * b2. _ _ ___| !_ _ _>| (insert end after existing start) - * b3. _ _ ___! >|_ _ __| (insert start after existing end, as a leaf) - * '--' no nodes falling in this range - * b4. >|_ _ ! (insert start before existing start) - * - * Case a3. resolves to b3.: - * - if the inserted start element is the leftmost, because the '0' - * element in the tree serves as end element - * - otherwise, if an existing end is found immediately to the left. If - * there are existing nodes in between, we need to further descend the - * tree before we can conclude the new start isn't causing an overlap - * - * or to b4., which, preceded by a3., means we already traversed one or - * more existing intervals entirely, from the right. - * - * For a new, rightmost pair of elements, we'll hit cases b3. and b2., - * in that order. - * - * The flag is also cleared in two special cases: - * - * b5. |__ _ _!|<_ _ _ (insert start right before existing end) - * b6. |__ _ >|!__ _ _ (insert end right after existing start) - * - * which always happen as last step and imply that no further - * overlapping is possible. - * - * Another special case comes from the fact that start elements matching - * an already existing start element are allowed: insertion is not - * performed but we return -EEXIST in that case, and the error will be - * cleared by the caller if NLM_F_EXCL is not present in the request. - * This way, request for insertion of an exact overlap isn't reported as - * error to userspace if not desired. - * - * However, if the existing start matches a pre-existing start, but the - * end element doesn't match the corresponding pre-existing end element, - * we need to report a partial overlap. This is a local condition that - * can be noticed without need for a tracking flag, by checking for a - * local duplicated end for a corresponding start, from left and right, - * separately. + /* Descend the tree to search for an existing element greater than the + * key value to insert that is greater than the new element. This is the + * first element to walk the ordered elements to find possible overlap. */ - parent = NULL; p = &priv->root.rb_node; while (*p != NULL) { parent = *p; rbe = rb_entry(parent, struct nft_rbtree_elem, node); - d = memcmp(nft_set_ext_key(&rbe->ext), - nft_set_ext_key(&new->ext), - set->klen); + d = nft_rbtree_cmp(set, rbe, new); + if (d < 0) { p = &parent->rb_left; - - if (nft_rbtree_interval_start(new)) { - if (nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext) && !*p) - overlap = false; - } else { - if (dup_end_left && !*p) - return -ENOTEMPTY; - - overlap = nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, - genmask) && - !nft_set_elem_expired(&rbe->ext); - - if (overlap) { - dup_end_right = true; - continue; - } - } } else if (d > 0) { - p = &parent->rb_right; + if (!first || + nft_rbtree_update_first(set, rbe, first)) + first = &rbe->node; - if (nft_rbtree_interval_end(new)) { - if (dup_end_right && !*p) - return -ENOTEMPTY; - - overlap = nft_rbtree_interval_end(rbe) && - nft_set_elem_active(&rbe->ext, - genmask) && - !nft_set_elem_expired(&rbe->ext); - - if (overlap) { - dup_end_left = true; - continue; - } - } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { - overlap = nft_rbtree_interval_end(rbe); - } + p = &parent->rb_right; } else { - if (nft_rbtree_interval_end(rbe) && - nft_rbtree_interval_start(new)) { + if (nft_rbtree_interval_end(rbe)) p = &parent->rb_left; - - if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) - overlap = false; - } else if (nft_rbtree_interval_start(rbe) && - nft_rbtree_interval_end(new)) { + else p = &parent->rb_right; + } + } + + if (!first) + first = rb_first(&priv->root); + + /* Detect overlap by going through the list of valid tree nodes. + * Values stored in the tree are in reversed order, starting from + * highest to lowest value. + */ + for (node = first; node != NULL; node = rb_next(node)) { + rbe = rb_entry(node, struct nft_rbtree_elem, node); - if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) - overlap = false; - } else if (nft_set_elem_active(&rbe->ext, genmask) && - !nft_set_elem_expired(&rbe->ext)) { - *ext = &rbe->ext; - return -EEXIST; - } else { - overlap = false; - if (nft_rbtree_interval_end(rbe)) - p = &parent->rb_left; - else - p = &parent->rb_right; + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* perform garbage collection to avoid bogus overlap reports. */ + if (nft_set_elem_expired(&rbe->ext)) { + err = nft_rbtree_gc_elem(set, priv, rbe); + if (err < 0) + return err; + + continue; + } + + d = nft_rbtree_cmp(set, rbe, new); + if (d == 0) { + /* Matching end element: no need to look for an + * overlapping greater or equal element. + */ + if (nft_rbtree_interval_end(rbe)) { + rbe_le = rbe; + break; + } + + /* first element that is greater or equal to key value. */ + if (!rbe_ge) { + rbe_ge = rbe; + continue; + } + + /* this is a closer more or equal element, update it. */ + if (nft_rbtree_cmp(set, rbe_ge, new) != 0) { + rbe_ge = rbe; + continue; + } + + /* element is equal to key value, make sure flags are + * the same, an existing more or equal start element + * must not be replaced by more or equal end element. + */ + if ((nft_rbtree_interval_start(new) && + nft_rbtree_interval_start(rbe_ge)) || + (nft_rbtree_interval_end(new) && + nft_rbtree_interval_end(rbe_ge))) { + rbe_ge = rbe; + continue; } + } else if (d > 0) { + /* annotate element greater than the new element. */ + rbe_ge = rbe; + continue; + } else if (d < 0) { + /* annotate element less than the new element. */ + rbe_le = rbe; + break; } + } - dup_end_left = dup_end_right = false; + /* - new start element matching existing start element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_ge && !nft_rbtree_cmp(set, new, rbe_ge) && + nft_rbtree_interval_start(rbe_ge) == nft_rbtree_interval_start(new)) { + *ext = &rbe_ge->ext; + return -EEXIST; } - if (overlap) + /* - new end element matching existing end element: full overlap + * reported as -EEXIST, cleared by caller if NLM_F_EXCL is not given. + */ + if (rbe_le && !nft_rbtree_cmp(set, new, rbe_le) && + nft_rbtree_interval_end(rbe_le) == nft_rbtree_interval_end(new)) { + *ext = &rbe_le->ext; + return -EEXIST; + } + + /* - new start element with existing closest, less or equal key value + * being a start element: partial overlap, reported as -ENOTEMPTY. + * Anonymous sets allow for two consecutive start element since they + * are constant, skip them to avoid bogus overlap reports. + */ + if (!nft_set_is_anonymous(set) && rbe_le && + nft_rbtree_interval_start(rbe_le) && nft_rbtree_interval_start(new)) + return -ENOTEMPTY; + + /* - new end element with existing closest, less or equal key value + * being a end element: partial overlap, reported as -ENOTEMPTY. + */ + if (rbe_le && + nft_rbtree_interval_end(rbe_le) && nft_rbtree_interval_end(new)) return -ENOTEMPTY; + /* - new end element with existing closest, greater or equal key value + * being an end element: partial overlap, reported as -ENOTEMPTY + */ + if (rbe_ge && + nft_rbtree_interval_end(rbe_ge) && nft_rbtree_interval_end(new)) + return -ENOTEMPTY; + + /* Accepted element: pick insertion point depending on key value */ + parent = NULL; + p = &priv->root.rb_node; + while (*p != NULL) { + parent = *p; + rbe = rb_entry(parent, struct nft_rbtree_elem, node); + d = nft_rbtree_cmp(set, rbe, new); + + if (d < 0) + p = &parent->rb_left; + else if (d > 0) + p = &parent->rb_right; + else if (nft_rbtree_interval_end(rbe)) + p = &parent->rb_left; + else + p = &parent->rb_right; + } + rb_link_node_rcu(&new->node, parent, p); rb_insert_color(&new->node, &priv->root); return 0; @@ -501,23 +563,37 @@ static void nft_rbtree_gc(struct work_struct *work) struct nft_rbtree *priv; struct rb_node *node; struct nft_set *set; + struct net *net; + u8 genmask; priv = container_of(work, struct nft_rbtree, gc_work.work); set = nft_set_container_of(priv); + net = read_pnet(&set->net); + genmask = nft_genmask_cur(net); write_lock_bh(&priv->lock); write_seqcount_begin(&priv->count); for (node = rb_first(&priv->root); node != NULL; node = rb_next(node)) { rbe = rb_entry(node, struct nft_rbtree_elem, node); + if (!nft_set_elem_active(&rbe->ext, genmask)) + continue; + + /* elements are reversed in the rbtree for historical reasons, + * from highest to lowest value, that is why end element is + * always visited before the start element. + */ if (nft_rbtree_interval_end(rbe)) { rbe_end = rbe; continue; } if (!nft_set_elem_expired(&rbe->ext)) continue; - if (nft_set_elem_mark_busy(&rbe->ext)) + + if (nft_set_elem_mark_busy(&rbe->ext)) { + rbe_end = NULL; continue; + } if (rbe_prev) { rb_erase(&rbe_prev->node, &priv->root); diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index bca2a470ccad..c64277659753 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -580,7 +580,9 @@ static int netlink_insert(struct sock *sk, u32 portid) if (nlk_sk(sk)->bound) goto err; - nlk_sk(sk)->portid = portid; + /* portid can be read locklessly from netlink_getname(). */ + WRITE_ONCE(nlk_sk(sk)->portid, portid); + sock_hold(sk); err = __netlink_insert(table, sk); @@ -1096,9 +1098,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, return -EINVAL; if (addr->sa_family == AF_UNSPEC) { - sk->sk_state = NETLINK_UNCONNECTED; - nlk->dst_portid = 0; - nlk->dst_group = 0; + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_UNCONNECTED); + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, 0); + WRITE_ONCE(nlk->dst_group, 0); return 0; } if (addr->sa_family != AF_NETLINK) @@ -1119,9 +1123,11 @@ static int netlink_connect(struct socket *sock, struct sockaddr *addr, err = netlink_autobind(sock); if (err == 0) { - sk->sk_state = NETLINK_CONNECTED; - nlk->dst_portid = nladdr->nl_pid; - nlk->dst_group = ffs(nladdr->nl_groups); + /* paired with READ_ONCE() in netlink_getsockbyportid() */ + WRITE_ONCE(sk->sk_state, NETLINK_CONNECTED); + /* dst_portid and dst_group can be read locklessly */ + WRITE_ONCE(nlk->dst_portid, nladdr->nl_pid); + WRITE_ONCE(nlk->dst_group, ffs(nladdr->nl_groups)); } return err; @@ -1138,10 +1144,12 @@ static int netlink_getname(struct socket *sock, struct sockaddr *addr, nladdr->nl_pad = 0; if (peer) { - nladdr->nl_pid = nlk->dst_portid; - nladdr->nl_groups = netlink_group_mask(nlk->dst_group); + /* Paired with WRITE_ONCE() in netlink_connect() */ + nladdr->nl_pid = READ_ONCE(nlk->dst_portid); + nladdr->nl_groups = netlink_group_mask(READ_ONCE(nlk->dst_group)); } else { - nladdr->nl_pid = nlk->portid; + /* Paired with WRITE_ONCE() in netlink_insert() */ + nladdr->nl_pid = READ_ONCE(nlk->portid); netlink_lock_table(); nladdr->nl_groups = nlk->groups ? nlk->groups[0] : 0; netlink_unlock_table(); @@ -1168,8 +1176,9 @@ static struct sock *netlink_getsockbyportid(struct sock *ssk, u32 portid) /* Don't bother queuing skb if kernel socket has no input function */ nlk = nlk_sk(sock); - if (sock->sk_state == NETLINK_CONNECTED && - nlk->dst_portid != nlk_sk(ssk)->portid) { + /* dst_portid and sk_state can be changed in netlink_connect() */ + if (READ_ONCE(sock->sk_state) == NETLINK_CONNECTED && + READ_ONCE(nlk->dst_portid) != nlk_sk(ssk)->portid) { sock_put(sock); return ERR_PTR(-ECONNREFUSED); } @@ -1886,8 +1895,9 @@ static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) goto out; netlink_skb_flags |= NETLINK_SKB_DST; } else { - dst_portid = nlk->dst_portid; - dst_group = nlk->dst_group; + /* Paired with WRITE_ONCE() in netlink_connect() */ + dst_portid = READ_ONCE(nlk->dst_portid); + dst_group = READ_ONCE(nlk->dst_group); } /* Paired with WRITE_ONCE() in netlink_insert() */ diff --git a/net/netrom/nr_timer.c b/net/netrom/nr_timer.c index a8da88db7893..4e7c968cde2d 100644 --- a/net/netrom/nr_timer.c +++ b/net/netrom/nr_timer.c @@ -121,6 +121,7 @@ static void nr_heartbeat_expiry(struct timer_list *t) is accepted() it isn't 'dead' so doesn't get removed. */ if (sock_flag(sk, SOCK_DESTROY) || (sk->sk_state == TCP_LISTEN && sock_flag(sk, SOCK_DEAD))) { + sock_hold(sk); bh_unlock_sock(sk); nr_destroy_socket(sk); goto out; diff --git a/net/sched/sch_taprio.c b/net/sched/sch_taprio.c index 9a11a499ea2d..c322a61eaeea 100644 --- a/net/sched/sch_taprio.c +++ b/net/sched/sch_taprio.c @@ -1700,7 +1700,6 @@ static void taprio_reset(struct Qdisc *sch) int i; hrtimer_cancel(&q->advance_timer); - qdisc_synchronize(sch); if (q->qdiscs) { for (i = 0; i < dev->num_tx_queues; i++) diff --git a/net/sctp/bind_addr.c b/net/sctp/bind_addr.c index 59e653b528b1..6b95d3ba8fe1 100644 --- a/net/sctp/bind_addr.c +++ b/net/sctp/bind_addr.c @@ -73,6 +73,12 @@ int sctp_bind_addr_copy(struct net *net, struct sctp_bind_addr *dest, } } + /* If somehow no addresses were found that can be used with this + * scope, it's an error. + */ + if (list_empty(&dest->address_list)) + error = -ENETUNREACH; + out: if (error) sctp_bind_addr_clean(dest); diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 3b55502b2965..5c7ad301d742 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -482,6 +482,12 @@ static int x25_listen(struct socket *sock, int backlog) int rc = -EOPNOTSUPP; lock_sock(sk); + if (sock->state != SS_UNCONNECTED) { + rc = -EINVAL; + release_sock(sk); + return rc; + } + if (sk->sk_state != TCP_LISTEN) { memset(&x25_sk(sk)->dest_addr, 0, X25_ADDR_LEN); sk->sk_max_ack_backlog = backlog; |