diff options
45 files changed, 511 insertions, 184 deletions
diff --git a/Documentation/devicetree/bindings/net/socionext-netsec.txt b/Documentation/devicetree/bindings/net/socionext-netsec.txt index 9d6c9feb12ff..a3c1dffaa4bb 100644 --- a/Documentation/devicetree/bindings/net/socionext-netsec.txt +++ b/Documentation/devicetree/bindings/net/socionext-netsec.txt @@ -30,7 +30,9 @@ Optional properties: (See ethernet.txt file in the same directory) - max-frame-size: See ethernet.txt in the same directory. The MAC address will be determined using the optional properties -defined in ethernet.txt. +defined in ethernet.txt. The 'phy-mode' property is required, but may +be set to the empty string if the PHY configuration is programmed by +the firmware or set by hardware straps, and needs to be preserved. Example: eth0: ethernet@522d0000 { diff --git a/MAINTAINERS b/MAINTAINERS index 0f59b0412953..6d50cbf198b5 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3244,7 +3244,8 @@ R: KP Singh <kpsingh@chromium.org> L: netdev@vger.kernel.org L: bpf@vger.kernel.org S: Supported -Q: https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147 +W: https://bpf.io/ +Q: https://patchwork.kernel.org/project/netdevbpf/list/?delegate=121173 T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git T: git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git F: Documentation/bpf/ diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index 0b5b2b33b3b6..1e9a0adda2d6 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -54,7 +54,7 @@ static void bcm_sf2_recalc_clock(struct dsa_switch *ds) unsigned long new_rate; unsigned int ports_active; /* Frequenty in Mhz */ - const unsigned long rate_table[] = { + static const unsigned long rate_table[] = { 59220000, 60820000, 62500000, diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c index 0f865daeb36d..bf5e0e9bd0e2 100644 --- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c +++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c @@ -1163,7 +1163,6 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self, default: err = -1; goto err_exit; - break; } if (!(self->aq_nic_cfg.aq_hw_caps->link_speed_msk & rate)) { err = -1; diff --git a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig index 7dfa57348d54..bc06e83fd3c6 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/Kconfig +++ b/drivers/net/ethernet/chelsio/inline_crypto/Kconfig @@ -16,6 +16,7 @@ if CHELSIO_INLINE_CRYPTO config CRYPTO_DEV_CHELSIO_TLS tristate "Chelsio Crypto Inline TLS Driver" depends on CHELSIO_T4 + depends on TLS depends on TLS_TOE help Support Chelsio Inline TLS with Chelsio crypto accelerator. diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c index 05520dccd906..ec4f79049a06 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_cm.c @@ -92,11 +92,13 @@ static void chtls_sock_release(struct kref *ref) static struct net_device *chtls_find_netdev(struct chtls_dev *cdev, struct sock *sk) { + struct adapter *adap = pci_get_drvdata(cdev->pdev); struct net_device *ndev = cdev->ports[0]; #if IS_ENABLED(CONFIG_IPV6) struct net_device *temp; int addr_type; #endif + int i; switch (sk->sk_family) { case PF_INET: @@ -127,8 +129,12 @@ static struct net_device *chtls_find_netdev(struct chtls_dev *cdev, return NULL; if (is_vlan_dev(ndev)) - return vlan_dev_real_dev(ndev); - return ndev; + ndev = vlan_dev_real_dev(ndev); + + for_each_port(adap, i) + if (cdev->ports[i] == ndev) + return ndev; + return NULL; } static void assign_rxopt(struct sock *sk, unsigned int opt) @@ -477,7 +483,6 @@ void chtls_destroy_sock(struct sock *sk) chtls_purge_write_queue(sk); free_tls_keyid(sk); kref_put(&csk->kref, chtls_sock_release); - csk->cdev = NULL; if (sk->sk_family == AF_INET) sk->sk_prot = &tcp_prot; #if IS_ENABLED(CONFIG_IPV6) @@ -736,14 +741,13 @@ void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk) #if IS_ENABLED(CONFIG_IPV6) if (sk->sk_family == PF_INET6) { - struct chtls_sock *csk; + struct net_device *ndev = chtls_find_netdev(cdev, sk); int addr_type = 0; - csk = rcu_dereference_sk_user_data(sk); addr_type = ipv6_addr_type((const struct in6_addr *) &sk->sk_v6_rcv_saddr); if (addr_type != IPV6_ADDR_ANY) - cxgb4_clip_release(csk->egress_dev, (const u32 *) + cxgb4_clip_release(ndev, (const u32 *) &sk->sk_v6_rcv_saddr, 1); } #endif @@ -1157,6 +1161,9 @@ static struct sock *chtls_recv_sock(struct sock *lsk, ndev = n->dev; if (!ndev) goto free_dst; + if (is_vlan_dev(ndev)) + ndev = vlan_dev_real_dev(ndev); + port_id = cxgb4_port_idx(ndev); csk = chtls_sock_create(cdev); diff --git a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c index 2e9acae1cba3..9fb5ca6682ea 100644 --- a/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c +++ b/drivers/net/ethernet/chelsio/inline_crypto/chtls/chtls_io.c @@ -902,9 +902,9 @@ static int chtls_skb_copy_to_page_nocache(struct sock *sk, return 0; } -static int csk_mem_free(struct chtls_dev *cdev, struct sock *sk) +static bool csk_mem_free(struct chtls_dev *cdev, struct sock *sk) { - return (cdev->max_host_sndbuf - sk->sk_wmem_queued); + return (cdev->max_host_sndbuf - sk->sk_wmem_queued > 0); } static int csk_wait_memory(struct chtls_dev *cdev, @@ -1240,6 +1240,7 @@ int chtls_sendpage(struct sock *sk, struct page *page, copied = 0; csk = rcu_dereference_sk_user_data(sk); cdev = csk->cdev; + lock_sock(sk); timeo = sock_sndtimeo(sk, flags & MSG_DONTWAIT); err = sk_stream_wait_connect(sk, &timeo); diff --git a/drivers/net/ethernet/cisco/enic/enic_ethtool.c b/drivers/net/ethernet/cisco/enic/enic_ethtool.c index a4dd52bba2c3..1a9803f2073e 100644 --- a/drivers/net/ethernet/cisco/enic/enic_ethtool.c +++ b/drivers/net/ethernet/cisco/enic/enic_ethtool.c @@ -434,7 +434,6 @@ static int enic_grxclsrule(struct enic *enic, struct ethtool_rxnfc *cmd) break; default: return -EINVAL; - break; } fsp->h_u.tcp_ip4_spec.ip4src = flow_get_u32_src(&n->keys); diff --git a/drivers/net/ethernet/ibm/ibmvnic.c b/drivers/net/ethernet/ibm/ibmvnic.c index 1f7fe6b3dd5a..8148f796a807 100644 --- a/drivers/net/ethernet/ibm/ibmvnic.c +++ b/drivers/net/ethernet/ibm/ibmvnic.c @@ -4235,8 +4235,13 @@ static int handle_change_mac_rsp(union ibmvnic_crq *crq, dev_err(dev, "Error %ld in CHANGE_MAC_ADDR_RSP\n", rc); goto out; } + /* crq->change_mac_addr.mac_addr is the requested one + * crq->change_mac_addr_rsp.mac_addr is the returned valid one. + */ ether_addr_copy(netdev->dev_addr, &crq->change_mac_addr_rsp.mac_addr[0]); + ether_addr_copy(adapter->mac_addr, + &crq->change_mac_addr_rsp.mac_addr[0]); out: complete(&adapter->fw_done); return rc; diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c index de563cfd294d..4b93ba149ec5 100644 --- a/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c +++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_x540.c @@ -350,7 +350,6 @@ static s32 ixgbe_calc_eeprom_checksum_X540(struct ixgbe_hw *hw) if (ixgbe_read_eerd_generic(hw, pointer, &length)) { hw_dbg(hw, "EEPROM read failed\n"); return IXGBE_ERR_EEPROM; - break; } /* Skip pointer section if length is invalid. */ diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig index 62a820b1eb16..3362b148de23 100644 --- a/drivers/net/ethernet/mediatek/Kconfig +++ b/drivers/net/ethernet/mediatek/Kconfig @@ -17,6 +17,7 @@ config NET_MEDIATEK_SOC config NET_MEDIATEK_STAR_EMAC tristate "MediaTek STAR Ethernet MAC support" select PHYLIB + select REGMAP_MMIO help This driver supports the ethernet MAC IP first used on MediaTek MT85** SoCs. diff --git a/drivers/net/ethernet/sfc/efx_common.c b/drivers/net/ethernet/sfc/efx_common.c index 72a3f0e09f52..de797e1ac5a9 100644 --- a/drivers/net/ethernet/sfc/efx_common.c +++ b/drivers/net/ethernet/sfc/efx_common.c @@ -1014,6 +1014,7 @@ int efx_init_struct(struct efx_nic *efx, efx->num_mac_stats = MC_CMD_MAC_NSTATS; BUILD_BUG_ON(MC_CMD_MAC_NSTATS - 1 != MC_CMD_MAC_GENERATION_END); mutex_init(&efx->mac_lock); + init_rwsem(&efx->filter_sem); #ifdef CONFIG_RFS_ACCEL mutex_init(&efx->rps_mutex); spin_lock_init(&efx->rps_hash_lock); diff --git a/drivers/net/ethernet/sfc/rx_common.c b/drivers/net/ethernet/sfc/rx_common.c index 5e29284c89c9..19cf7cac1e6e 100644 --- a/drivers/net/ethernet/sfc/rx_common.c +++ b/drivers/net/ethernet/sfc/rx_common.c @@ -797,7 +797,6 @@ int efx_probe_filters(struct efx_nic *efx) { int rc; - init_rwsem(&efx->filter_sem); mutex_lock(&efx->mac_lock); down_write(&efx->filter_sem); rc = efx->type->filter_table_probe(efx); diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c index 806eb651cea3..1503cc9ec6e2 100644 --- a/drivers/net/ethernet/socionext/netsec.c +++ b/drivers/net/ethernet/socionext/netsec.c @@ -6,6 +6,7 @@ #include <linux/pm_runtime.h> #include <linux/acpi.h> #include <linux/of_mdio.h> +#include <linux/of_net.h> #include <linux/etherdevice.h> #include <linux/interrupt.h> #include <linux/io.h> @@ -1833,6 +1834,14 @@ static const struct net_device_ops netsec_netdev_ops = { static int netsec_of_probe(struct platform_device *pdev, struct netsec_priv *priv, u32 *phy_addr) { + int err; + + err = of_get_phy_mode(pdev->dev.of_node, &priv->phy_interface); + if (err) { + dev_err(&pdev->dev, "missing required property 'phy-mode'\n"); + return err; + } + priv->phy_np = of_parse_phandle(pdev->dev.of_node, "phy-handle", 0); if (!priv->phy_np) { dev_err(&pdev->dev, "missing required property 'phy-handle'\n"); @@ -1859,6 +1868,14 @@ static int netsec_acpi_probe(struct platform_device *pdev, if (!IS_ENABLED(CONFIG_ACPI)) return -ENODEV; + /* ACPI systems are assumed to configure the PHY in firmware, so + * there is really no need to discover the PHY mode from the DSDT. + * Since firmware is known to exist in the field that configures the + * PHY correctly but passes the wrong mode string in the phy-mode + * device property, we have no choice but to ignore it. + */ + priv->phy_interface = PHY_INTERFACE_MODE_NA; + ret = device_property_read_u32(&pdev->dev, "phy-channel", phy_addr); if (ret) { dev_err(&pdev->dev, @@ -1995,13 +2012,6 @@ static int netsec_probe(struct platform_device *pdev) priv->msg_enable = NETIF_MSG_TX_ERR | NETIF_MSG_HW | NETIF_MSG_DRV | NETIF_MSG_LINK | NETIF_MSG_PROBE; - priv->phy_interface = device_get_phy_mode(&pdev->dev); - if ((int)priv->phy_interface < 0) { - dev_err(&pdev->dev, "missing required property 'phy-mode'\n"); - ret = -ENODEV; - goto free_ndev; - } - priv->ioaddr = devm_ioremap(&pdev->dev, mmio_res->start, resource_size(mmio_res)); if (!priv->ioaddr) { diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index d2d2c4a53cf2..21b71148c532 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -68,8 +68,6 @@ static const unsigned long guest_offloads[] = { (1ULL << VIRTIO_NET_F_GUEST_ECN) | \ (1ULL << VIRTIO_NET_F_GUEST_UFO)) -#define GUEST_OFFLOAD_CSUM_MASK (1ULL << VIRTIO_NET_F_GUEST_CSUM) - struct virtnet_stat_desc { char desc[ETH_GSTRING_LEN]; size_t offset; @@ -2524,48 +2522,29 @@ static int virtnet_get_phys_port_name(struct net_device *dev, char *buf, return 0; } -static netdev_features_t virtnet_fix_features(struct net_device *netdev, - netdev_features_t features) -{ - /* If Rx checksum is disabled, LRO should also be disabled. */ - if (!(features & NETIF_F_RXCSUM)) - features &= ~NETIF_F_LRO; - - return features; -} - static int virtnet_set_features(struct net_device *dev, netdev_features_t features) { struct virtnet_info *vi = netdev_priv(dev); - u64 offloads = vi->guest_offloads; + u64 offloads; int err; - /* Don't allow configuration while XDP is active. */ - if (vi->xdp_queue_pairs) - return -EBUSY; - if ((dev->features ^ features) & NETIF_F_LRO) { + if (vi->xdp_queue_pairs) + return -EBUSY; + if (features & NETIF_F_LRO) - offloads |= GUEST_OFFLOAD_LRO_MASK & - vi->guest_offloads_capable; + offloads = vi->guest_offloads_capable; else - offloads &= ~GUEST_OFFLOAD_LRO_MASK; - } + offloads = vi->guest_offloads_capable & + ~GUEST_OFFLOAD_LRO_MASK; - if ((dev->features ^ features) & NETIF_F_RXCSUM) { - if (features & NETIF_F_RXCSUM) - offloads |= GUEST_OFFLOAD_CSUM_MASK & - vi->guest_offloads_capable; - else - offloads &= ~GUEST_OFFLOAD_CSUM_MASK; + err = virtnet_set_guest_offloads(vi, offloads); + if (err) + return err; + vi->guest_offloads = offloads; } - err = virtnet_set_guest_offloads(vi, offloads); - if (err) - return err; - - vi->guest_offloads = offloads; return 0; } @@ -2584,7 +2563,6 @@ static const struct net_device_ops virtnet_netdev = { .ndo_features_check = passthru_features_check, .ndo_get_phys_port_name = virtnet_get_phys_port_name, .ndo_set_features = virtnet_set_features, - .ndo_fix_features = virtnet_fix_features, }; static void virtnet_config_changed_work(struct work_struct *work) @@ -3035,10 +3013,8 @@ static int virtnet_probe(struct virtio_device *vdev) if (virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO4) || virtio_has_feature(vdev, VIRTIO_NET_F_GUEST_TSO6)) dev->features |= NETIF_F_LRO; - if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) { - dev->hw_features |= NETIF_F_RXCSUM; + if (virtio_has_feature(vdev, VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)) dev->hw_features |= NETIF_F_LRO; - } dev->vlan_features = dev->features; diff --git a/drivers/net/wan/hdlc.c b/drivers/net/wan/hdlc.c index 9b00708676cf..1bdd3df0867a 100644 --- a/drivers/net/wan/hdlc.c +++ b/drivers/net/wan/hdlc.c @@ -46,7 +46,15 @@ static struct hdlc_proto *first_proto; static int hdlc_rcv(struct sk_buff *skb, struct net_device *dev, struct packet_type *p, struct net_device *orig_dev) { - struct hdlc_device *hdlc = dev_to_hdlc(dev); + struct hdlc_device *hdlc; + + /* First make sure "dev" is an HDLC device */ + if (!(dev->priv_flags & IFF_WAN_HDLC)) { + kfree_skb(skb); + return NET_RX_SUCCESS; + } + + hdlc = dev_to_hdlc(dev); if (!net_eq(dev_net(dev), &init_net)) { kfree_skb(skb); diff --git a/drivers/net/wan/hdlc_raw_eth.c b/drivers/net/wan/hdlc_raw_eth.c index 08e0a46501de..c70a518b8b47 100644 --- a/drivers/net/wan/hdlc_raw_eth.c +++ b/drivers/net/wan/hdlc_raw_eth.c @@ -99,6 +99,7 @@ static int raw_eth_ioctl(struct net_device *dev, struct ifreq *ifr) old_qlen = dev->tx_queue_len; ether_setup(dev); dev->tx_queue_len = old_qlen; + dev->priv_flags &= ~IFF_TX_SKB_SHARING; eth_hw_addr_random(dev); call_netdevice_notifiers(NETDEV_POST_TYPE_CHANGE, dev); netif_dormant_off(dev); diff --git a/drivers/net/wan/lmc/lmc_proto.c b/drivers/net/wan/lmc/lmc_proto.c index e8b0b902b424..4e9cc83b615a 100644 --- a/drivers/net/wan/lmc/lmc_proto.c +++ b/drivers/net/wan/lmc/lmc_proto.c @@ -89,17 +89,13 @@ __be16 lmc_proto_type(lmc_softc_t *sc, struct sk_buff *skb) /*FOLD00*/ switch(sc->if_type){ case LMC_PPP: return hdlc_type_trans(skb, sc->lmc_device); - break; case LMC_NET: return htons(ETH_P_802_2); - break; case LMC_RAW: /* Packet type for skbuff kind of useless */ return htons(ETH_P_802_2); - break; default: printk(KERN_WARNING "%s: No protocol set for this interface, assuming 802.2 (which is wrong!!)\n", sc->name); return htons(ETH_P_802_2); - break; } } diff --git a/drivers/nfc/st21nfca/core.c b/drivers/nfc/st21nfca/core.c index 2ce17932a073..6ca0d2f56b18 100644 --- a/drivers/nfc/st21nfca/core.c +++ b/drivers/nfc/st21nfca/core.c @@ -794,7 +794,6 @@ static int st21nfca_hci_im_transceive(struct nfc_hci_dev *hdev, skb->len, st21nfca_hci_data_exchange_cb, info); - break; default: return 1; } diff --git a/drivers/nfc/trf7970a.c b/drivers/nfc/trf7970a.c index 3bd97c73f983..c70f62fe321e 100644 --- a/drivers/nfc/trf7970a.c +++ b/drivers/nfc/trf7970a.c @@ -1382,7 +1382,6 @@ static int trf7970a_is_iso15693_write_or_lock(u8 cmd) case ISO15693_CMD_WRITE_DSFID: case ISO15693_CMD_LOCK_DSFID: return 1; - break; default: return 0; } diff --git a/include/linux/filter.h b/include/linux/filter.h index 20fc24c9779a..72d62cbc1578 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,12 +607,21 @@ struct bpf_skb_data_end { void *data_end; }; +struct bpf_nh_params { + u32 nh_family; + union { + u32 ipv4_nh; + struct in6_addr ipv6_nh; + }; +}; + struct bpf_redirect_info { u32 flags; u32 tgt_index; void *tgt_value; struct bpf_map *map; u32 kern_flags; + struct bpf_nh_params nh; }; DECLARE_PER_CPU(struct bpf_redirect_info, bpf_redirect_info); diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 666cd0390699..9f118771e248 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -240,7 +240,7 @@ struct netlink_dump_control { int (*done)(struct netlink_callback *); void *data; struct module *module; - u16 min_dump_alloc; + u32 min_dump_alloc; }; int __netlink_dump_start(struct sock *ssk, struct sk_buff *skb, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1110ecd7d1f3..8f50c9c19f1b 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -2913,7 +2913,6 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_CGROUP_INET_INGRESS: case BPF_CGROUP_INET_EGRESS: return BPF_PROG_TYPE_CGROUP_SKB; - break; case BPF_CGROUP_INET_SOCK_CREATE: case BPF_CGROUP_INET_SOCK_RELEASE: case BPF_CGROUP_INET4_POST_BIND: diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 39d7f44e7c92..6200519582a6 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -5133,24 +5133,19 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn regs[BPF_REG_0].id = ++env->id_gen; } else { regs[BPF_REG_0].type = PTR_TO_MAP_VALUE_OR_NULL; - regs[BPF_REG_0].id = ++env->id_gen; } } else if (fn->ret_type == RET_PTR_TO_SOCKET_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCKET_OR_NULL; - regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_SOCK_COMMON_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_SOCK_COMMON_OR_NULL; - regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_TCP_SOCK_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL; - regs[BPF_REG_0].id = ++env->id_gen; } else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) { mark_reg_known_zero(env, regs, BPF_REG_0); regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL; - regs[BPF_REG_0].id = ++env->id_gen; regs[BPF_REG_0].mem_size = meta.mem_size; } else if (fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID_OR_NULL || fn->ret_type == RET_PTR_TO_MEM_OR_BTF_ID) { @@ -5199,6 +5194,9 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (reg_type_may_be_null(regs[BPF_REG_0].type)) + regs[BPF_REG_0].id = ++env->id_gen; + if (is_ptr_cast_function(func_id)) { /* For release_reference() */ regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id; @@ -7212,7 +7210,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state, struct bpf_reg_state *reg, u32 id, bool is_null) { - if (reg_type_may_be_null(reg->type) && reg->id == id) { + if (reg_type_may_be_null(reg->type) && reg->id == id && + !WARN_ON_ONCE(!reg->id)) { /* Old offset (both fixed and variable parts) should * have been known-zero, because we don't allow pointer * arithmetic on pointers that might be NULL. diff --git a/net/core/filter.c b/net/core/filter.c index c5e2a1c5fd8d..6d0fa65a4a46 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -2165,12 +2165,12 @@ static int __bpf_redirect(struct sk_buff *skb, struct net_device *dev, } #if IS_ENABLED(CONFIG_IPV6) -static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); const struct in6_addr *nexthop; + struct dst_entry *dst = NULL; struct neighbour *neigh; if (dev_xmit_recursion()) { @@ -2196,8 +2196,13 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), - &ipv6_hdr(skb)->daddr); + if (!nh) { + dst = skb_dst(skb); + nexthop = rt6_nexthop(container_of(dst, struct rt6_info, dst), + &ipv6_hdr(skb)->daddr); + } else { + nexthop = &nh->ipv6_nh; + } neigh = ip_neigh_gw6(dev, nexthop); if (likely(!IS_ERR(neigh))) { int ret; @@ -2210,36 +2215,43 @@ static int bpf_out_neigh_v6(struct net *net, struct sk_buff *skb) return ret; } rcu_read_unlock_bh(); - IP6_INC_STATS(dev_net(dst->dev), - ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); + if (dst) + IP6_INC_STATS(dev_net(dst->dev), + ip6_dst_idev(dst), IPSTATS_MIB_OUTNOROUTES); out_drop: kfree_skb(skb); return -ENETDOWN; } -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct ipv6hdr *ip6h = ipv6_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct dst_entry *dst; - struct flowi6 fl6 = { - .flowi6_flags = FLOWI_FLAG_ANYSRC, - .flowi6_mark = skb->mark, - .flowlabel = ip6_flowinfo(ip6h), - .flowi6_oif = dev->ifindex, - .flowi6_proto = ip6h->nexthdr, - .daddr = ip6h->daddr, - .saddr = ip6h->saddr, - }; - dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); - if (IS_ERR(dst)) - goto out_drop; + if (!nh) { + struct dst_entry *dst; + struct flowi6 fl6 = { + .flowi6_flags = FLOWI_FLAG_ANYSRC, + .flowi6_mark = skb->mark, + .flowlabel = ip6_flowinfo(ip6h), + .flowi6_oif = dev->ifindex, + .flowi6_proto = ip6h->nexthdr, + .daddr = ip6h->daddr, + .saddr = ip6h->saddr, + }; + + dst = ipv6_stub->ipv6_dst_lookup_flow(net, NULL, &fl6, NULL); + if (IS_ERR(dst)) + goto out_drop; - skb_dst_set(skb, dst); + skb_dst_set(skb, dst); + } else if (nh->nh_family != AF_INET6) { + goto out_drop; + } - err = bpf_out_neigh_v6(net, skb); + err = bpf_out_neigh_v6(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2252,7 +2264,8 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; @@ -2260,11 +2273,9 @@ static int __bpf_redirect_neigh_v6(struct sk_buff *skb, struct net_device *dev) #endif /* CONFIG_IPV6 */ #if IS_ENABLED(CONFIG_INET) -static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) +static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb, + struct net_device *dev, struct bpf_nh_params *nh) { - struct dst_entry *dst = skb_dst(skb); - struct rtable *rt = container_of(dst, struct rtable, dst); - struct net_device *dev = dst->dev; u32 hh_len = LL_RESERVED_SPACE(dev); struct neighbour *neigh; bool is_v6gw = false; @@ -2292,7 +2303,21 @@ static int bpf_out_neigh_v4(struct net *net, struct sk_buff *skb) } rcu_read_lock_bh(); - neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + if (!nh) { + struct dst_entry *dst = skb_dst(skb); + struct rtable *rt = container_of(dst, struct rtable, dst); + + neigh = ip_neigh_for_gw(rt, skb, &is_v6gw); + } else if (nh->nh_family == AF_INET6) { + neigh = ip_neigh_gw6(dev, &nh->ipv6_nh); + is_v6gw = true; + } else if (nh->nh_family == AF_INET) { + neigh = ip_neigh_gw4(dev, nh->ipv4_nh); + } else { + rcu_read_unlock_bh(); + goto out_drop; + } + if (likely(!IS_ERR(neigh))) { int ret; @@ -2309,33 +2334,37 @@ out_drop: return -ENETDOWN; } -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { const struct iphdr *ip4h = ip_hdr(skb); struct net *net = dev_net(dev); int err, ret = NET_XMIT_DROP; - struct rtable *rt; - struct flowi4 fl4 = { - .flowi4_flags = FLOWI_FLAG_ANYSRC, - .flowi4_mark = skb->mark, - .flowi4_tos = RT_TOS(ip4h->tos), - .flowi4_oif = dev->ifindex, - .flowi4_proto = ip4h->protocol, - .daddr = ip4h->daddr, - .saddr = ip4h->saddr, - }; - rt = ip_route_output_flow(net, &fl4, NULL); - if (IS_ERR(rt)) - goto out_drop; - if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { - ip_rt_put(rt); - goto out_drop; - } + if (!nh) { + struct flowi4 fl4 = { + .flowi4_flags = FLOWI_FLAG_ANYSRC, + .flowi4_mark = skb->mark, + .flowi4_tos = RT_TOS(ip4h->tos), + .flowi4_oif = dev->ifindex, + .flowi4_proto = ip4h->protocol, + .daddr = ip4h->daddr, + .saddr = ip4h->saddr, + }; + struct rtable *rt; + + rt = ip_route_output_flow(net, &fl4, NULL); + if (IS_ERR(rt)) + goto out_drop; + if (rt->rt_type != RTN_UNICAST && rt->rt_type != RTN_LOCAL) { + ip_rt_put(rt); + goto out_drop; + } - skb_dst_set(skb, &rt->dst); + skb_dst_set(skb, &rt->dst); + } - err = bpf_out_neigh_v4(net, skb); + err = bpf_out_neigh_v4(net, skb, dev, nh); if (unlikely(net_xmit_eval(err))) dev->stats.tx_errors++; else @@ -2348,14 +2377,16 @@ out_xmit: return ret; } #else -static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh_v4(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { kfree_skb(skb); return NET_XMIT_DROP; } #endif /* CONFIG_INET */ -static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) +static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev, + struct bpf_nh_params *nh) { struct ethhdr *ethh = eth_hdr(skb); @@ -2370,9 +2401,9 @@ static int __bpf_redirect_neigh(struct sk_buff *skb, struct net_device *dev) skb_reset_network_header(skb); if (skb->protocol == htons(ETH_P_IP)) - return __bpf_redirect_neigh_v4(skb, dev); + return __bpf_redirect_neigh_v4(skb, dev, nh); else if (skb->protocol == htons(ETH_P_IPV6)) - return __bpf_redirect_neigh_v6(skb, dev); + return __bpf_redirect_neigh_v6(skb, dev, nh); out: kfree_skb(skb); return -ENOTSUPP; @@ -2382,7 +2413,8 @@ out: enum { BPF_F_NEIGH = (1ULL << 1), BPF_F_PEER = (1ULL << 2), -#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER) + BPF_F_NEXTHOP = (1ULL << 3), +#define BPF_F_REDIRECT_INTERNAL (BPF_F_NEIGH | BPF_F_PEER | BPF_F_NEXTHOP) }; BPF_CALL_3(bpf_clone_redirect, struct sk_buff *, skb, u32, ifindex, u64, flags) @@ -2455,7 +2487,8 @@ int skb_do_redirect(struct sk_buff *skb) return -EAGAIN; } return flags & BPF_F_NEIGH ? - __bpf_redirect_neigh(skb, dev) : + __bpf_redirect_neigh(skb, dev, flags & BPF_F_NEXTHOP ? + &ri->nh : NULL) : __bpf_redirect(skb, dev, flags); out_drop: kfree_skb(skb); @@ -2504,16 +2537,21 @@ static const struct bpf_func_proto bpf_redirect_peer_proto = { .arg2_type = ARG_ANYTHING, }; -BPF_CALL_2(bpf_redirect_neigh, u32, ifindex, u64, flags) +BPF_CALL_4(bpf_redirect_neigh, u32, ifindex, struct bpf_redir_neigh *, params, + int, plen, u64, flags) { struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - if (unlikely(flags)) + if (unlikely((plen && plen < sizeof(*params)) || flags)) return TC_ACT_SHOT; - ri->flags = BPF_F_NEIGH; + ri->flags = BPF_F_NEIGH | (plen ? BPF_F_NEXTHOP : 0); ri->tgt_index = ifindex; + BUILD_BUG_ON(sizeof(struct bpf_redir_neigh) != sizeof(struct bpf_nh_params)); + if (plen) + memcpy(&ri->nh, params, sizeof(ri->nh)); + return TC_ACT_REDIRECT; } @@ -2522,7 +2560,9 @@ static const struct bpf_func_proto bpf_redirect_neigh_proto = { .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_ANYTHING, - .arg2_type = ARG_ANYTHING, + .arg2_type = ARG_PTR_TO_MEM_OR_NULL, + .arg3_type = ARG_CONST_SIZE_OR_ZERO, + .arg4_type = ARG_ANYTHING, }; BPF_CALL_2(bpf_msg_apply_bytes, struct sk_msg *, msg, u32, bytes) diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index 68e0682450c6..7d7223691783 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -3709,13 +3709,13 @@ static int rtnl_dellinkprop(struct sk_buff *skb, struct nlmsghdr *nlh, return rtnl_linkprop(RTM_DELLINKPROP, skb, nlh, extack); } -static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) +static u32 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) { struct net *net = sock_net(skb->sk); - struct net_device *dev; + size_t min_ifinfo_dump_size = 0; struct nlattr *tb[IFLA_MAX+1]; u32 ext_filter_mask = 0; - u16 min_ifinfo_dump_size = 0; + struct net_device *dev; int hdrlen; /* Same kernel<->userspace interface hack as in rtnl_dump_ifinfo. */ @@ -3735,9 +3735,8 @@ static u16 rtnl_calcit(struct sk_buff *skb, struct nlmsghdr *nlh) */ rcu_read_lock(); for_each_netdev_rcu(net, dev) { - min_ifinfo_dump_size = max_t(u16, min_ifinfo_dump_size, - if_nlmsg_size(dev, - ext_filter_mask)); + min_ifinfo_dump_size = max(min_ifinfo_dump_size, + if_nlmsg_size(dev, ext_filter_mask)); } rcu_read_unlock(); @@ -5494,7 +5493,7 @@ static int rtnetlink_rcv_msg(struct sk_buff *skb, struct nlmsghdr *nlh, if (kind == 2 && nlh->nlmsg_flags&NLM_F_DUMP) { struct sock *rtnl; rtnl_dumpit_func dumpit; - u16 min_dump_alloc = 0; + u32 min_dump_alloc = 0; link = rtnl_get_link(family, type); if (!link || !link->dumpit) { diff --git a/net/mpls/mpls_iptunnel.c b/net/mpls/mpls_iptunnel.c index 2def85718d94..ef59e25dc482 100644 --- a/net/mpls/mpls_iptunnel.c +++ b/net/mpls/mpls_iptunnel.c @@ -300,5 +300,6 @@ static void __exit mpls_iptunnel_exit(void) module_exit(mpls_iptunnel_exit); MODULE_ALIAS_RTNL_LWT(MPLS); +MODULE_SOFTDEP("post: mpls_gso"); MODULE_DESCRIPTION("MultiProtocol Label Switching IP Tunnels"); MODULE_LICENSE("GPL v2"); diff --git a/net/mptcp/Kconfig b/net/mptcp/Kconfig index 698bc3525160..a014149aa323 100644 --- a/net/mptcp/Kconfig +++ b/net/mptcp/Kconfig @@ -19,14 +19,11 @@ config INET_MPTCP_DIAG config MPTCP_IPV6 bool "MPTCP: IPv6 support for Multipath TCP" - select IPV6 + depends on IPV6=y default y -endif - config MPTCP_KUNIT_TESTS tristate "This builds the MPTCP KUnit tests" if !KUNIT_ALL_TESTS - select MPTCP depends on KUNIT default KUNIT_ALL_TESTS help @@ -39,3 +36,4 @@ config MPTCP_KUNIT_TESTS If unsure, say N. +endif diff --git a/net/mptcp/options.c b/net/mptcp/options.c index 092a2d48bfd3..a044dd43411d 100644 --- a/net/mptcp/options.c +++ b/net/mptcp/options.c @@ -241,7 +241,6 @@ static void mptcp_parse_option(const struct sk_buff *skb, } mp_opt->add_addr = 1; - mp_opt->port = 0; mp_opt->addr_id = *ptr++; pr_debug("ADD_ADDR: id=%d, echo=%d", mp_opt->addr_id, mp_opt->echo); if (mp_opt->family == MPTCP_ADDR_IPVERSION_4) { @@ -297,6 +296,8 @@ void mptcp_get_options(const struct sk_buff *skb, mp_opt->mp_capable = 0; mp_opt->mp_join = 0; mp_opt->add_addr = 0; + mp_opt->ahmac = 0; + mp_opt->port = 0; mp_opt->rm_addr = 0; mp_opt->dss = 0; diff --git a/net/nfc/netlink.c b/net/nfc/netlink.c index e894254c17d4..8709f3d4e7c4 100644 --- a/net/nfc/netlink.c +++ b/net/nfc/netlink.c @@ -1217,7 +1217,7 @@ static int nfc_genl_fw_download(struct sk_buff *skb, struct genl_info *info) u32 idx; char firmware_name[NFC_FIRMWARE_NAME_MAXSIZE + 1]; - if (!info->attrs[NFC_ATTR_DEVICE_INDEX]) + if (!info->attrs[NFC_ATTR_DEVICE_INDEX] || !info->attrs[NFC_ATTR_FIRMWARE_NAME]) return -EINVAL; idx = nla_get_u32(info->attrs[NFC_ATTR_DEVICE_INDEX]); diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index 9c79fb92c2da..aba3cd85f284 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -156,11 +156,11 @@ tcf_ct_flow_table_add_action_nat_udp(const struct nf_conntrack_tuple *tuple, __be16 target_dst = target.dst.u.udp.port; if (target_src != tuple->src.u.udp.port) - tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP, offsetof(struct udphdr, source), 0xFFFF, be16_to_cpu(target_src)); if (target_dst != tuple->dst.u.udp.port) - tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_TCP, + tcf_ct_add_mangle_action(action, FLOW_ACT_MANGLE_HDR_TYPE_UDP, offsetof(struct udphdr, dest), 0xFFFF, be16_to_cpu(target_dst)); } diff --git a/net/sched/act_tunnel_key.c b/net/sched/act_tunnel_key.c index a229751ee8c4..85c0d0d5b9da 100644 --- a/net/sched/act_tunnel_key.c +++ b/net/sched/act_tunnel_key.c @@ -459,7 +459,7 @@ static int tunnel_key_init(struct net *net, struct nlattr *nla, metadata = __ipv6_tun_set_dst(&saddr, &daddr, tos, ttl, dst_port, 0, flags, - key_id, 0); + key_id, opts_len); } else { NL_SET_ERR_MSG(extack, "Missing either ipv4 or ipv6 src and dst"); ret = -EINVAL; diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c index 41a55c6cbeb8..faeabff283a2 100644 --- a/net/sched/cls_api.c +++ b/net/sched/cls_api.c @@ -3712,7 +3712,7 @@ int tc_setup_flow_action(struct flow_action *flow_action, entry->gate.num_entries = tcf_gate_num_entries(act); err = tcf_gate_get_entries(entry, act); if (err) - goto err_out; + goto err_out_locked; } else { err = -EOPNOTSUPP; goto err_out_locked; diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c index 8142d02b33e6..b363503357e5 100644 --- a/samples/bpf/sockex3_kern.c +++ b/samples/bpf/sockex3_kern.c @@ -44,17 +44,17 @@ static inline void parse_eth_proto(struct __sk_buff *skb, u32 proto) switch (proto) { case ETH_P_8021Q: case ETH_P_8021AD: - bpf_tail_call_static(skb, &jmp_table, PARSE_VLAN); + bpf_tail_call(skb, &jmp_table, PARSE_VLAN); break; case ETH_P_MPLS_UC: case ETH_P_MPLS_MC: - bpf_tail_call_static(skb, &jmp_table, PARSE_MPLS); + bpf_tail_call(skb, &jmp_table, PARSE_MPLS); break; case ETH_P_IP: - bpf_tail_call_static(skb, &jmp_table, PARSE_IP); + bpf_tail_call(skb, &jmp_table, PARSE_IP); break; case ETH_P_IPV6: - bpf_tail_call_static(skb, &jmp_table, PARSE_IPV6); + bpf_tail_call(skb, &jmp_table, PARSE_IPV6); break; } } diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py index 7d86fdd190be..6769caae142f 100755 --- a/scripts/bpf_helpers_doc.py +++ b/scripts/bpf_helpers_doc.py @@ -453,6 +453,7 @@ class PrinterHelpers(Printer): 'struct bpf_perf_event_data', 'struct bpf_perf_event_value', 'struct bpf_pidns_info', + 'struct bpf_redir_neigh', 'struct bpf_sk_lookup', 'struct bpf_sock', 'struct bpf_sock_addr', diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index bf5a99d803e4..e6ceac3f7d62 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -3677,15 +3677,19 @@ union bpf_attr { * Return * The id is returned or 0 in case the id could not be retrieved. * - * long bpf_redirect_neigh(u32 ifindex, u64 flags) + * long bpf_redirect_neigh(u32 ifindex, struct bpf_redir_neigh *params, int plen, u64 flags) * Description * Redirect the packet to another net device of index *ifindex* * and fill in L2 addresses from neighboring subsystem. This helper * is somewhat similar to **bpf_redirect**\ (), except that it * populates L2 addresses as well, meaning, internally, the helper - * performs a FIB lookup based on the skb's networking header to - * get the address of the next hop and then relies on the neighbor - * lookup for the L2 address of the nexthop. + * relies on the neighbor lookup for the L2 address of the nexthop. + * + * The helper will perform a FIB lookup based on the skb's + * networking header to get the address of the next hop, unless + * this is supplied by the caller in the *params* argument. The + * *plen* argument indicates the len of *params* and should be set + * to 0 if *params* is NULL. * * The *flags* argument is reserved and must be 0. The helper is * currently only supported for tc BPF program types, and enabled @@ -4906,6 +4910,16 @@ struct bpf_fib_lookup { __u8 dmac[6]; /* ETH_ALEN */ }; +struct bpf_redir_neigh { + /* network family for lookup (AF_INET, AF_INET6) */ + __u32 nh_family; + /* network address of nexthop; skips fib lookup to find gateway */ + union { + __be32 ipv4_nh; + __u32 ipv6_nh[4]; /* in6_addr; network order */ + }; +}; + enum bpf_task_fd_type { BPF_FD_TYPE_RAW_TRACEPOINT, /* tp name */ BPF_FD_TYPE_TRACEPOINT, /* tp name */ diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 2bdb7d6dbad2..72b251110c4d 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -72,6 +72,7 @@ /* * Helper function to perform a tail call with a constant/immediate map slot. */ +#if __clang_major__ >= 8 && defined(__bpf__) static __always_inline void bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) { @@ -98,6 +99,7 @@ bpf_tail_call_static(void *ctx, const void *map, const __u32 slot) :: [ctx]"r"(ctx), [map]"r"(map), [slot]"i"(slot) : "r0", "r1", "r2", "r3", "r4", "r5"); } +#endif /* * Helper structure used by eBPF C program diff --git a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c index 28e26bd3e0ca..b58b775d19f3 100644 --- a/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c +++ b/tools/testing/selftests/bpf/prog_tests/ksyms_btf.c @@ -5,18 +5,17 @@ #include <bpf/libbpf.h> #include <bpf/btf.h> #include "test_ksyms_btf.skel.h" +#include "test_ksyms_btf_null_check.skel.h" static int duration; -void test_ksyms_btf(void) +static void test_basic(void) { __u64 runqueues_addr, bpf_prog_active_addr; __u32 this_rq_cpu; int this_bpf_prog_active; struct test_ksyms_btf *skel = NULL; struct test_ksyms_btf__data *data; - struct btf *btf; - int percpu_datasec; int err; err = kallsyms_find("runqueues", &runqueues_addr); @@ -31,20 +30,6 @@ void test_ksyms_btf(void) if (CHECK(err == -ENOENT, "ksym_find", "symbol 'bpf_prog_active' not found\n")) return; - btf = libbpf_find_kernel_btf(); - if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", - PTR_ERR(btf))) - return; - - percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", - BTF_KIND_DATASEC); - if (percpu_datasec < 0) { - printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", - __func__); - test__skip(); - goto cleanup; - } - skel = test_ksyms_btf__open_and_load(); if (CHECK(!skel, "skel_open", "failed to open and load skeleton\n")) goto cleanup; @@ -83,6 +68,42 @@ void test_ksyms_btf(void) data->out__bpf_prog_active); cleanup: - btf__free(btf); test_ksyms_btf__destroy(skel); } + +static void test_null_check(void) +{ + struct test_ksyms_btf_null_check *skel; + + skel = test_ksyms_btf_null_check__open_and_load(); + CHECK(skel, "skel_open", "unexpected load of a prog missing null check\n"); + + test_ksyms_btf_null_check__destroy(skel); +} + +void test_ksyms_btf(void) +{ + int percpu_datasec; + struct btf *btf; + + btf = libbpf_find_kernel_btf(); + if (CHECK(IS_ERR(btf), "btf_exists", "failed to load kernel BTF: %ld\n", + PTR_ERR(btf))) + return; + + percpu_datasec = btf__find_by_name_kind(btf, ".data..percpu", + BTF_KIND_DATASEC); + btf__free(btf); + if (percpu_datasec < 0) { + printf("%s:SKIP:no PERCPU DATASEC in kernel btf\n", + __func__); + test__skip(); + return; + } + + if (test__start_subtest("basic")) + test_basic(); + + if (test__start_subtest("null_check")) + test_null_check(); +} diff --git a/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c new file mode 100644 index 000000000000..8bc8f7c637bc --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_ksyms_btf_null_check.c @@ -0,0 +1,31 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2020 Facebook */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> + +extern const struct rq runqueues __ksym; /* struct type global var. */ +extern const int bpf_prog_active __ksym; /* int type global var. */ + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + struct rq *rq; + int *active; + __u32 cpu; + + cpu = bpf_get_smp_processor_id(); + rq = (struct rq *)bpf_per_cpu_ptr(&runqueues, cpu); + active = (int *)bpf_per_cpu_ptr(&bpf_prog_active, cpu); + if (active) { + /* READ_ONCE */ + *(volatile int *)active; + /* !rq has not been tested, so verifier should reject. */ + *(volatile int *)(&rq->cpu); + } + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh.c b/tools/testing/selftests/bpf/progs/test_tc_neigh.c index fe182616b112..b985ac4e7a81 100644 --- a/tools/testing/selftests/bpf/progs/test_tc_neigh.c +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <stddef.h> #include <stdint.h> #include <stdbool.h> @@ -118,7 +119,7 @@ SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_src), 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_src), NULL, 0, 0); } SEC("src_ingress") int tc_src(struct __sk_buff *skb) @@ -142,7 +143,7 @@ SEC("src_ingress") int tc_src(struct __sk_buff *skb) if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) return TC_ACT_SHOT; - return bpf_redirect_neigh(get_dev_ifindex(dev_dst), 0); + return bpf_redirect_neigh(get_dev_ifindex(dev_dst), NULL, 0, 0); } char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c new file mode 100644 index 000000000000..d82ed3457030 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tc_neigh_fib.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdint.h> +#include <stdbool.h> +#include <stddef.h> + +#include <linux/bpf.h> +#include <linux/stddef.h> +#include <linux/pkt_cls.h> +#include <linux/if_ether.h> +#include <linux/in.h> +#include <linux/ip.h> +#include <linux/ipv6.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> + +#ifndef ctx_ptr +# define ctx_ptr(field) (void *)(long)(field) +#endif + +#define AF_INET 2 +#define AF_INET6 10 + +static __always_inline int fill_fib_params_v4(struct __sk_buff *skb, + struct bpf_fib_lookup *fib_params) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct iphdr *ip4h; + + if (data + sizeof(struct ethhdr) > data_end) + return -1; + + ip4h = (struct iphdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip4h + 1) > data_end) + return -1; + + fib_params->family = AF_INET; + fib_params->tos = ip4h->tos; + fib_params->l4_protocol = ip4h->protocol; + fib_params->sport = 0; + fib_params->dport = 0; + fib_params->tot_len = bpf_ntohs(ip4h->tot_len); + fib_params->ipv4_src = ip4h->saddr; + fib_params->ipv4_dst = ip4h->daddr; + + return 0; +} + +static __always_inline int fill_fib_params_v6(struct __sk_buff *skb, + struct bpf_fib_lookup *fib_params) +{ + struct in6_addr *src = (struct in6_addr *)fib_params->ipv6_src; + struct in6_addr *dst = (struct in6_addr *)fib_params->ipv6_dst; + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + struct ipv6hdr *ip6h; + + if (data + sizeof(struct ethhdr) > data_end) + return -1; + + ip6h = (struct ipv6hdr *)(data + sizeof(struct ethhdr)); + if ((void *)(ip6h + 1) > data_end) + return -1; + + fib_params->family = AF_INET6; + fib_params->flowinfo = 0; + fib_params->l4_protocol = ip6h->nexthdr; + fib_params->sport = 0; + fib_params->dport = 0; + fib_params->tot_len = bpf_ntohs(ip6h->payload_len); + *src = ip6h->saddr; + *dst = ip6h->daddr; + + return 0; +} + +SEC("chk_egress") int tc_chk(struct __sk_buff *skb) +{ + void *data_end = ctx_ptr(skb->data_end); + void *data = ctx_ptr(skb->data); + __u32 *raw = data; + + if (data + sizeof(struct ethhdr) > data_end) + return TC_ACT_SHOT; + + return !raw[0] && !raw[1] && !raw[2] ? TC_ACT_SHOT : TC_ACT_OK; +} + +static __always_inline int tc_redir(struct __sk_buff *skb) +{ + struct bpf_fib_lookup fib_params = { .ifindex = skb->ingress_ifindex }; + __u8 zero[ETH_ALEN * 2]; + int ret = -1; + + switch (skb->protocol) { + case __bpf_constant_htons(ETH_P_IP): + ret = fill_fib_params_v4(skb, &fib_params); + break; + case __bpf_constant_htons(ETH_P_IPV6): + ret = fill_fib_params_v6(skb, &fib_params); + break; + } + + if (ret) + return TC_ACT_OK; + + ret = bpf_fib_lookup(skb, &fib_params, sizeof(fib_params), 0); + if (ret == BPF_FIB_LKUP_RET_NOT_FWDED || ret < 0) + return TC_ACT_OK; + + __builtin_memset(&zero, 0, sizeof(zero)); + if (bpf_skb_store_bytes(skb, 0, &zero, sizeof(zero), 0) < 0) + return TC_ACT_SHOT; + + if (ret == BPF_FIB_LKUP_RET_NO_NEIGH) { + struct bpf_redir_neigh nh_params = {}; + + nh_params.nh_family = fib_params.family; + __builtin_memcpy(&nh_params.ipv6_nh, &fib_params.ipv6_dst, + sizeof(nh_params.ipv6_nh)); + + return bpf_redirect_neigh(fib_params.ifindex, &nh_params, + sizeof(nh_params), 0); + + } else if (ret == BPF_FIB_LKUP_RET_SUCCESS) { + void *data_end = ctx_ptr(skb->data_end); + struct ethhdr *eth = ctx_ptr(skb->data); + + if (eth + 1 > data_end) + return TC_ACT_SHOT; + + __builtin_memcpy(eth->h_dest, fib_params.dmac, ETH_ALEN); + __builtin_memcpy(eth->h_source, fib_params.smac, ETH_ALEN); + + return bpf_redirect(fib_params.ifindex, 0); + } + + return TC_ACT_SHOT; +} + +/* these are identical, but keep them separate for compatibility with the + * section names expected by test_tc_redirect.sh + */ +SEC("dst_ingress") int tc_dst(struct __sk_buff *skb) +{ + return tc_redir(skb); +} + +SEC("src_ingress") int tc_src(struct __sk_buff *skb) +{ + return tc_redir(skb); +} + +char __license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_tc_redirect.sh b/tools/testing/selftests/bpf/test_tc_redirect.sh index 6d7482562140..8868aa1ca902 100755 --- a/tools/testing/selftests/bpf/test_tc_redirect.sh +++ b/tools/testing/selftests/bpf/test_tc_redirect.sh @@ -24,8 +24,7 @@ command -v timeout >/dev/null 2>&1 || \ { echo >&2 "timeout is not available"; exit 1; } command -v ping >/dev/null 2>&1 || \ { echo >&2 "ping is not available"; exit 1; } -command -v ping6 >/dev/null 2>&1 || \ - { echo >&2 "ping6 is not available"; exit 1; } +if command -v ping6 >/dev/null 2>&1; then PING6=ping6; else PING6=ping; fi command -v perl >/dev/null 2>&1 || \ { echo >&2 "perl is not available"; exit 1; } command -v jq >/dev/null 2>&1 || \ @@ -152,7 +151,7 @@ netns_test_connectivity() echo -e "${TEST}: ${GREEN}PASS${NC}" TEST="ICMPv6 connectivity test" - ip netns exec ${NS_SRC} ping6 $PING_ARG ${IP6_DST} + ip netns exec ${NS_SRC} $PING6 $PING_ARG ${IP6_DST} if [ $? -ne 0 ]; then echo -e "${TEST}: ${RED}FAIL${NC}" exit 1 @@ -170,6 +169,7 @@ hex_mem_str() netns_setup_bpf() { local obj=$1 + local use_forwarding=${2:-0} ip netns exec ${NS_FWD} tc qdisc add dev veth_src_fwd clsact ip netns exec ${NS_FWD} tc filter add dev veth_src_fwd ingress bpf da obj $obj sec src_ingress @@ -179,6 +179,14 @@ netns_setup_bpf() ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd ingress bpf da obj $obj sec dst_ingress ip netns exec ${NS_FWD} tc filter add dev veth_dst_fwd egress bpf da obj $obj sec chk_egress + if [ "$use_forwarding" -eq "1" ]; then + # bpf_fib_lookup() checks if forwarding is enabled + ip netns exec ${NS_FWD} sysctl -w net.ipv4.ip_forward=1 + ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_dst_fwd.forwarding=1 + ip netns exec ${NS_FWD} sysctl -w net.ipv6.conf.veth_src_fwd.forwarding=1 + return 0 + fi + veth_src=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_src_fwd/ifindex) veth_dst=$(ip netns exec ${NS_FWD} cat /sys/class/net/veth_dst_fwd/ifindex) @@ -200,5 +208,9 @@ netns_setup_bpf test_tc_neigh.o netns_test_connectivity netns_cleanup netns_setup +netns_setup_bpf test_tc_neigh_fib.o 1 +netns_test_connectivity +netns_cleanup +netns_setup netns_setup_bpf test_tc_peer.o netns_test_connectivity diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c index b1aac2641498..ce13ece08d51 100644 --- a/tools/testing/selftests/bpf/verifier/sock.c +++ b/tools/testing/selftests/bpf/verifier/sock.c @@ -631,3 +631,28 @@ .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, .result = ACCEPT, }, +{ + "mark null check on return value of bpf_skc_to helpers", + .insns = { + BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_1, offsetof(struct __sk_buff, sk)), + BPF_JMP_IMM(BPF_JNE, BPF_REG_1, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_MOV64_REG(BPF_REG_6, BPF_REG_1), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_sock), + BPF_MOV64_REG(BPF_REG_7, BPF_REG_0), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_6), + BPF_EMIT_CALL(BPF_FUNC_skc_to_tcp_request_sock), + BPF_MOV64_REG(BPF_REG_8, BPF_REG_0), + BPF_JMP_IMM(BPF_JNE, BPF_REG_8, 0, 2), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + BPF_LDX_MEM(BPF_B, BPF_REG_0, BPF_REG_7, 0), + BPF_EXIT_INSN(), + }, + .prog_type = BPF_PROG_TYPE_SCHED_CLS, + .result = REJECT, + .errstr = "invalid mem access", + .result_unpriv = REJECT, + .errstr_unpriv = "unknown func", +}, diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config index 8df5cb8f71ff..741a1c4f4ae8 100644 --- a/tools/testing/selftests/net/mptcp/config +++ b/tools/testing/selftests/net/mptcp/config @@ -1,4 +1,5 @@ CONFIG_MPTCP=y +CONFIG_IPV6=y CONFIG_MPTCP_IPV6=y CONFIG_INET_DIAG=m CONFIG_INET_MPTCP_DIAG=m |