diff options
41 files changed, 404 insertions, 112 deletions
diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h index 841da29cef93..f6c0938027ec 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb.h +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb.h @@ -178,6 +178,8 @@ struct kvaser_usb_dev_cfg { extern const struct kvaser_usb_dev_ops kvaser_usb_hydra_dev_ops; extern const struct kvaser_usb_dev_ops kvaser_usb_leaf_dev_ops; +void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv); + int kvaser_usb_recv_cmd(const struct kvaser_usb *dev, void *cmd, int len, int *actual_len); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c index 824cab80aa02..e91648ed7386 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_core.c @@ -477,7 +477,7 @@ static void kvaser_usb_reset_tx_urb_contexts(struct kvaser_usb_net_priv *priv) /* This method might sleep. Do not call it in the atomic context * of URB completions. */ -static void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) +void kvaser_usb_unlink_tx_urbs(struct kvaser_usb_net_priv *priv) { usb_kill_anchored_urbs(&priv->tx_submitted); kvaser_usb_reset_tx_urb_contexts(priv); @@ -729,6 +729,7 @@ static int kvaser_usb_init_one(struct kvaser_usb *dev, int channel) init_usb_anchor(&priv->tx_submitted); init_completion(&priv->start_comp); init_completion(&priv->stop_comp); + init_completion(&priv->flush_comp); priv->can.ctrlmode_supported = 0; priv->dev = dev; diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c index 6871d474dabf..7b52fda73d82 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_hydra.c @@ -1916,7 +1916,7 @@ static int kvaser_usb_hydra_flush_queue(struct kvaser_usb_net_priv *priv) { int err; - init_completion(&priv->flush_comp); + reinit_completion(&priv->flush_comp); err = kvaser_usb_hydra_send_simple_cmd(priv->dev, CMD_FLUSH_QUEUE, priv->channel); diff --git a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c index 07f687f29b34..50f2ac8319ff 100644 --- a/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c +++ b/drivers/net/can/usb/kvaser_usb/kvaser_usb_leaf.c @@ -310,6 +310,38 @@ struct kvaser_cmd { } u; } __packed; +#define CMD_SIZE_ANY 0xff +#define kvaser_fsize(field) sizeof_field(struct kvaser_cmd, field) + +static const u8 kvaser_usb_leaf_cmd_sizes_leaf[] = { + [CMD_START_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_STOP_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_GET_CARD_INFO_REPLY] = kvaser_fsize(u.cardinfo), + [CMD_TX_ACKNOWLEDGE] = kvaser_fsize(u.tx_acknowledge_header), + [CMD_GET_SOFTWARE_INFO_REPLY] = kvaser_fsize(u.leaf.softinfo), + [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.leaf.rx_can), + [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.leaf.rx_can), + [CMD_LEAF_LOG_MESSAGE] = kvaser_fsize(u.leaf.log_message), + [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.leaf.chip_state_event), + [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.leaf.error_event), + /* ignored events: */ + [CMD_FLUSH_QUEUE_REPLY] = CMD_SIZE_ANY, +}; + +static const u8 kvaser_usb_leaf_cmd_sizes_usbcan[] = { + [CMD_START_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_STOP_CHIP_REPLY] = kvaser_fsize(u.simple), + [CMD_GET_CARD_INFO_REPLY] = kvaser_fsize(u.cardinfo), + [CMD_TX_ACKNOWLEDGE] = kvaser_fsize(u.tx_acknowledge_header), + [CMD_GET_SOFTWARE_INFO_REPLY] = kvaser_fsize(u.usbcan.softinfo), + [CMD_RX_STD_MESSAGE] = kvaser_fsize(u.usbcan.rx_can), + [CMD_RX_EXT_MESSAGE] = kvaser_fsize(u.usbcan.rx_can), + [CMD_CHIP_STATE_EVENT] = kvaser_fsize(u.usbcan.chip_state_event), + [CMD_CAN_ERROR_EVENT] = kvaser_fsize(u.usbcan.error_event), + /* ignored events: */ + [CMD_USBCAN_CLOCK_OVERFLOW_EVENT] = CMD_SIZE_ANY, +}; + /* Summary of a kvaser error event, for a unified Leaf/Usbcan error * handling. Some discrepancies between the two families exist: * @@ -397,6 +429,43 @@ static const struct kvaser_usb_dev_cfg kvaser_usb_leaf_imx_dev_cfg_32mhz = { .bittiming_const = &kvaser_usb_flexc_bittiming_const, }; +static int kvaser_usb_leaf_verify_size(const struct kvaser_usb *dev, + const struct kvaser_cmd *cmd) +{ + /* buffer size >= cmd->len ensured by caller */ + u8 min_size = 0; + + switch (dev->driver_info->family) { + case KVASER_LEAF: + if (cmd->id < ARRAY_SIZE(kvaser_usb_leaf_cmd_sizes_leaf)) + min_size = kvaser_usb_leaf_cmd_sizes_leaf[cmd->id]; + break; + case KVASER_USBCAN: + if (cmd->id < ARRAY_SIZE(kvaser_usb_leaf_cmd_sizes_usbcan)) + min_size = kvaser_usb_leaf_cmd_sizes_usbcan[cmd->id]; + break; + } + + if (min_size == CMD_SIZE_ANY) + return 0; + + if (min_size) { + min_size += CMD_HEADER_LEN; + if (cmd->len >= min_size) + return 0; + + dev_err_ratelimited(&dev->intf->dev, + "Received command %u too short (size %u, needed %u)", + cmd->id, cmd->len, min_size); + return -EIO; + } + + dev_warn_ratelimited(&dev->intf->dev, + "Unhandled command (%d, size %d)\n", + cmd->id, cmd->len); + return -EINVAL; +} + static void * kvaser_usb_leaf_frame_to_cmd(const struct kvaser_usb_net_priv *priv, const struct sk_buff *skb, int *cmd_len, @@ -502,6 +571,9 @@ static int kvaser_usb_leaf_wait_cmd(const struct kvaser_usb *dev, u8 id, end: kfree(buf); + if (err == 0) + err = kvaser_usb_leaf_verify_size(dev, cmd); + return err; } @@ -1133,6 +1205,9 @@ static void kvaser_usb_leaf_stop_chip_reply(const struct kvaser_usb *dev, static void kvaser_usb_leaf_handle_command(const struct kvaser_usb *dev, const struct kvaser_cmd *cmd) { + if (kvaser_usb_leaf_verify_size(dev, cmd) < 0) + return; + switch (cmd->id) { case CMD_START_CHIP_REPLY: kvaser_usb_leaf_start_chip_reply(dev, cmd); @@ -1351,9 +1426,13 @@ static int kvaser_usb_leaf_set_mode(struct net_device *netdev, switch (mode) { case CAN_MODE_START: + kvaser_usb_unlink_tx_urbs(priv); + err = kvaser_usb_leaf_simple_cmd_async(priv, CMD_START_CHIP); if (err) return err; + + priv->can.state = CAN_STATE_ERROR_ACTIVE; break; default: return -EOPNOTSUPP; diff --git a/drivers/net/ethernet/broadcom/bcmsysport.h b/drivers/net/ethernet/broadcom/bcmsysport.h index 16b73bb9acc7..5af16e5f9ad0 100644 --- a/drivers/net/ethernet/broadcom/bcmsysport.h +++ b/drivers/net/ethernet/broadcom/bcmsysport.h @@ -484,7 +484,7 @@ struct bcm_rsb { /* Number of Receive hardware descriptor words */ #define SP_NUM_HW_RX_DESC_WORDS 1024 -#define SP_LT_NUM_HW_RX_DESC_WORDS 256 +#define SP_LT_NUM_HW_RX_DESC_WORDS 512 /* Internal linked-list RAM size */ #define SP_NUM_TX_DESC 1536 diff --git a/drivers/net/ethernet/marvell/prestera/prestera_span.c b/drivers/net/ethernet/marvell/prestera/prestera_span.c index f0e9d6ea88c5..1005182ce3bc 100644 --- a/drivers/net/ethernet/marvell/prestera/prestera_span.c +++ b/drivers/net/ethernet/marvell/prestera/prestera_span.c @@ -107,7 +107,7 @@ static int prestera_span_put(struct prestera_switch *sw, u8 span_id) entry = prestera_span_entry_find_by_id(sw->span, span_id); if (!entry) - return false; + return -ENOENT; if (!refcount_dec_and_test(&entry->ref_count)) return 0; @@ -151,6 +151,9 @@ int prestera_span_rule_del(struct prestera_flow_block_binding *binding, { int err; + if (binding->span_id == PRESTERA_SPAN_INVALID_ID) + return -ENOENT; + err = prestera_hw_span_unbind(binding->port, ingress); if (err) return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c index a53e205f4a89..be74e1403328 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc/meter.c @@ -115,6 +115,7 @@ mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, struct mlx5e_flow_meters *flow_meters; u8 cir_man, cir_exp, cbs_man, cbs_exp; struct mlx5_aso_wqe *aso_wqe; + unsigned long expires; struct mlx5_aso *aso; u64 rate, burst; u8 ds_cnt; @@ -187,7 +188,12 @@ mlx5e_tc_meter_modify(struct mlx5_core_dev *mdev, mlx5_aso_post_wqe(aso, true, &aso_wqe->ctrl); /* With newer FW, the wait for the first ASO WQE is more than 2us, put the wait 10ms. */ - err = mlx5_aso_poll_cq(aso, true, 10); + expires = jiffies + msecs_to_jiffies(10); + do { + err = mlx5_aso_poll_cq(aso, true); + if (err) + usleep_range(2, 10); + } while (err && time_is_after_jiffies(expires)); mutex_unlock(&flow_meters->aso_lock); return err; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c index 5da746da898d..41970067917b 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/macsec.c @@ -1405,7 +1405,7 @@ static int macsec_aso_set_arm_event(struct mlx5_core_dev *mdev, struct mlx5e_mac MLX5_ACCESS_ASO_OPC_MOD_MACSEC); macsec_aso_build_ctrl(aso, &aso_wqe->aso_ctrl, in); mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); - err = mlx5_aso_poll_cq(maso, false, 10); + err = mlx5_aso_poll_cq(maso, false); mutex_unlock(&aso->aso_lock); return err; @@ -1430,7 +1430,7 @@ static int macsec_aso_query(struct mlx5_core_dev *mdev, struct mlx5e_macsec *mac macsec_aso_build_wqe_ctrl_seg(aso, &aso_wqe->aso_ctrl, NULL); mlx5_aso_post_wqe(maso, false, &aso_wqe->ctrl); - err = mlx5_aso_poll_cq(maso, false, 10); + err = mlx5_aso_poll_cq(maso, false); if (err) goto err_out; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c index 21e14507ff5c..baa8092f335e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.c @@ -381,20 +381,12 @@ void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, WRITE_ONCE(doorbell_cseg, NULL); } -int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data, u32 interval_ms) +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data) { struct mlx5_aso_cq *cq = &aso->cq; struct mlx5_cqe64 *cqe; - unsigned long expires; cqe = mlx5_cqwq_get_cqe(&cq->wq); - - expires = jiffies + msecs_to_jiffies(interval_ms); - while (!cqe && time_is_after_jiffies(expires)) { - usleep_range(2, 10); - cqe = mlx5_cqwq_get_cqe(&cq->wq); - } - if (!cqe) return -ETIMEDOUT; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h index d854e01d7fc5..2d40dcf9d42e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/aso.h @@ -83,7 +83,7 @@ void mlx5_aso_build_wqe(struct mlx5_aso *aso, u8 ds_cnt, u32 obj_id, u32 opc_mode); void mlx5_aso_post_wqe(struct mlx5_aso *aso, bool with_data, struct mlx5_wqe_ctrl_seg *doorbell_cseg); -int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data, u32 interval_ms); +int mlx5_aso_poll_cq(struct mlx5_aso *aso, bool with_data); struct mlx5_aso *mlx5_aso_create(struct mlx5_core_dev *mdev, u32 pdn); void mlx5_aso_destroy(struct mlx5_aso *aso); diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c index 3ab3e4536b99..8593cafa6368 100644 --- a/drivers/net/ethernet/netronome/nfp/flower/offload.c +++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c @@ -373,10 +373,10 @@ nfp_flower_calculate_key_layers(struct nfp_app *app, if (ipv6_tun) { key_layer_two |= NFP_FLOWER_LAYER2_TUN_IPV6; key_size += - sizeof(struct nfp_flower_ipv6_udp_tun); + sizeof(struct nfp_flower_ipv6_gre_tun); } else { key_size += - sizeof(struct nfp_flower_ipv4_udp_tun); + sizeof(struct nfp_flower_ipv4_gre_tun); } if (enc_op.key) { diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c index 3cbe4ec46234..7f86068f3ff6 100644 --- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c +++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c @@ -2476,7 +2476,10 @@ static int am65_cpsw_nuss_register_devlink(struct am65_cpsw_common *common) port = am65_common_get_port(common, i); dl_port = &port->devlink_port; - attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + if (port->ndev) + attrs.flavour = DEVLINK_PORT_FLAVOUR_PHYSICAL; + else + attrs.flavour = DEVLINK_PORT_FLAVOUR_UNUSED; attrs.phys.port_number = port->port_id; attrs.switch_id.id_len = sizeof(resource_size_t); memcpy(attrs.switch_id.id, common->switch_id, attrs.switch_id.id_len); diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c index 3757e069c486..54a17b576eac 100644 --- a/drivers/net/phy/micrel.c +++ b/drivers/net/phy/micrel.c @@ -1838,7 +1838,7 @@ static int ksz886x_cable_test_start(struct phy_device *phydev) return phy_clear_bits(phydev, MII_BMCR, BMCR_ANENABLE | BMCR_SPEED100); } -static int ksz886x_cable_test_result_trans(u16 status, u16 mask) +static __always_inline int ksz886x_cable_test_result_trans(u16 status, u16 mask) { switch (FIELD_GET(mask, status)) { case KSZ8081_LMD_STAT_NORMAL: @@ -1854,13 +1854,13 @@ static int ksz886x_cable_test_result_trans(u16 status, u16 mask) } } -static bool ksz886x_cable_test_failed(u16 status, u16 mask) +static __always_inline bool ksz886x_cable_test_failed(u16 status, u16 mask) { return FIELD_GET(mask, status) == KSZ8081_LMD_STAT_FAIL; } -static bool ksz886x_cable_test_fault_length_valid(u16 status, u16 mask) +static __always_inline bool ksz886x_cable_test_fault_length_valid(u16 status, u16 mask) { switch (FIELD_GET(mask, status)) { case KSZ8081_LMD_STAT_OPEN: @@ -1871,7 +1871,8 @@ static bool ksz886x_cable_test_fault_length_valid(u16 status, u16 mask) return false; } -static int ksz886x_cable_test_fault_length(struct phy_device *phydev, u16 status, u16 data_mask) +static __always_inline int ksz886x_cable_test_fault_length(struct phy_device *phydev, + u16 status, u16 data_mask) { int dt; diff --git a/drivers/net/phy/sfp-bus.c b/drivers/net/phy/sfp-bus.c index 29e3fa86bac3..daac293e8ede 100644 --- a/drivers/net/phy/sfp-bus.c +++ b/drivers/net/phy/sfp-bus.c @@ -257,6 +257,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, case SFF8024_ECC_100GBASE_SR4_25GBASE_SR: phylink_set(modes, 100000baseSR4_Full); phylink_set(modes, 25000baseSR_Full); + __set_bit(PHY_INTERFACE_MODE_25GBASER, interfaces); break; case SFF8024_ECC_100GBASE_LR4_25GBASE_LR: case SFF8024_ECC_100GBASE_ER4_25GBASE_ER: @@ -268,6 +269,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, case SFF8024_ECC_25GBASE_CR_S: case SFF8024_ECC_25GBASE_CR_N: phylink_set(modes, 25000baseCR_Full); + __set_bit(PHY_INTERFACE_MODE_25GBASER, interfaces); break; case SFF8024_ECC_10GBASE_T_SFI: case SFF8024_ECC_10GBASE_T_SR: @@ -276,6 +278,7 @@ void sfp_parse_support(struct sfp_bus *bus, const struct sfp_eeprom_id *id, break; case SFF8024_ECC_5GBASE_T: phylink_set(modes, 5000baseT_Full); + __set_bit(PHY_INTERFACE_MODE_5GBASER, interfaces); break; case SFF8024_ECC_2_5GBASE_T: phylink_set(modes, 2500baseT_Full); diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d664ba5812d8..37943ba3a73c 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -1182,6 +1182,8 @@ void ipv6_icmp_error(struct sock *sk, struct sk_buff *skb, int err, __be16 port, void ipv6_local_error(struct sock *sk, int err, struct flowi6 *fl6, u32 info); void ipv6_local_rxpmtu(struct sock *sk, struct flowi6 *fl6, u32 mtu); +void inet6_cleanup_sock(struct sock *sk); +void inet6_sock_destruct(struct sock *sk); int inet6_release(struct socket *sock); int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len); int inet6_getname(struct socket *sock, struct sockaddr *uaddr, diff --git a/include/net/udp.h b/include/net/udp.h index 5ee88ddf79c3..fee053bcd17c 100644 --- a/include/net/udp.h +++ b/include/net/udp.h @@ -247,7 +247,7 @@ static inline bool udp_sk_bound_dev_eq(struct net *net, int bound_dev_if, } /* net/ipv4/udp.c */ -void udp_destruct_sock(struct sock *sk); +void udp_destruct_common(struct sock *sk); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len); int __udp_enqueue_schedule_skb(struct sock *sk, struct sk_buff *skb); void udp_skb_destructor(struct sock *sk, struct sk_buff *skb); diff --git a/include/net/udplite.h b/include/net/udplite.h index 0143b373602e..299c14ce2bb9 100644 --- a/include/net/udplite.h +++ b/include/net/udplite.h @@ -25,14 +25,6 @@ static __inline__ int udplite_getfrag(void *from, char *to, int offset, return copy_from_iter_full(to, len, &msg->msg_iter) ? 0 : -EFAULT; } -/* Designate sk as UDP-Lite socket */ -static inline int udplite_sk_init(struct sock *sk) -{ - udp_init_sock(sk); - udp_sk(sk)->pcflag = UDPLITE_BIT; - return 0; -} - /* * Checksumming routines */ diff --git a/net/core/sock.c b/net/core/sock.c index eeb6cbac6f49..a3ba0358c77c 100644 --- a/net/core/sock.c +++ b/net/core/sock.c @@ -3610,7 +3610,8 @@ int sock_common_getsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->getsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->getsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_getsockopt); @@ -3636,7 +3637,8 @@ int sock_common_setsockopt(struct socket *sock, int level, int optname, { struct sock *sk = sock->sk; - return sk->sk_prot->setsockopt(sk, level, optname, optval, optlen); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + return READ_ONCE(sk->sk_prot)->setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(sock_common_setsockopt); diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e2c219382345..3dd02396517d 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -558,22 +558,27 @@ int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; + + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (uaddr->sa_family == AF_UNSPEC) - return sk->sk_prot->disconnect(sk, flags); + return prot->disconnect(sk, flags); if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { - err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + err = prot->pre_connect(sk, uaddr, addr_len); if (err) return err; } if (data_race(!inet_sk(sk)->inet_num) && inet_autobind(sk)) return -EAGAIN; - return sk->sk_prot->connect(sk, uaddr, addr_len); + return prot->connect(sk, uaddr, addr_len); } EXPORT_SYMBOL(inet_dgram_connect); @@ -734,10 +739,11 @@ EXPORT_SYMBOL(inet_stream_connect); int inet_accept(struct socket *sock, struct socket *newsock, int flags, bool kern) { - struct sock *sk1 = sock->sk; + struct sock *sk1 = sock->sk, *sk2; int err = -EINVAL; - struct sock *sk2 = sk1->sk_prot->accept(sk1, flags, &err, kern); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + sk2 = READ_ONCE(sk1->sk_prot)->accept(sk1, flags, &err, kern); if (!sk2) goto do_err; @@ -825,12 +831,15 @@ ssize_t inet_sendpage(struct socket *sock, struct page *page, int offset, size_t size, int flags) { struct sock *sk = sock->sk; + const struct proto *prot; if (unlikely(inet_send_prepare(sk))) return -EAGAIN; - if (sk->sk_prot->sendpage) - return sk->sk_prot->sendpage(sk, page, offset, size, flags); + /* IPV6_ADDRFORM can change sk->sk_prot under us. */ + prot = READ_ONCE(sk->sk_prot); + if (prot->sendpage) + return prot->sendpage(sk, page, offset, size, flags); return sock_no_sendpage(sock, page, offset, size, flags); } EXPORT_SYMBOL(inet_sendpage); diff --git a/net/ipv4/netfilter/ipt_rpfilter.c b/net/ipv4/netfilter/ipt_rpfilter.c index 8183bbcabb4a..ff85db52b2e5 100644 --- a/net/ipv4/netfilter/ipt_rpfilter.c +++ b/net/ipv4/netfilter/ipt_rpfilter.c @@ -77,7 +77,7 @@ static bool rpfilter_mt(const struct sk_buff *skb, struct xt_action_param *par) flow.flowi4_mark = info->flags & XT_RPFILTER_VALID_MARK ? skb->mark : 0; flow.flowi4_tos = iph->tos & IPTOS_RT_MASK; flow.flowi4_scope = RT_SCOPE_UNIVERSE; - flow.flowi4_oif = l3mdev_master_ifindex_rcu(xt_in(par)); + flow.flowi4_l3mdev = l3mdev_master_ifindex_rcu(xt_in(par)); return rpfilter_lookup_reverse(xt_net(par), &flow, xt_in(par), info->flags) ^ invert; } diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c index 7ade04ff972d..e886147eed11 100644 --- a/net/ipv4/netfilter/nft_fib_ipv4.c +++ b/net/ipv4/netfilter/nft_fib_ipv4.c @@ -84,7 +84,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs, oif = NULL; if (priv->flags & NFTA_FIB_F_IIF) - fl4.flowi4_oif = l3mdev_master_ifindex_rcu(oif); + fl4.flowi4_l3mdev = l3mdev_master_ifindex_rcu(oif); if (nft_hook(pkt) == NF_INET_PRE_ROUTING && nft_fib_is_loopback(pkt->skb, nft_in(pkt))) { diff --git a/net/ipv4/ping.c b/net/ipv4/ping.c index 517042caf6dc..bde333b24837 100644 --- a/net/ipv4/ping.c +++ b/net/ipv4/ping.c @@ -617,21 +617,9 @@ int ping_getfrag(void *from, char *to, { struct pingfakehdr *pfh = from; - if (offset == 0) { - fraglen -= sizeof(struct icmphdr); - if (fraglen < 0) - BUG(); - if (!csum_and_copy_from_iter_full(to + sizeof(struct icmphdr), - fraglen, &pfh->wcheck, - &pfh->msg->msg_iter)) - return -EFAULT; - } else if (offset < sizeof(struct icmphdr)) { - BUG(); - } else { - if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, - &pfh->msg->msg_iter)) - return -EFAULT; - } + if (!csum_and_copy_from_iter_full(to, fraglen, &pfh->wcheck, + &pfh->msg->msg_iter)) + return -EFAULT; #if IS_ENABLED(CONFIG_IPV6) /* For IPv6, checksum each skb as we go along, as expected by @@ -639,7 +627,7 @@ int ping_getfrag(void *from, char *to, * wcheck, it will be finalized in ping_v4_push_pending_frames. */ if (pfh->family == AF_INET6) { - skb->csum = pfh->wcheck; + skb->csum = csum_block_add(skb->csum, pfh->wcheck, odd); skb->ip_summed = CHECKSUM_NONE; pfh->wcheck = 0; } @@ -842,7 +830,8 @@ back_from_confirm: pfh.family = AF_INET; err = ip_append_data(sk, &fl4, ping_getfrag, &pfh, len, - 0, &ipc, &rt, msg->msg_flags); + sizeof(struct icmphdr), &ipc, &rt, + msg->msg_flags); if (err) ip_flush_pending_frames(sk); else diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index 0c51abeee172..f8232811a5be 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -3796,8 +3796,9 @@ int tcp_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval, const struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->setsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->setsockopt(sk, level, optname, + optval, optlen); return do_tcp_setsockopt(sk, level, optname, optval, optlen); } EXPORT_SYMBOL(tcp_setsockopt); @@ -4396,8 +4397,9 @@ int tcp_getsockopt(struct sock *sk, int level, int optname, char __user *optval, struct inet_connection_sock *icsk = inet_csk(sk); if (level != SOL_TCP) - return icsk->icsk_af_ops->getsockopt(sk, level, optname, - optval, optlen); + /* Paired with WRITE_ONCE() in do_ipv6_setsockopt() and tcp_v6_connect() */ + return READ_ONCE(icsk->icsk_af_ops)->getsockopt(sk, level, optname, + optval, optlen); return do_tcp_getsockopt(sk, level, optname, USER_SOCKPTR(optval), USER_SOCKPTR(optlen)); } diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index ddc7ba0554bd..112f28f93693 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -375,6 +375,7 @@ static void tcp_cdg_init(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); struct tcp_sock *tp = tcp_sk(sk); + ca->gradients = NULL; /* We silently fall back to window = 1 if allocation fails. */ if (window > 1) ca->gradients = kcalloc(window, sizeof(ca->gradients[0]), @@ -388,6 +389,7 @@ static void tcp_cdg_release(struct sock *sk) struct cdg *ca = inet_csk_ca(sk); kfree(ca->gradients); + ca->gradients = NULL; } static struct tcp_congestion_ops tcp_cdg __read_mostly = { diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index d63118ce5900..8126f67d18b3 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1598,7 +1598,7 @@ drop: } EXPORT_SYMBOL_GPL(__udp_enqueue_schedule_skb); -void udp_destruct_sock(struct sock *sk) +void udp_destruct_common(struct sock *sk) { /* reclaim completely the forward allocated memory */ struct udp_sock *up = udp_sk(sk); @@ -1611,10 +1611,14 @@ void udp_destruct_sock(struct sock *sk) kfree_skb(skb); } udp_rmem_release(sk, total, 0, true); +} +EXPORT_SYMBOL_GPL(udp_destruct_common); +static void udp_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); inet_sock_destruct(sk); } -EXPORT_SYMBOL_GPL(udp_destruct_sock); int udp_init_sock(struct sock *sk) { @@ -1622,7 +1626,6 @@ int udp_init_sock(struct sock *sk) sk->sk_destruct = udp_destruct_sock; return 0; } -EXPORT_SYMBOL_GPL(udp_init_sock); void skb_consume_udp(struct sock *sk, struct sk_buff *skb, int len) { diff --git a/net/ipv4/udplite.c b/net/ipv4/udplite.c index 6e08a76ae1e7..e0c9cc39b81e 100644 --- a/net/ipv4/udplite.c +++ b/net/ipv4/udplite.c @@ -17,6 +17,14 @@ struct udp_table udplite_table __read_mostly; EXPORT_SYMBOL(udplite_table); +/* Designate sk as UDP-Lite socket */ +static int udplite_sk_init(struct sock *sk) +{ + udp_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplite_rcv(struct sk_buff *skb) { return __udp4_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index d40b7d60e00e..024191004982 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -109,6 +109,12 @@ static __inline__ struct ipv6_pinfo *inet6_sk_generic(struct sock *sk) return (struct ipv6_pinfo *)(((u8 *)sk) + offset); } +void inet6_sock_destruct(struct sock *sk) +{ + inet6_cleanup_sock(sk); + inet_sock_destruct(sk); +} + static int inet6_create(struct net *net, struct socket *sock, int protocol, int kern) { @@ -201,7 +207,7 @@ lookup_protocol: inet->hdrincl = 1; } - sk->sk_destruct = inet_sock_destruct; + sk->sk_destruct = inet6_sock_destruct; sk->sk_family = PF_INET6; sk->sk_protocol = protocol; @@ -510,6 +516,12 @@ void inet6_destroy_sock(struct sock *sk) } EXPORT_SYMBOL_GPL(inet6_destroy_sock); +void inet6_cleanup_sock(struct sock *sk) +{ + inet6_destroy_sock(sk); +} +EXPORT_SYMBOL_GPL(inet6_cleanup_sock); + /* * This does both peername and sockname. */ diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c index 2d2f4dd9e5df..532f4478c884 100644 --- a/net/ipv6/ipv6_sockglue.c +++ b/net/ipv6/ipv6_sockglue.c @@ -419,15 +419,18 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, rtnl_lock(); sockopt_lock_sock(sk); + /* Another thread has converted the socket into IPv4 with + * IPV6_ADDRFORM concurrently. + */ + if (unlikely(sk->sk_family != AF_INET6)) + goto unlock; + switch (optname) { case IPV6_ADDRFORM: if (optlen < sizeof(int)) goto e_inval; if (val == PF_INET) { - struct ipv6_txoptions *opt; - struct sk_buff *pktopt; - if (sk->sk_type == SOCK_RAW) break; @@ -458,7 +461,6 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, break; } - fl6_free_socklist(sk); __ipv6_sock_mc_close(sk); __ipv6_sock_ac_close(sk); @@ -475,9 +477,10 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, &tcp_prot, 1); - /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + /* Paired with READ_ONCE(sk->sk_prot) in inet6_stream_ops */ WRITE_ONCE(sk->sk_prot, &tcp_prot); - icsk->icsk_af_ops = &ipv4_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv4_specific); sk->sk_socket->ops = &inet_stream_ops; sk->sk_family = PF_INET; tcp_sync_mss(sk, icsk->icsk_pmtu_cookie); @@ -490,19 +493,19 @@ int do_ipv6_setsockopt(struct sock *sk, int level, int optname, sock_prot_inuse_add(net, sk->sk_prot, -1); sock_prot_inuse_add(net, prot, 1); - /* Paired with READ_ONCE(sk->sk_prot) in net/ipv6/af_inet6.c */ + /* Paired with READ_ONCE(sk->sk_prot) in inet6_dgram_ops */ WRITE_ONCE(sk->sk_prot, prot); sk->sk_socket->ops = &inet_dgram_ops; sk->sk_family = PF_INET; } - opt = xchg((__force struct ipv6_txoptions **)&np->opt, - NULL); - if (opt) { - atomic_sub(opt->tot_len, &sk->sk_omem_alloc); - txopt_put(opt); - } - pktopt = xchg(&np->pktoptions, NULL); - kfree_skb(pktopt); + + /* Disable all options not to allocate memory anymore, + * but there is still a race. See the lockless path + * in udpv6_sendmsg() and ipv6_local_rxpmtu(). + */ + np->rxopt.all = 0; + + inet6_cleanup_sock(sk); /* * ... and add it to the refcnt debug socks count @@ -994,6 +997,7 @@ done: break; } +unlock: sockopt_release_sock(sk); if (needs_rtnl) rtnl_unlock(); diff --git a/net/ipv6/netfilter/ip6t_rpfilter.c b/net/ipv6/netfilter/ip6t_rpfilter.c index d800801a5dd2..69d86b040a6a 100644 --- a/net/ipv6/netfilter/ip6t_rpfilter.c +++ b/net/ipv6/netfilter/ip6t_rpfilter.c @@ -37,6 +37,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, bool ret = false; struct flowi6 fl6 = { .flowi6_iif = LOOPBACK_IFINDEX, + .flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev), .flowlabel = (* (__be32 *) iph) & IPV6_FLOWINFO_MASK, .flowi6_proto = iph->nexthdr, .daddr = iph->saddr, @@ -55,9 +56,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, if (rpfilter_addr_linklocal(&iph->saddr)) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6.flowi6_oif = dev->ifindex; - /* Set flowi6_oif for vrf devices to lookup route in l3mdev domain. */ - } else if (netif_is_l3_master(dev) || netif_is_l3_slave(dev) || - (flags & XT_RPFILTER_LOOSE) == 0) + } else if ((flags & XT_RPFILTER_LOOSE) == 0) fl6.flowi6_oif = dev->ifindex; rt = (void *)ip6_route_lookup(net, &fl6, skb, lookup_flags); @@ -72,9 +71,7 @@ static bool rpfilter_lookup_reverse6(struct net *net, const struct sk_buff *skb, goto out; } - if (rt->rt6i_idev->dev == dev || - l3mdev_master_ifindex_rcu(rt->rt6i_idev->dev) == dev->ifindex || - (flags & XT_RPFILTER_LOOSE)) + if (rt->rt6i_idev->dev == dev || (flags & XT_RPFILTER_LOOSE)) ret = true; out: ip6_rt_put(rt); diff --git a/net/ipv6/netfilter/nft_fib_ipv6.c b/net/ipv6/netfilter/nft_fib_ipv6.c index 1d7e520d9966..91faac610e03 100644 --- a/net/ipv6/netfilter/nft_fib_ipv6.c +++ b/net/ipv6/netfilter/nft_fib_ipv6.c @@ -41,9 +41,8 @@ static int nft_fib6_flowi_init(struct flowi6 *fl6, const struct nft_fib *priv, if (ipv6_addr_type(&fl6->daddr) & IPV6_ADDR_LINKLOCAL) { lookup_flags |= RT6_LOOKUP_F_IFACE; fl6->flowi6_oif = get_ifindex(dev ? dev : pkt->skb->dev); - } else if ((priv->flags & NFTA_FIB_F_IIF) && - (netif_is_l3_master(dev) || netif_is_l3_slave(dev))) { - fl6->flowi6_oif = dev->ifindex; + } else if (priv->flags & NFTA_FIB_F_IIF) { + fl6->flowi6_l3mdev = l3mdev_master_ifindex_rcu(dev); } if (ipv6_addr_type(&fl6->saddr) & IPV6_ADDR_UNICAST) diff --git a/net/ipv6/ping.c b/net/ipv6/ping.c index 5f2ef8493714..86c26e48d065 100644 --- a/net/ipv6/ping.c +++ b/net/ipv6/ping.c @@ -179,7 +179,7 @@ static int ping_v6_sendmsg(struct sock *sk, struct msghdr *msg, size_t len) lock_sock(sk); err = ip6_append_data(sk, ping_getfrag, &pfh, len, - 0, &ipc6, &fl6, rt, + sizeof(struct icmp6hdr), &ipc6, &fl6, rt, MSG_DONTWAIT); if (err) { diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index a8adda623da1..2a3f9296df1e 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -238,7 +238,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, sin.sin_port = usin->sin6_port; sin.sin_addr.s_addr = usin->sin6_addr.s6_addr32[3]; - icsk->icsk_af_ops = &ipv6_mapped; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_mapped); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, true); sk->sk_backlog_rcv = tcp_v4_do_rcv; @@ -250,7 +251,8 @@ static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, if (err) { icsk->icsk_ext_hdr_len = exthdrlen; - icsk->icsk_af_ops = &ipv6_specific; + /* Paired with READ_ONCE() in tcp_(get|set)sockopt() */ + WRITE_ONCE(icsk->icsk_af_ops, &ipv6_specific); if (sk_is_mptcp(sk)) mptcpv6_handle_mapped(sk, false); sk->sk_backlog_rcv = tcp_v6_do_rcv; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 91e795bb9ade..8d09f0ea5b8c 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -56,6 +56,19 @@ #include <trace/events/skb.h> #include "udp_impl.h" +static void udpv6_destruct_sock(struct sock *sk) +{ + udp_destruct_common(sk); + inet6_sock_destruct(sk); +} + +int udpv6_init_sock(struct sock *sk) +{ + skb_queue_head_init(&udp_sk(sk)->reader_queue); + sk->sk_destruct = udpv6_destruct_sock; + return 0; +} + static u32 udp6_ehashfn(const struct net *net, const struct in6_addr *laddr, const u16 lport, @@ -1733,7 +1746,7 @@ struct proto udpv6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udp_init_sock, + .init = udpv6_init_sock, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, diff --git a/net/ipv6/udp_impl.h b/net/ipv6/udp_impl.h index 4251e49d32a0..0590f566379d 100644 --- a/net/ipv6/udp_impl.h +++ b/net/ipv6/udp_impl.h @@ -12,6 +12,7 @@ int __udp6_lib_rcv(struct sk_buff *, struct udp_table *, int); int __udp6_lib_err(struct sk_buff *, struct inet6_skb_parm *, u8, u8, int, __be32, struct udp_table *); +int udpv6_init_sock(struct sock *sk); int udp_v6_get_port(struct sock *sk, unsigned short snum); void udp_v6_rehash(struct sock *sk); diff --git a/net/ipv6/udplite.c b/net/ipv6/udplite.c index b70725856259..67eaf3ca14ce 100644 --- a/net/ipv6/udplite.c +++ b/net/ipv6/udplite.c @@ -12,6 +12,13 @@ #include <linux/proc_fs.h> #include "udp_impl.h" +static int udplitev6_sk_init(struct sock *sk) +{ + udpv6_init_sock(sk); + udp_sk(sk)->pcflag = UDPLITE_BIT; + return 0; +} + static int udplitev6_rcv(struct sk_buff *skb) { return __udp6_lib_rcv(skb, &udplite_table, IPPROTO_UDPLITE); @@ -38,7 +45,7 @@ struct proto udplitev6_prot = { .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, - .init = udplite_sk_init, + .init = udplitev6_sk_init, .destroy = udpv6_destroy_sock, .setsockopt = udpv6_setsockopt, .getsockopt = udpv6_getsockopt, diff --git a/net/mctp/af_mctp.c b/net/mctp/af_mctp.c index c2fc2a7b2528..b6b5e496fa40 100644 --- a/net/mctp/af_mctp.c +++ b/net/mctp/af_mctp.c @@ -295,11 +295,12 @@ __must_hold(&net->mctp.keys_lock) mctp_dev_release_key(key->dev, key); spin_unlock_irqrestore(&key->lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); - - /* unref for the lists */ - mctp_key_unref(key); + if (!hlist_unhashed(&key->hlist)) { + hlist_del_init(&key->hlist); + hlist_del_init(&key->sklist); + /* unref for the lists */ + mctp_key_unref(key); + } kfree_skb(skb); } @@ -373,9 +374,17 @@ static int mctp_ioctl_alloctag(struct mctp_sock *msk, unsigned long arg) ctl.tag = tag | MCTP_TAG_OWNER | MCTP_TAG_PREALLOC; if (copy_to_user((void __user *)arg, &ctl, sizeof(ctl))) { - spin_lock_irqsave(&key->lock, flags); - __mctp_key_remove(key, net, flags, MCTP_TRACE_KEY_DROPPED); + unsigned long fl2; + /* Unwind our key allocation: the keys list lock needs to be + * taken before the individual key locks, and we need a valid + * flags value (fl2) to pass to __mctp_key_remove, hence the + * second spin_lock_irqsave() rather than a plain spin_lock(). + */ + spin_lock_irqsave(&net->mctp.keys_lock, flags); + spin_lock_irqsave(&key->lock, fl2); + __mctp_key_remove(key, net, fl2, MCTP_TRACE_KEY_DROPPED); mctp_key_unref(key); + spin_unlock_irqrestore(&net->mctp.keys_lock, flags); return -EFAULT; } diff --git a/net/mctp/route.c b/net/mctp/route.c index 3b24b8d18b5b..2155f15a074c 100644 --- a/net/mctp/route.c +++ b/net/mctp/route.c @@ -228,12 +228,12 @@ __releases(&key->lock) if (!key->manual_alloc) { spin_lock_irqsave(&net->mctp.keys_lock, flags); - hlist_del(&key->hlist); - hlist_del(&key->sklist); + if (!hlist_unhashed(&key->hlist)) { + hlist_del_init(&key->hlist); + hlist_del_init(&key->sklist); + mctp_key_unref(key); + } spin_unlock_irqrestore(&net->mctp.keys_lock, flags); - - /* unref for the lists */ - mctp_key_unref(key); } /* and one for the local reference */ diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index cb255d8ed99a..c7b10234cf7c 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -1015,7 +1015,8 @@ static int __ovs_ct_lookup(struct net *net, struct sw_flow_key *key, * connections which we will commit, we may need to attach * the helper here. */ - if (info->commit && info->helper && !nfct_help(ct)) { + if (!nf_ct_is_confirmed(ct) && info->commit && + info->helper && !nfct_help(ct)) { int err = __nf_ct_try_assign_helper(ct, info->ct, GFP_ATOMIC); if (err) diff --git a/tools/testing/selftests/netfilter/Makefile b/tools/testing/selftests/netfilter/Makefile index 600e3a19d5e2..4504ee07be08 100644 --- a/tools/testing/selftests/netfilter/Makefile +++ b/tools/testing/selftests/netfilter/Makefile @@ -6,7 +6,7 @@ TEST_PROGS := nft_trans_stress.sh nft_fib.sh nft_nat.sh bridge_brouter.sh \ nft_concat_range.sh nft_conntrack_helper.sh \ nft_queue.sh nft_meta.sh nf_nat_edemux.sh \ ipip-conntrack-mtu.sh conntrack_tcp_unreplied.sh \ - conntrack_vrf.sh nft_synproxy.sh + conntrack_vrf.sh nft_synproxy.sh rpath.sh CFLAGS += $(shell pkg-config --cflags libmnl 2>/dev/null || echo "-I/usr/include/libmnl") LDLIBS = -lmnl diff --git a/tools/testing/selftests/netfilter/nft_fib.sh b/tools/testing/selftests/netfilter/nft_fib.sh index fd76b69635a4..dff476e45e77 100755 --- a/tools/testing/selftests/netfilter/nft_fib.sh +++ b/tools/testing/selftests/netfilter/nft_fib.sh @@ -188,6 +188,7 @@ test_ping() { ip netns exec ${nsrouter} sysctl net.ipv6.conf.all.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null +ip netns exec ${nsrouter} sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null ip netns exec ${nsrouter} sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null sleep 3 diff --git a/tools/testing/selftests/netfilter/rpath.sh b/tools/testing/selftests/netfilter/rpath.sh new file mode 100755 index 000000000000..2d8da7bd8ab7 --- /dev/null +++ b/tools/testing/selftests/netfilter/rpath.sh @@ -0,0 +1,147 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# return code to signal skipped test +ksft_skip=4 + +# search for legacy iptables (it uses the xtables extensions +if iptables-legacy --version >/dev/null 2>&1; then + iptables='iptables-legacy' +elif iptables --version >/dev/null 2>&1; then + iptables='iptables' +else + iptables='' +fi + +if ip6tables-legacy --version >/dev/null 2>&1; then + ip6tables='ip6tables-legacy' +elif ! ip6tables --version >/dev/null 2>&1; then + ip6tables='ip6tables' +else + ip6tables='' +fi + +if nft --version >/dev/null 2>&1; then + nft='nft' +else + nft='' +fi + +if [ -z "$iptables$ip6tables$nft" ]; then + echo "SKIP: Test needs iptables, ip6tables or nft" + exit $ksft_skip +fi + +sfx=$(mktemp -u "XXXXXXXX") +ns1="ns1-$sfx" +ns2="ns2-$sfx" +trap "ip netns del $ns1; ip netns del $ns2" EXIT + +# create two netns, disable rp_filter in ns2 and +# keep IPv6 address when moving into VRF +ip netns add "$ns1" +ip netns add "$ns2" +ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0 +ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0 +ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1 + +# a standard connection between the netns, should not trigger rp filter +ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2" +ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up +ip -net "$ns1" a a 192.168.23.2/24 dev v0 +ip -net "$ns2" a a 192.168.23.1/24 dev v0 +ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad + +# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0 +ip -net "$ns2" link add d0 type dummy +ip -net "$ns2" link set d0 up +ip -net "$ns1" a a 192.168.42.2/24 dev v0 +ip -net "$ns2" a a 192.168.42.1/24 dev d0 +ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad +ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad + +# firewall matches to test +ip netns exec "$ns2" "$iptables" -t raw -A PREROUTING -s 192.168.0.0/16 -m rpfilter +ip netns exec "$ns2" "$ip6tables" -t raw -A PREROUTING -s fec0::/16 -m rpfilter +ip netns exec "$ns2" nft -f - <<EOF +table inet t { + chain c { + type filter hook prerouting priority raw; + ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter + ip6 saddr fec0::/16 fib saddr . iif oif exists counter + } +} +EOF + +die() { + echo "FAIL: $*" + #ip netns exec "$ns2" "$iptables" -t raw -vS + #ip netns exec "$ns2" "$ip6tables" -t raw -vS + #ip netns exec "$ns2" nft list ruleset + exit 1 +} + +# check rule counters, return true if rule did not match +ipt_zero_rule() { # (command) + [ -n "$1" ] || return 0 + ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0" +} +nft_zero_rule() { # (family) + [ -n "$nft" ] || return 0 + ip netns exec "$ns2" "$nft" list chain inet t c | \ + grep -q "$1 saddr .* counter packets 0 bytes 0" +} + +netns_ping() { # (netns, args...) + local netns="$1" + shift + ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null +} + +testrun() { + # clear counters first + [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z + [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z + if [ -n "$nft" ]; then + ( + echo "delete table inet t"; + ip netns exec "$ns2" nft -s list table inet t; + ) | ip netns exec "$ns2" nft -f - + fi + + # test 1: martian traffic should fail rpfilter matches + netns_ping "$ns1" -I v0 192.168.42.1 && \ + die "martian ping 192.168.42.1 succeeded" + netns_ping "$ns1" -I v0 fec0:42::1 && \ + die "martian ping fec0:42::1 succeeded" + + ipt_zero_rule "$iptables" || die "iptables matched martian" + ipt_zero_rule "$ip6tables" || die "ip6tables matched martian" + nft_zero_rule ip || die "nft IPv4 matched martian" + nft_zero_rule ip6 || die "nft IPv6 matched martian" + + # test 2: rpfilter match should pass for regular traffic + netns_ping "$ns1" 192.168.23.1 || \ + die "regular ping 192.168.23.1 failed" + netns_ping "$ns1" fec0:23::1 || \ + die "regular ping fec0:23::1 failed" + + ipt_zero_rule "$iptables" && die "iptables match not effective" + ipt_zero_rule "$ip6tables" && die "ip6tables match not effective" + nft_zero_rule ip && die "nft IPv4 match not effective" + nft_zero_rule ip6 && die "nft IPv6 match not effective" + +} + +testrun + +# repeat test with vrf device in $ns2 +ip -net "$ns2" link add vrf0 type vrf table 10 +ip -net "$ns2" link set vrf0 up +ip -net "$ns2" link set v0 master vrf0 + +testrun + +echo "PASS: netfilter reverse path match works as intended" +exit 0 |