From 53b56da83d7899de375a9de153fd7f5397de85e6 Mon Sep 17 00:00:00 2001 From: Liping Zhang Date: Mon, 17 Apr 2017 21:18:57 +0800 Subject: netfilter: ctnetlink: make it safer when updating ct->status After converting to use rcu for conntrack hash, one CPU may update the ct->status via ctnetlink, while another CPU may process the packets and update the ct->status. So the non-atomic operation "ct->status |= status;" via ctnetlink becomes unsafe, and this may clear the IPS_DYING_BIT bit set by another CPU unexpectedly. For example: CPU0 CPU1 ctnetlink_change_status __nf_conntrack_find_get old = ct->status nf_ct_gc_expired - nf_ct_kill - test_and_set_bit(IPS_DYING_BIT new = old | status; - ct->status = new; <-- oops, _DYING_ is cleared! Now using a series of atomic bit operation to solve the above issue. Also note, user shouldn't set IPS_TEMPLATE, IPS_SEQ_ADJUST directly, so make these two bits be unchangable too. If we set the IPS_TEMPLATE_BIT, ct will be freed by nf_ct_tmpl_free, but actually it is alloced by nf_conntrack_alloc. If we set the IPS_SEQ_ADJUST_BIT, this may cause the NULL pointer deference, as the nfct_seqadj(ct) maybe NULL. Last, add some comments to describe the logic change due to the commit a963d710f367 ("netfilter: ctnetlink: Fix regression in CTA_STATUS processing"), which makes me feel a little confusing. Fixes: 76507f69c44e ("[NETFILTER]: nf_conntrack: use RCU for conntrack hash") Signed-off-by: Liping Zhang Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_common.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h index 6a8e33dd4ecb..38fc383139f0 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_common.h +++ b/include/uapi/linux/netfilter/nf_conntrack_common.h @@ -82,10 +82,6 @@ enum ip_conntrack_status { IPS_DYING_BIT = 9, IPS_DYING = (1 << IPS_DYING_BIT), - /* Bits that cannot be altered from userland. */ - IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | - IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING), - /* Connection has fixed timeout. */ IPS_FIXED_TIMEOUT_BIT = 10, IPS_FIXED_TIMEOUT = (1 << IPS_FIXED_TIMEOUT_BIT), @@ -101,6 +97,15 @@ enum ip_conntrack_status { /* Conntrack got a helper explicitly attached via CT target. */ IPS_HELPER_BIT = 13, IPS_HELPER = (1 << IPS_HELPER_BIT), + + /* Be careful here, modifying these bits can make things messy, + * so don't let users modify them directly. + */ + IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK | + IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING | + IPS_SEQ_ADJUST | IPS_TEMPLATE), + + __IPS_MAX_BIT = 14, }; /* Connection tracking event types */ -- cgit v1.2.3 From 4d463c4dbc5c1c5d73e488d52faeec05570443a0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 May 2017 00:39:17 +0200 Subject: xdp: use common helper for netlink extended ack reporting Small follow-up to d74a32acd59a ("xdp: use netlink extended ACK reporting") in order to let drivers all use the same NL_SET_ERR_MSG_MOD() helper macro for reporting. This also ensures that we consistently add the driver's prefix for dumping the report in user space to indicate that the error message is driver specific and not coming from core code. Furthermore, NL_SET_ERR_MSG_MOD() now reuses NL_SET_ERR_MSG() and thus makes all macros check the pointer as suggested. References: https://www.spinics.net/lists/netdev/msg433267.html Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 4 ++-- drivers/net/virtio_net.c | 8 ++++---- include/linux/netlink.h | 19 ++++++++----------- 3 files changed, 14 insertions(+), 17 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index db20376260f5..82bd6b0935f1 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -2532,11 +2532,11 @@ nfp_net_check_config(struct nfp_net *nn, struct nfp_net_dp *dp, if (!dp->xdp_prog) return 0; if (dp->fl_bufsz > PAGE_SIZE) { - NL_MOD_TRY_SET_ERR_MSG(extack, "MTU too large w/ XDP enabled"); + NL_SET_ERR_MSG_MOD(extack, "MTU too large w/ XDP enabled"); return -EINVAL; } if (dp->num_tx_rings > nn->max_tx_rings) { - NL_MOD_TRY_SET_ERR_MSG(extack, "Insufficient number of TX rings w/ XDP enabled"); + NL_SET_ERR_MSG_MOD(extack, "Insufficient number of TX rings w/ XDP enabled"); return -EINVAL; } diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c index 3d0bc484b3d7..1c6d3923c224 100644 --- a/drivers/net/virtio_net.c +++ b/drivers/net/virtio_net.c @@ -1891,17 +1891,17 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_TSO6) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_ECN) || virtio_has_feature(vi->vdev, VIRTIO_NET_F_GUEST_UFO)) { - NL_SET_ERR_MSG(extack, "can't set XDP while host is implementing LRO, disable LRO first"); + NL_SET_ERR_MSG_MOD(extack, "Can't set XDP while host is implementing LRO, disable LRO first"); return -EOPNOTSUPP; } if (vi->mergeable_rx_bufs && !vi->any_header_sg) { - NL_SET_ERR_MSG(extack, "XDP expects header/data in single page, any_header_sg required"); + NL_SET_ERR_MSG_MOD(extack, "XDP expects header/data in single page, any_header_sg required"); return -EINVAL; } if (dev->mtu > max_sz) { - NL_SET_ERR_MSG(extack, "MTU too large to enable XDP"); + NL_SET_ERR_MSG_MOD(extack, "MTU too large to enable XDP"); netdev_warn(dev, "XDP requires MTU less than %lu\n", max_sz); return -EINVAL; } @@ -1912,7 +1912,7 @@ static int virtnet_xdp_set(struct net_device *dev, struct bpf_prog *prog, /* XDP requires extra queues for XDP_TX */ if (curr_qp + xdp_qp > vi->max_queue_pairs) { - NL_SET_ERR_MSG(extack, "Too few free TX rings available"); + NL_SET_ERR_MSG_MOD(extack, "Too few free TX rings available"); netdev_warn(dev, "request %i queues but max is %i\n", curr_qp + xdp_qp, vi->max_queue_pairs); return -ENOMEM; diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c20395edf2de..5fff5ba5964e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -86,19 +86,16 @@ struct netlink_ext_ack { * Currently string formatting is not supported (due * to the lack of an output buffer.) */ -#define NL_SET_ERR_MSG(extack, msg) do { \ - static const char _msg[] = (msg); \ - \ - (extack)->_msg = _msg; \ +#define NL_SET_ERR_MSG(extack, msg) do { \ + static const char __msg[] = (msg); \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) \ + __extack->_msg = __msg; \ } while (0) -#define NL_MOD_TRY_SET_ERR_MSG(extack, msg) do { \ - static const char _msg[] = KBUILD_MODNAME ": " msg; \ - struct netlink_ext_ack *_extack = (extack); \ - \ - if (_extack) \ - _extack->_msg = _msg; \ -} while (0) +#define NL_SET_ERR_MSG_MOD(extack, msg) \ + NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From f870a3c6727db5fcfeaa42d099f75872e4b17553 Mon Sep 17 00:00:00 2001 From: "sudarsana.kalluru@cavium.com" Date: Thu, 4 May 2017 08:15:03 -0700 Subject: qed*: Fix possible overflow for status block id field. Value for status block id could be more than 256 in 100G mode, need to update its data type from u8 to u16. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- drivers/net/ethernet/qlogic/qed/qed_dev.c | 8 ++++---- drivers/net/ethernet/qlogic/qed/qed_dev_api.h | 4 ++-- drivers/net/ethernet/qlogic/qed/qed_main.c | 2 +- drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 5 ++--- include/linux/qed/qed_if.h | 2 +- 5 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c index 5f31140d0b77..5c6874af4b65 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev.c +++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c @@ -3586,7 +3586,7 @@ static int qed_set_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, } int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u8 qid, u16 sb_id) + u16 coalesce, u16 qid, u16 sb_id) { struct ustorm_eth_queue_zone eth_qzone; u8 timeset, timer_res; @@ -3607,7 +3607,7 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, } timeset = (u8)(coalesce >> timer_res); - rc = qed_fw_l2_queue(p_hwfn, (u16)qid, &fw_qid); + rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid); if (rc) return rc; @@ -3628,7 +3628,7 @@ out: } int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u8 qid, u16 sb_id) + u16 coalesce, u16 qid, u16 sb_id) { struct xstorm_eth_queue_zone eth_qzone; u8 timeset, timer_res; @@ -3649,7 +3649,7 @@ int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, } timeset = (u8)(coalesce >> timer_res); - rc = qed_fw_l2_queue(p_hwfn, (u16)qid, &fw_qid); + rc = qed_fw_l2_queue(p_hwfn, qid, &fw_qid); if (rc) return rc; diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h index cefe3ee9064a..12d16c096e36 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_dev_api.h +++ b/drivers/net/ethernet/qlogic/qed/qed_dev_api.h @@ -454,7 +454,7 @@ int qed_final_cleanup(struct qed_hwfn *p_hwfn, * @return int */ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u8 qid, u16 sb_id); + u16 coalesce, u16 qid, u16 sb_id); /** * @brief qed_set_txq_coalesce - Configure coalesce parameters for a Tx queue @@ -471,7 +471,7 @@ int qed_set_rxq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, * @return int */ int qed_set_txq_coalesce(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt, - u16 coalesce, u8 qid, u16 sb_id); + u16 coalesce, u16 qid, u16 sb_id); const char *qed_hw_get_resc_name(enum qed_resources res_id); #endif diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c index 59992cf20d42..a5eef1abc5a1 100644 --- a/drivers/net/ethernet/qlogic/qed/qed_main.c +++ b/drivers/net/ethernet/qlogic/qed/qed_main.c @@ -1521,7 +1521,7 @@ static void qed_get_coalesce(struct qed_dev *cdev, u16 *rx_coal, u16 *tx_coal) } static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u8 qid, u16 sb_id) + u16 qid, u16 sb_id) { struct qed_hwfn *hwfn; struct qed_ptt *ptt; diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c index 4dcfe9614731..b22753c5fd44 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c +++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c @@ -706,8 +706,7 @@ static int qede_set_coalesce(struct net_device *dev, { struct qede_dev *edev = netdev_priv(dev); int i, rc = 0; - u16 rxc, txc; - u8 sb_id; + u16 rxc, txc, sb_id; if (!netif_running(dev)) { DP_INFO(edev, "Interface is down\n"); @@ -729,7 +728,7 @@ static int qede_set_coalesce(struct net_device *dev, for_each_queue(i) { sb_id = edev->fp_array[i].sb_info->igu_sb_id; rc = edev->ops->common->set_coalesce(edev->cdev, rxc, txc, - (u8)i, sb_id); + (u16)i, sb_id); if (rc) { DP_INFO(edev, "Set coalesce error, rc = %d\n", rc); return rc; diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5544d7b2f2bb..c70ac13a97e6 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -635,7 +635,7 @@ struct qed_common_ops { * @return 0 on success, error otherwise. */ int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u8 qid, u16 sb_id); + u16 qid, u16 sb_id); /** * @brief set_led - Configure LED mode -- cgit v1.2.3 From 2f460933f58eee3393aba64f0f6d14acb08d1724 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Wed, 3 May 2017 22:07:31 -0700 Subject: ipv6: initialize route null entry in addrconf_init() Andrey reported a crash on init_net.ipv6.ip6_null_entry->rt6i_idev since it is always NULL. This is clearly wrong, we have code to initialize it to loopback_dev, unfortunately the order is still not correct. loopback_dev is registered very early during boot, we lose a chance to re-initialize it in notifier. addrconf_init() is called after ip6_route_init(), which means we have no chance to correct it. Fix it by moving this initialization explicitly after ipv6_add_dev(init_net.loopback_dev) in addrconf_init(). Reported-by: Andrey Konovalov Signed-off-by: Cong Wang Tested-by: Andrey Konovalov Signed-off-by: David S. Miller --- include/net/ip6_route.h | 1 + net/ipv6/addrconf.c | 2 ++ net/ipv6/route.c | 26 +++++++++++++++----------- 3 files changed, 18 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 9dc2c182a263..f5e625f53367 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -84,6 +84,7 @@ struct dst_entry *ip6_route_lookup(struct net *net, struct flowi6 *fl6, struct rt6_info *ip6_pol_route(struct net *net, struct fib6_table *table, int ifindex, struct flowi6 *fl6, int flags); +void ip6_route_init_special_entries(void); int ip6_route_init(void); void ip6_route_cleanup(void); diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index a2a370b71249..77a4bd526d6e 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6573,6 +6573,8 @@ int __init addrconf_init(void) goto errlo; } + ip6_route_init_special_entries(); + for (i = 0; i < IN6_ADDR_HSIZE; i++) INIT_HLIST_HEAD(&inet6_addr_lst[i]); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index a1bf426c959b..2f1136627dcb 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -4027,6 +4027,21 @@ static struct notifier_block ip6_route_dev_notifier = { .priority = 0, }; +void __init ip6_route_init_special_entries(void) +{ + /* Registering of the loopback is done before this portion of code, + * the loopback reference in rt6_info will not be taken, do it + * manually for init_net */ + init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #ifdef CONFIG_IPV6_MULTIPLE_TABLES + init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; + init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); + #endif +} + int __init ip6_route_init(void) { int ret; @@ -4053,17 +4068,6 @@ int __init ip6_route_init(void) ip6_dst_blackhole_ops.kmem_cachep = ip6_dst_ops_template.kmem_cachep; - /* Registering of the loopback is done before this portion of code, - * the loopback reference in rt6_info will not be taken, do it - * manually for init_net */ - init_net.ipv6.ip6_null_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_null_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #ifdef CONFIG_IPV6_MULTIPLE_TABLES - init_net.ipv6.ip6_prohibit_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_prohibit_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - init_net.ipv6.ip6_blk_hole_entry->dst.dev = init_net.loopback_dev; - init_net.ipv6.ip6_blk_hole_entry->rt6i_idev = in6_dev_get(init_net.loopback_dev); - #endif ret = fib6_init(); if (ret) goto out_register_subsys; -- cgit v1.2.3 From 842be75c77cb72ee546a2b19da9c285fb3ded660 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 4 May 2017 08:42:30 +0200 Subject: cfg80211: make RATE_INFO_BW_20 the default Due to the way I did the RX bitrate conversions in mac80211 with spatch, going setting flags to setting the value, many drivers now don't set the bandwidth value for 20 MHz, since with the flags it wasn't necessary to (there was no 20 MHz flag, only the others.) Rather than go through and try to fix up all the drivers, instead renumber the enum so that 20 MHz, which is the typical bandwidth, actually has the value 0, making those drivers all work again. If VHT was hit used with a driver not reporting it, e.g. iwlmvm, this manifested in hitting the bandwidth warning in cfg80211_calculate_bitrate_vht(). Reported-by: Linus Torvalds Tested-by: Jens Axboe Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/cfg80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h index 6e90f1a4950f..15d6599b8bc6 100644 --- a/include/net/cfg80211.h +++ b/include/net/cfg80211.h @@ -1013,9 +1013,9 @@ enum rate_info_flags { * @RATE_INFO_BW_160: 160 MHz bandwidth */ enum rate_info_bw { + RATE_INFO_BW_20 = 0, RATE_INFO_BW_5, RATE_INFO_BW_10, - RATE_INFO_BW_20, RATE_INFO_BW_40, RATE_INFO_BW_80, RATE_INFO_BW_160, -- cgit v1.2.3