From 7c3125f0a6ebc17846c5908ad7d6056d66c1c426 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Fri, 20 Dec 2019 11:24:21 -0800 Subject: net: dsa: bcm_sf2: Fix IP fragment location and behavior The IP fragment is specified through user-defined field as the first bit of the first user-defined word. We were previously trying to extract it from the user-defined mask which could not possibly work. The ip_frag is also supposed to be a boolean, if we do not cast it as such, we risk overwriting the next fields in CFP_DATA(6) which would render the rule inoperative. Fixes: 7318166cacad ("net: dsa: bcm_sf2: Add support for ethtool::rxnfc") Signed-off-by: Florian Fainelli Signed-off-by: David S. Miller --- drivers/net/dsa/bcm_sf2_cfp.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/bcm_sf2_cfp.c b/drivers/net/dsa/bcm_sf2_cfp.c index f3f0c3f07391..1962c8330daa 100644 --- a/drivers/net/dsa/bcm_sf2_cfp.c +++ b/drivers/net/dsa/bcm_sf2_cfp.c @@ -358,7 +358,7 @@ static int bcm_sf2_cfp_ipv4_rule_set(struct bcm_sf2_priv *priv, int port, return -EINVAL; } - ip_frag = be32_to_cpu(fs->m_ext.data[0]); + ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1); /* Locate the first rule available */ if (fs->location == RX_CLS_LOC_ANY) @@ -569,7 +569,7 @@ static int bcm_sf2_cfp_rule_cmp(struct bcm_sf2_priv *priv, int port, if (rule->fs.flow_type != fs->flow_type || rule->fs.ring_cookie != fs->ring_cookie || - rule->fs.m_ext.data[0] != fs->m_ext.data[0]) + rule->fs.h_ext.data[0] != fs->h_ext.data[0]) continue; switch (fs->flow_type & ~FLOW_EXT) { @@ -621,7 +621,7 @@ static int bcm_sf2_cfp_ipv6_rule_set(struct bcm_sf2_priv *priv, int port, return -EINVAL; } - ip_frag = be32_to_cpu(fs->m_ext.data[0]); + ip_frag = !!(be32_to_cpu(fs->h_ext.data[0]) & 1); layout = &udf_tcpip6_layout; slice_num = bcm_sf2_get_slice_number(layout, 0); -- cgit v1.2.3 From bd085ef678b2cc8c38c105673dfe8ff8f5ec0c57 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:09 +0800 Subject: net: add bool confirm_neigh parameter for dst_ops.update_pmtu The MTU update code is supposed to be invoked in response to real networking events that update the PMTU. In IPv6 PMTU update function __ip6_rt_update_pmtu() we called dst_confirm_neigh() to update neighbor confirmed time. But for tunnel code, it will call pmtu before xmit, like: - tnl_update_pmtu() - skb_dst_update_pmtu() - ip6_rt_update_pmtu() - __ip6_rt_update_pmtu() - dst_confirm_neigh() If the tunnel remote dst mac address changed and we still do the neigh confirm, we will not be able to update neigh cache and ping6 remote will failed. So for this ip_tunnel_xmit() case, _EVEN_ if the MTU is changed, we should not be invoking dst_confirm_neigh() as we have no evidence of successful two-way communication at this point. On the other hand it is also important to keep the neigh reachability fresh for TCP flows, so we cannot remove this dst_confirm_neigh() call. To fix the issue, we have to add a new bool parameter for dst_ops.update_pmtu to choose whether we should do neigh update or not. I will add the parameter in this patch and set all the callers to true to comply with the previous way, and fix the tunnel code one by one on later patches. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Suggested-by: David Miller Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/gtp.c | 2 +- include/net/dst.h | 2 +- include/net/dst_ops.h | 3 ++- net/bridge/br_nf_core.c | 3 ++- net/decnet/dn_route.c | 6 ++++-- net/ipv4/inet_connection_sock.c | 2 +- net/ipv4/route.c | 9 ++++++--- net/ipv4/xfrm4_policy.c | 5 +++-- net/ipv6/inet6_connection_sock.c | 2 +- net/ipv6/ip6_gre.c | 2 +- net/ipv6/route.c | 22 +++++++++++++++------- net/ipv6/xfrm6_policy.c | 5 +++-- net/netfilter/ipvs/ip_vs_xmit.c | 2 +- net/sctp/transport.c | 2 +- 14 files changed, 42 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index e5b7d6d2286e..913062017be9 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -540,7 +540,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(&rt->dst); } - rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu); + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { diff --git a/include/net/dst.h b/include/net/dst.h index 8224dad2ae94..593630e0e076 100644 --- a/include/net/dst.h +++ b/include/net/dst.h @@ -516,7 +516,7 @@ static inline void skb_dst_update_pmtu(struct sk_buff *skb, u32 mtu) struct dst_entry *dst = skb_dst(skb); if (dst && dst->ops->update_pmtu) - dst->ops->update_pmtu(dst, NULL, skb, mtu); + dst->ops->update_pmtu(dst, NULL, skb, mtu, true); } static inline void skb_tunnel_check_pmtu(struct sk_buff *skb, diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h index 5ec645f27ee3..443863c7b8da 100644 --- a/include/net/dst_ops.h +++ b/include/net/dst_ops.h @@ -27,7 +27,8 @@ struct dst_ops { struct dst_entry * (*negative_advice)(struct dst_entry *); void (*link_failure)(struct sk_buff *); void (*update_pmtu)(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); void (*redirect)(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); int (*local_out)(struct net *net, struct sock *sk, struct sk_buff *skb); diff --git a/net/bridge/br_nf_core.c b/net/bridge/br_nf_core.c index 2cdfc5d6c25d..8c69f0c95a8e 100644 --- a/net/bridge/br_nf_core.c +++ b/net/bridge/br_nf_core.c @@ -22,7 +22,8 @@ #endif static void fake_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/decnet/dn_route.c b/net/decnet/dn_route.c index aea918135ec3..08c3dc45f1a4 100644 --- a/net/decnet/dn_route.c +++ b/net/decnet/dn_route.c @@ -110,7 +110,8 @@ static void dn_dst_ifdown(struct dst_entry *, struct net_device *dev, int how); static struct dst_entry *dn_dst_negative_advice(struct dst_entry *); static void dn_dst_link_failure(struct sk_buff *); static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb , u32 mtu); + struct sk_buff *skb , u32 mtu, + bool confirm_neigh); static void dn_dst_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static struct neighbour *dn_dst_neigh_lookup(const struct dst_entry *dst, @@ -251,7 +252,8 @@ static int dn_dst_gc(struct dst_ops *ops) * advertise to the other end). */ static void dn_dst_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct dn_route *rt = (struct dn_route *) dst; struct neighbour *n = rt->n; diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c index e4c6e8b40490..18c0d5bffe12 100644 --- a/net/ipv4/inet_connection_sock.c +++ b/net/ipv4/inet_connection_sock.c @@ -1086,7 +1086,7 @@ struct dst_entry *inet_csk_update_pmtu(struct sock *sk, u32 mtu) if (!dst) goto out; } - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = __sk_dst_check(sk, 0); if (!dst) diff --git a/net/ipv4/route.c b/net/ipv4/route.c index f88c93c38f11..87e979f2b74a 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -139,7 +139,8 @@ static unsigned int ipv4_mtu(const struct dst_entry *dst); static struct dst_entry *ipv4_negative_advice(struct dst_entry *dst); static void ipv4_link_failure(struct sk_buff *skb); static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void ip_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static void ipv4_dst_destroy(struct dst_entry *dst); @@ -1043,7 +1044,8 @@ static void __ip_rt_update_pmtu(struct rtable *rt, struct flowi4 *fl4, u32 mtu) } static void ip_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct rtable *rt = (struct rtable *) dst; struct flowi4 fl4; @@ -2687,7 +2689,8 @@ static unsigned int ipv4_blackhole_mtu(const struct dst_entry *dst) } static void ipv4_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } diff --git a/net/ipv4/xfrm4_policy.c b/net/ipv4/xfrm4_policy.c index 35b84b52b702..9ebd54752e03 100644 --- a/net/ipv4/xfrm4_policy.c +++ b/net/ipv4/xfrm4_policy.c @@ -100,12 +100,13 @@ static int xfrm4_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, } static void xfrm4_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm4_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/ipv6/inet6_connection_sock.c b/net/ipv6/inet6_connection_sock.c index fe9cb8d1adca..e315526fa244 100644 --- a/net/ipv6/inet6_connection_sock.c +++ b/net/ipv6/inet6_connection_sock.c @@ -146,7 +146,7 @@ struct dst_entry *inet6_csk_update_pmtu(struct sock *sk, u32 mtu) if (IS_ERR(dst)) return NULL; - dst->ops->update_pmtu(dst, sk, NULL, mtu); + dst->ops->update_pmtu(dst, sk, NULL, mtu, true); dst = inet6_csk_route_socket(sk, &fl6); return IS_ERR(dst) ? NULL : dst; diff --git a/net/ipv6/ip6_gre.c b/net/ipv6/ip6_gre.c index 9d0965252ddf..3ba69174ad6c 100644 --- a/net/ipv6/ip6_gre.c +++ b/net/ipv6/ip6_gre.c @@ -1040,7 +1040,7 @@ static netdev_tx_t ip6erspan_tunnel_xmit(struct sk_buff *skb, /* TooBig packet may have updated dst->dev's mtu */ if (!t->parms.collect_md && dst && dst_mtu(dst) > dst->dev->mtu) - dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu); + dst->ops->update_pmtu(dst, NULL, skb, dst->dev->mtu, true); err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu, NEXTHDR_GRE); diff --git a/net/ipv6/route.c b/net/ipv6/route.c index b59940416cb5..affb51c11a25 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -95,7 +95,8 @@ static int ip6_pkt_prohibit(struct sk_buff *skb); static int ip6_pkt_prohibit_out(struct net *net, struct sock *sk, struct sk_buff *skb); static void ip6_link_failure(struct sk_buff *skb); static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu); + struct sk_buff *skb, u32 mtu, + bool confirm_neigh); static void rt6_do_redirect(struct dst_entry *dst, struct sock *sk, struct sk_buff *skb); static int rt6_score_route(const struct fib6_nh *nh, u32 fib6_flags, int oif, @@ -264,7 +265,8 @@ static unsigned int ip6_blackhole_mtu(const struct dst_entry *dst) } static void ip6_rt_blackhole_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { } @@ -2692,7 +2694,8 @@ static bool rt6_cache_allowed_for_pmtu(const struct rt6_info *rt) } static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, - const struct ipv6hdr *iph, u32 mtu) + const struct ipv6hdr *iph, u32 mtu, + bool confirm_neigh) { const struct in6_addr *daddr, *saddr; struct rt6_info *rt6 = (struct rt6_info *)dst; @@ -2710,7 +2713,10 @@ static void __ip6_rt_update_pmtu(struct dst_entry *dst, const struct sock *sk, daddr = NULL; saddr = NULL; } - dst_confirm_neigh(dst, daddr); + + if (confirm_neigh) + dst_confirm_neigh(dst, daddr); + mtu = max_t(u32, mtu, IPV6_MIN_MTU); if (mtu >= dst_mtu(dst)) return; @@ -2764,9 +2770,11 @@ out_unlock: } static void ip6_rt_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { - __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu); + __ip6_rt_update_pmtu(dst, sk, skb ? ipv6_hdr(skb) : NULL, mtu, + confirm_neigh); } void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, @@ -2785,7 +2793,7 @@ void ip6_update_pmtu(struct sk_buff *skb, struct net *net, __be32 mtu, dst = ip6_route_output(net, NULL, &fl6); if (!dst->error) - __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu)); + __ip6_rt_update_pmtu(dst, NULL, iph, ntohl(mtu), true); dst_release(dst); } EXPORT_SYMBOL_GPL(ip6_update_pmtu); diff --git a/net/ipv6/xfrm6_policy.c b/net/ipv6/xfrm6_policy.c index 699e0730ce8e..af7a4b8b1e9c 100644 --- a/net/ipv6/xfrm6_policy.c +++ b/net/ipv6/xfrm6_policy.c @@ -98,12 +98,13 @@ static int xfrm6_fill_dst(struct xfrm_dst *xdst, struct net_device *dev, } static void xfrm6_update_pmtu(struct dst_entry *dst, struct sock *sk, - struct sk_buff *skb, u32 mtu) + struct sk_buff *skb, u32 mtu, + bool confirm_neigh) { struct xfrm_dst *xdst = (struct xfrm_dst *)dst; struct dst_entry *path = xdst->route; - path->ops->update_pmtu(path, sk, skb, mtu); + path->ops->update_pmtu(path, sk, skb, mtu, confirm_neigh); } static void xfrm6_redirect(struct dst_entry *dst, struct sock *sk, diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c index b1e300f8881b..b00866d777fe 100644 --- a/net/netfilter/ipvs/ip_vs_xmit.c +++ b/net/netfilter/ipvs/ip_vs_xmit.c @@ -208,7 +208,7 @@ static inline void maybe_update_pmtu(int skb_af, struct sk_buff *skb, int mtu) struct rtable *ort = skb_rtable(skb); if (!skb->dev && sk && sk_fullsock(sk)) - ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu); + ort->dst.ops->update_pmtu(&ort->dst, sk, NULL, mtu, true); } static inline bool ensure_mtu_is_adequate(struct netns_ipvs *ipvs, int skb_af, diff --git a/net/sctp/transport.c b/net/sctp/transport.c index 7235a6032671..3bbe1a58ec87 100644 --- a/net/sctp/transport.c +++ b/net/sctp/transport.c @@ -263,7 +263,7 @@ bool sctp_transport_update_pmtu(struct sctp_transport *t, u32 pmtu) pf->af->from_sk(&addr, sk); pf->to_sk_daddr(&t->ipaddr, sk); - dst->ops->update_pmtu(dst, sk, NULL, pmtu); + dst->ops->update_pmtu(dst, sk, NULL, pmtu, true); pf->to_sk_daddr(&addr, sk); dst = sctp_transport_dst_check(t); -- cgit v1.2.3 From 6e9105c73f8d2163d12d5dfd762fd75483ed30f5 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Sun, 22 Dec 2019 10:51:11 +0800 Subject: gtp: do not confirm neighbor when do pmtu update When do IPv6 tunnel PMTU update and calls __ip6_rt_update_pmtu() in the end, we should not call dst_confirm_neigh() as there is no two-way communication. Although GTP only support ipv4 right now, and __ip_rt_update_pmtu() does not call dst_confirm_neigh(), we still set it to false to keep consistency with IPv6 code. v5: No change. v4: No change. v3: Do not remove dst_confirm_neigh, but add a new bool parameter in dst_ops.update_pmtu to control whether we should do neighbor confirm. Also split the big patch to small ones for each area. v2: Remove dst_confirm_neigh in __ip6_rt_update_pmtu. Reviewed-by: Guillaume Nault Acked-by: David Ahern Signed-off-by: Hangbin Liu Signed-off-by: David S. Miller --- drivers/net/gtp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/gtp.c b/drivers/net/gtp.c index 913062017be9..fca471e27f39 100644 --- a/drivers/net/gtp.c +++ b/drivers/net/gtp.c @@ -540,7 +540,7 @@ static int gtp_build_skb_ip4(struct sk_buff *skb, struct net_device *dev, mtu = dst_mtu(&rt->dst); } - rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, true); + rt->dst.ops->update_pmtu(&rt->dst, NULL, skb, mtu, false); if (!skb_is_gso(skb) && (iph->frag_off & htons(IP_DF)) && mtu < ntohs(iph->tot_len)) { -- cgit v1.2.3 From 8b5026bc16938920e4780b9094c3bf20e1e0939d Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 23 Dec 2019 15:03:21 +0100 Subject: s390/qeth: fix qdio teardown after early init error qeth_l?_set_online() goes through a number of initialization steps, and on any error uses qeth_l?_stop_card() to tear down the residual state. The first initialization step is qeth_core_hardsetup_card(). When this fails after having established a QDIO context on the device (ie. somewhere after qeth_mpc_initialize()), qeth_l?_stop_card() doesn't shut down this QDIO context again (since the card state hasn't progressed from DOWN at this stage). Even worse, we then call qdio_free() as final teardown step to free the QDIO data structures - while some of them are still hooked into wider QDIO infrastructure such as the IRQ list. This is inevitably followed by use-after-frees and other nastyness. Fix this by unconditionally calling qeth_qdio_clear_card() to shut down the QDIO context, and also to halt/clear any pending activity on the various IO channels. Remove the naive attempt at handling the teardown in qeth_mpc_initialize(), it clearly doesn't suffice and we're handling it properly now in the wider teardown code. Fixes: 4a71df50047f ("qeth: new qeth device driver") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 20 ++++++++------------ drivers/s390/net/qeth_l2_main.c | 2 +- drivers/s390/net/qeth_l3_main.c | 2 +- 3 files changed, 10 insertions(+), 14 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index bc4158888af9..324cf22f9111 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -2482,50 +2482,46 @@ static int qeth_mpc_initialize(struct qeth_card *card) rc = qeth_cm_enable(card); if (rc) { QETH_CARD_TEXT_(card, 2, "2err%d", rc); - goto out_qdio; + return rc; } rc = qeth_cm_setup(card); if (rc) { QETH_CARD_TEXT_(card, 2, "3err%d", rc); - goto out_qdio; + return rc; } rc = qeth_ulp_enable(card); if (rc) { QETH_CARD_TEXT_(card, 2, "4err%d", rc); - goto out_qdio; + return rc; } rc = qeth_ulp_setup(card); if (rc) { QETH_CARD_TEXT_(card, 2, "5err%d", rc); - goto out_qdio; + return rc; } rc = qeth_alloc_qdio_queues(card); if (rc) { QETH_CARD_TEXT_(card, 2, "5err%d", rc); - goto out_qdio; + return rc; } rc = qeth_qdio_establish(card); if (rc) { QETH_CARD_TEXT_(card, 2, "6err%d", rc); qeth_free_qdio_queues(card); - goto out_qdio; + return rc; } rc = qeth_qdio_activate(card); if (rc) { QETH_CARD_TEXT_(card, 2, "7err%d", rc); - goto out_qdio; + return rc; } rc = qeth_dm_act(card); if (rc) { QETH_CARD_TEXT_(card, 2, "8err%d", rc); - goto out_qdio; + return rc; } return 0; -out_qdio: - qeth_qdio_clear_card(card, !IS_IQD(card)); - qdio_free(CARD_DDEV(card)); - return rc; } void qeth_print_status_message(struct qeth_card *card) diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8c95e6019bac..15e2fd65d434 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -287,12 +287,12 @@ static void qeth_l2_stop_card(struct qeth_card *card) card->state = CARD_STATE_HARDSETUP; } if (card->state == CARD_STATE_HARDSETUP) { - qeth_qdio_clear_card(card, 0); qeth_drain_output_queues(card); qeth_clear_working_pool_list(card); card->state = CARD_STATE_DOWN; } + qeth_qdio_clear_card(card, 0); flush_workqueue(card->event_wq); card->info.mac_bits &= ~QETH_LAYER2_MAC_REGISTERED; card->info.promisc_mode = 0; diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c index 04e301de376f..5508ab89b518 100644 --- a/drivers/s390/net/qeth_l3_main.c +++ b/drivers/s390/net/qeth_l3_main.c @@ -1307,12 +1307,12 @@ static void qeth_l3_stop_card(struct qeth_card *card) card->state = CARD_STATE_HARDSETUP; } if (card->state == CARD_STATE_HARDSETUP) { - qeth_qdio_clear_card(card, 0); qeth_drain_output_queues(card); qeth_clear_working_pool_list(card); card->state = CARD_STATE_DOWN; } + qeth_qdio_clear_card(card, 0); flush_workqueue(card->event_wq); card->info.promisc_mode = 0; } -- cgit v1.2.3 From 5b6c7b55cfe26224b0f41b1c226d3534c542787f Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 23 Dec 2019 15:03:22 +0100 Subject: s390/qeth: lock the card while changing its hsuid qeth_l3_dev_hsuid_store() initially checks the card state, but doesn't take the conf_mutex to ensure that the card stays in this state while being reconfigured. Rework the code to take this lock, and drop a redundant state check in a helper function. Fixes: b333293058aa ("qeth: add support for af_iucv HiperSockets transport") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 5 ----- drivers/s390/net/qeth_l3_sys.c | 40 +++++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 17 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index 324cf22f9111..c64ef55f0dff 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -3425,11 +3425,6 @@ int qeth_configure_cq(struct qeth_card *card, enum qeth_cq cq) goto out; } - if (card->state != CARD_STATE_DOWN) { - rc = -1; - goto out; - } - qeth_free_qdio_queues(card); card->options.cq = cq; rc = 0; diff --git a/drivers/s390/net/qeth_l3_sys.c b/drivers/s390/net/qeth_l3_sys.c index f9067ed6c7d3..e8c848f72c6d 100644 --- a/drivers/s390/net/qeth_l3_sys.c +++ b/drivers/s390/net/qeth_l3_sys.c @@ -242,21 +242,33 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, struct device_attribute *attr, const char *buf, size_t count) { struct qeth_card *card = dev_get_drvdata(dev); + int rc = 0; char *tmp; - int rc; if (!IS_IQD(card)) return -EPERM; - if (card->state != CARD_STATE_DOWN) - return -EPERM; - if (card->options.sniffer) - return -EPERM; - if (card->options.cq == QETH_CQ_NOTAVAILABLE) - return -EPERM; + + mutex_lock(&card->conf_mutex); + if (card->state != CARD_STATE_DOWN) { + rc = -EPERM; + goto out; + } + + if (card->options.sniffer) { + rc = -EPERM; + goto out; + } + + if (card->options.cq == QETH_CQ_NOTAVAILABLE) { + rc = -EPERM; + goto out; + } tmp = strsep((char **)&buf, "\n"); - if (strlen(tmp) > 8) - return -EINVAL; + if (strlen(tmp) > 8) { + rc = -EINVAL; + goto out; + } if (card->options.hsuid[0]) /* delete old ip address */ @@ -267,11 +279,13 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, card->options.hsuid[0] = '\0'; memcpy(card->dev->perm_addr, card->options.hsuid, 9); qeth_configure_cq(card, QETH_CQ_DISABLED); - return count; + goto out; } - if (qeth_configure_cq(card, QETH_CQ_ENABLED)) - return -EPERM; + if (qeth_configure_cq(card, QETH_CQ_ENABLED)) { + rc = -EPERM; + goto out; + } snprintf(card->options.hsuid, sizeof(card->options.hsuid), "%-8s", tmp); @@ -280,6 +294,8 @@ static ssize_t qeth_l3_dev_hsuid_store(struct device *dev, rc = qeth_l3_modify_hsuid(card, true); +out: + mutex_unlock(&card->conf_mutex); return rc ? rc : count; } -- cgit v1.2.3 From 68c57bfd52836e31bff33e5e1fc64029749d2c35 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Mon, 23 Dec 2019 15:03:23 +0100 Subject: s390/qeth: fix false reporting of VNIC CHAR config failure Symptom: Error message "Configuring the VNIC characteristics failed" in dmesg whenever an OSA interface on z15 is set online. The VNIC characteristics get re-programmed when setting a L2 device online. This follows the selected 'wanted' characteristics - with the exception that the INVISIBLE characteristic unconditionally gets switched off. For devices that don't support INVISIBLE (ie. OSA), the resulting IO failure raises a noisy error message ("Configuring the VNIC characteristics failed"). For IQD, INVISIBLE is off by default anyways. So don't unnecessarily special-case the INVISIBLE characteristic, and thereby suppress the misleading error message on OSA devices. Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support") Signed-off-by: Alexandra Winter Reviewed-by: Julian Wiedmann Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 15e2fd65d434..fc5d8ed3a737 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2041,7 +2041,6 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, timeout); chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; - chars_tmp |= QETH_VNICC_BRIDGE_INVISIBLE; chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; for_each_set_bit(i, &chars_tmp, chars_len) { vnicc = BIT(i); -- cgit v1.2.3 From e8a66d800471e2df7f0b484e2e46898b21d1fa82 Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Mon, 23 Dec 2019 15:03:24 +0100 Subject: s390/qeth: Fix vnicc_is_in_use if rx_bcast not set Symptom: After vnicc/rx_bcast has been manually set to 0, bridge_* sysfs parameters can still be set or written. Only occurs on HiperSockets, as OSA doesn't support changing rx_bcast. Vnic characteristics and bridgeport settings are mutually exclusive. rx_bcast defaults to 1, so manually setting it to 0 should disable bridge_* parameters. Instead it makes sense here to check the supported mask. If the card does not support vnicc at all, bridge commands are always allowed. Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support") Signed-off-by: Alexandra Winter Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index fc5d8ed3a737..8024a2112a87 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -1952,8 +1952,7 @@ int qeth_l2_vnicc_get_timeout(struct qeth_card *card, u32 *timeout) /* check if VNICC is currently enabled */ bool qeth_l2_vnicc_is_in_use(struct qeth_card *card) { - /* if everything is turned off, VNICC is not active */ - if (!card->options.vnicc.cur_chars) + if (!card->options.vnicc.sup_chars) return false; /* default values are only OK if rx_bcast was not enabled by user * or the card is offline. -- cgit v1.2.3 From d1b9ae1864fc3c000e0eb4af8482d78c63e0915a Mon Sep 17 00:00:00 2001 From: Alexandra Winter Date: Mon, 23 Dec 2019 15:03:25 +0100 Subject: s390/qeth: vnicc Fix init to default During vnicc_init wanted_char should be compared to cur_char and not to QETH_VNICC_DEFAULT. Without this patch there is no way to enforce the default values as desired values. Note, that it is expected, that a card comes online with default values. This patch was tested with private card firmware. Fixes: caa1f0b10d18 ("s390/qeth: add VNICC enable/disable support") Signed-off-by: Alexandra Winter Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_l2_main.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c index 8024a2112a87..47d37e75dda6 100644 --- a/drivers/s390/net/qeth_l2_main.c +++ b/drivers/s390/net/qeth_l2_main.c @@ -2039,7 +2039,9 @@ static void qeth_l2_vnicc_init(struct qeth_card *card) /* enforce assumed default values and recover settings, if changed */ error |= qeth_l2_vnicc_recover_timeout(card, QETH_VNICC_LEARNING, timeout); - chars_tmp = card->options.vnicc.wanted_chars ^ QETH_VNICC_DEFAULT; + /* Change chars, if necessary */ + chars_tmp = card->options.vnicc.wanted_chars ^ + card->options.vnicc.cur_chars; chars_len = sizeof(card->options.vnicc.wanted_chars) * BITS_PER_BYTE; for_each_set_bit(i, &chars_tmp, chars_len) { vnicc = BIT(i); -- cgit v1.2.3 From 0b698c838e84149b690c7e979f78cccb6f8aa4b9 Mon Sep 17 00:00:00 2001 From: Julian Wiedmann Date: Mon, 23 Dec 2019 15:03:26 +0100 Subject: s390/qeth: fix initialization on old HW I stumbled over an old OSA model that claims to support DIAG_ASSIST, but then rejects the cmd to query its DIAG capabilities. In the old code this was ok, as the returned raw error code was > 0. Now that we translate the raw codes to errnos, the "rc < 0" causes us to fail the initialization of the device. The fix is trivial: don't bail out when the DIAG query fails. Such an error is not critical, we can still use the device (with a slightly reduced set of features). Fixes: 742d4d40831d ("s390/qeth: convert remaining legacy cmd callbacks") Signed-off-by: Julian Wiedmann Signed-off-by: David S. Miller --- drivers/s390/net/qeth_core_main.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c index c64ef55f0dff..29facb913671 100644 --- a/drivers/s390/net/qeth_core_main.c +++ b/drivers/s390/net/qeth_core_main.c @@ -5026,10 +5026,8 @@ retriable: } if (qeth_adp_supported(card, IPA_SETADP_SET_DIAG_ASSIST)) { rc = qeth_query_setdiagass(card); - if (rc < 0) { + if (rc) QETH_CARD_TEXT_(card, 2, "8err%d", rc); - goto out; - } } if (!qeth_is_diagass_supported(card, QETH_DIAGS_CMD_TRAP) || -- cgit v1.2.3 From a5bcd72e054aabb93ddc51ed8cde36a5bfc50271 Mon Sep 17 00:00:00 2001 From: Vladyslav Tarasiuk Date: Thu, 26 Dec 2019 10:41:56 +0200 Subject: net/mlxfw: Fix out-of-memory error in mfa2 flash burning The burning process requires to perform internal allocations of large chunks of memory. This memory doesn't need to be contiguous and can be safely allocated by vzalloc() instead of kzalloc(). This patch changes such allocation to avoid possible out-of-memory failure. Fixes: 410ed13cae39 ("Add the mlxfw module for Mellanox firmware flash process") Signed-off-by: Vladyslav Tarasiuk Reviewed-by: Aya Levin Signed-off-by: Leon Romanovsky Tested-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c index 544344ac4894..79057af4fe99 100644 --- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c +++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_mfa2.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include "mlxfw_mfa2.h" #include "mlxfw_mfa2_file.h" @@ -548,7 +549,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_size = be32_to_cpu(comp->size); comp_buf_size = comp_size + mlxfw_mfa2_comp_magic_len; - comp_data = kmalloc(sizeof(*comp_data) + comp_buf_size, GFP_KERNEL); + comp_data = vzalloc(sizeof(*comp_data) + comp_buf_size); if (!comp_data) return ERR_PTR(-ENOMEM); comp_data->comp.data_size = comp_size; @@ -570,7 +571,7 @@ mlxfw_mfa2_file_component_get(const struct mlxfw_mfa2_file *mfa2_file, comp_data->comp.data = comp_data->buff + mlxfw_mfa2_comp_magic_len; return &comp_data->comp; err_out: - kfree(comp_data); + vfree(comp_data); return ERR_PTR(err); } @@ -579,7 +580,7 @@ void mlxfw_mfa2_file_component_put(struct mlxfw_mfa2_component *comp) const struct mlxfw_mfa2_comp_data *comp_data; comp_data = container_of(comp, struct mlxfw_mfa2_comp_data, comp); - kfree(comp_data); + vfree(comp_data); } void mlxfw_mfa2_file_fini(struct mlxfw_mfa2_file *mfa2_file) -- cgit v1.2.3 From c27569fcd6e1b11bd24361346504f2995a256e4e Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 23 Dec 2019 09:39:22 +0200 Subject: dpaa_eth: fix DMA mapping leak On the error path some fragments remain DMA mapped. Adding a fix that unmaps all the fragments. Rework cleanup path to be simpler. Fixes: 8151ee88bad5 ("dpaa_eth: use page backed rx buffers") Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 39 +++++++++++++------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c index 6a9d12dad5d9..a301f0095223 100644 --- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c +++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c @@ -1719,7 +1719,7 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, int page_offset; unsigned int sz; int *count_ptr; - int i; + int i, j; vaddr = phys_to_virt(addr); WARN_ON(!IS_ALIGNED((unsigned long)vaddr, SMP_CACHE_BYTES)); @@ -1736,14 +1736,14 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, WARN_ON(!IS_ALIGNED((unsigned long)sg_vaddr, SMP_CACHE_BYTES)); + dma_unmap_page(priv->rx_dma_dev, sg_addr, + DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE); + /* We may use multiple Rx pools */ dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); if (!dpaa_bp) goto free_buffers; - count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); - dma_unmap_page(priv->rx_dma_dev, sg_addr, - DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE); if (!skb) { sz = dpaa_bp->size + SKB_DATA_ALIGN(sizeof(struct skb_shared_info)); @@ -1786,7 +1786,9 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, skb_add_rx_frag(skb, i - 1, head_page, frag_off, frag_len, dpaa_bp->size); } + /* Update the pool count for the current {cpu x bpool} */ + count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); (*count_ptr)--; if (qm_sg_entry_is_final(&sgt[i])) @@ -1800,26 +1802,25 @@ static struct sk_buff *sg_fd_to_skb(const struct dpaa_priv *priv, return skb; free_buffers: - /* compensate sw bpool counter changes */ - for (i--; i >= 0; i--) { - dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); - if (dpaa_bp) { - count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); - (*count_ptr)++; - } - } /* free all the SG entries */ - for (i = 0; i < DPAA_SGT_MAX_ENTRIES ; i++) { - sg_addr = qm_sg_addr(&sgt[i]); + for (j = 0; j < DPAA_SGT_MAX_ENTRIES ; j++) { + sg_addr = qm_sg_addr(&sgt[j]); sg_vaddr = phys_to_virt(sg_addr); + /* all pages 0..i were unmaped */ + if (j > i) + dma_unmap_page(priv->rx_dma_dev, qm_sg_addr(&sgt[j]), + DPAA_BP_RAW_SIZE, DMA_FROM_DEVICE); free_pages((unsigned long)sg_vaddr, 0); - dpaa_bp = dpaa_bpid2pool(sgt[i].bpid); - if (dpaa_bp) { - count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); - (*count_ptr)--; + /* counters 0..i-1 were decremented */ + if (j >= i) { + dpaa_bp = dpaa_bpid2pool(sgt[j].bpid); + if (dpaa_bp) { + count_ptr = this_cpu_ptr(dpaa_bp->percpu_count); + (*count_ptr)--; + } } - if (qm_sg_entry_is_final(&sgt[i])) + if (qm_sg_entry_is_final(&sgt[j])) break; } /* free the SGT fragment */ -- cgit v1.2.3 From 1c93fb45761e79b3c00080e71523886cefaf351c Mon Sep 17 00:00:00 2001 From: Madalin Bucur Date: Mon, 23 Dec 2019 10:06:10 +0200 Subject: net: phy: aquantia: add suspend / resume ops for AQR105 The suspend/resume code for AQR107 works on AQR105 too. This patch fixes issues with the partner not seeing the link down when the interface using AQR105 is brought down. Fixes: bee8259dd31f ("net: phy: add driver for aquantia phy") Signed-off-by: Madalin Bucur Signed-off-by: David S. Miller --- drivers/net/phy/aquantia_main.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers') diff --git a/drivers/net/phy/aquantia_main.c b/drivers/net/phy/aquantia_main.c index 3b29d381116f..975789d9349d 100644 --- a/drivers/net/phy/aquantia_main.c +++ b/drivers/net/phy/aquantia_main.c @@ -627,6 +627,8 @@ static struct phy_driver aqr_driver[] = { .config_intr = aqr_config_intr, .ack_interrupt = aqr_ack_interrupt, .read_status = aqr_read_status, + .suspend = aqr107_suspend, + .resume = aqr107_resume, }, { PHY_ID_MATCH_MODEL(PHY_ID_AQR106), -- cgit v1.2.3 From 0444716a5dd563526e53ae686115987d5d4c249e Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 23 Dec 2019 10:23:08 -0800 Subject: bnx2x: Use appropriate define for vlan credit Although it has same value as MAX_MAC_CREDIT_E2, use MAX_VLAN_CREDIT_E2 appropriately. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index 7a6e82db4231..ed237854939a 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -1537,7 +1537,7 @@ void bnx2x_get_rss_ind_table(struct bnx2x_rss_config_obj *rss_obj, func_num + GET_NUM_VFS_PER_PF(bp) * VF_MAC_CREDIT_CNT) #define PF_VLAN_CREDIT_E2(bp, func_num) \ - ((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) / \ + ((MAX_VLAN_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) /\ func_num + GET_NUM_VFS_PER_PF(bp) * VF_VLAN_CREDIT_CNT) #endif /* BNX2X_SP_VERBS */ -- cgit v1.2.3 From 5cdc40c7820ff66c2271e0884bd8ee8f7cfd769b Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 23 Dec 2019 10:23:09 -0800 Subject: bnx2x: Fix accounting of vlan resources among the PFs While testing max vlan configuration on the PF, firmware gets assert as driver was configuring number of vlans more than what is supported per port/engine, it was figured out that there is an implicit vlan (hidden default vlan consuming hardware cam entry resource) which is configured default for all the clients (PF/VFs) on client_init ramrod by the adapter implicitly, so when allocating resources among the PFs this implicit vlan should be considered or total vlan entries should be reduced by one to accommodate that default/implicit vlan entry. Signed-off-by: Manish Chopra Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h index ed237854939a..bacc8552bce1 100644 --- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h +++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sp.h @@ -1536,8 +1536,11 @@ void bnx2x_get_rss_ind_table(struct bnx2x_rss_config_obj *rss_obj, ((MAX_MAC_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_MAC_CREDIT_CNT) / \ func_num + GET_NUM_VFS_PER_PF(bp) * VF_MAC_CREDIT_CNT) +#define BNX2X_VFS_VLAN_CREDIT(bp) \ + (GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) + #define PF_VLAN_CREDIT_E2(bp, func_num) \ - ((MAX_VLAN_CREDIT_E2 - GET_NUM_VFS_PER_PATH(bp) * VF_VLAN_CREDIT_CNT) /\ + ((MAX_VLAN_CREDIT_E2 - 1 - BNX2X_VFS_VLAN_CREDIT(bp)) / \ func_num + GET_NUM_VFS_PER_PF(bp) * VF_VLAN_CREDIT_CNT) #endif /* BNX2X_SP_VERBS */ -- cgit v1.2.3 From bd6f48546b9cb7a785344fc78058c420923d7ed8 Mon Sep 17 00:00:00 2001 From: Martin Blumenstingl Date: Thu, 26 Dec 2019 20:01:01 +0100 Subject: net: stmmac: dwmac-meson8b: Fix the RGMII TX delay on Meson8b/8m2 SoCs GXBB and newer SoCs use the fixed FCLK_DIV2 (1GHz) clock as input for the m250_sel clock. Meson8b and Meson8m2 use MPLL2 instead, whose rate can be adjusted at runtime. So far we have been running MPLL2 with ~250MHz (and the internal m250_div with value 1), which worked enough that we could transfer data with an TX delay of 4ns. Unfortunately there is high packet loss with an RGMII PHY when transferring data (receiving data works fine though). Odroid-C1's u-boot is running with a TX delay of only 2ns as well as the internal m250_div set to 2 - no lost (TX) packets can be observed with that setting in u-boot. Manual testing has shown that the TX packet loss goes away when using the following settings in Linux (the vendor kernel uses the same settings): - MPLL2 clock set to ~500MHz - m250_div set to 2 - TX delay set to 2ns on the MAC side Update the m250_div divider settings to only accept dividers greater or equal 2 to fix the TX delay generated by the MAC. iperf3 results before the change: [ ID] Interval Transfer Bitrate Retr [ 5] 0.00-10.00 sec 182 MBytes 153 Mbits/sec 514 sender [ 5] 0.00-10.00 sec 182 MBytes 152 Mbits/sec receiver iperf3 results after the change (including an updated TX delay of 2ns): [ ID] Interval Transfer Bitrate Retr Cwnd [ 5] 0.00-10.00 sec 927 MBytes 778 Mbits/sec 0 sender [ 5] 0.00-10.01 sec 927 MBytes 777 Mbits/sec receiver Fixes: 4f6a71b84e1afd ("net: stmmac: dwmac-meson8b: fix internal RGMII clock configuration") Signed-off-by: Martin Blumenstingl Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c index bd6c01004913..0e2fa14f1423 100644 --- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c +++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c @@ -112,6 +112,14 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) struct device *dev = dwmac->dev; const char *parent_name, *mux_parent_names[MUX_CLK_NUM_PARENTS]; struct meson8b_dwmac_clk_configs *clk_configs; + static const struct clk_div_table div_table[] = { + { .div = 2, .val = 2, }, + { .div = 3, .val = 3, }, + { .div = 4, .val = 4, }, + { .div = 5, .val = 5, }, + { .div = 6, .val = 6, }, + { .div = 7, .val = 7, }, + }; clk_configs = devm_kzalloc(dev, sizeof(*clk_configs), GFP_KERNEL); if (!clk_configs) @@ -146,9 +154,9 @@ static int meson8b_init_rgmii_tx_clk(struct meson8b_dwmac *dwmac) clk_configs->m250_div.reg = dwmac->regs + PRG_ETH0; clk_configs->m250_div.shift = PRG_ETH0_CLK_M250_DIV_SHIFT; clk_configs->m250_div.width = PRG_ETH0_CLK_M250_DIV_WIDTH; - clk_configs->m250_div.flags = CLK_DIVIDER_ONE_BASED | - CLK_DIVIDER_ALLOW_ZERO | - CLK_DIVIDER_ROUND_CLOSEST; + clk_configs->m250_div.table = div_table; + clk_configs->m250_div.flags = CLK_DIVIDER_ALLOW_ZERO | + CLK_DIVIDER_ROUND_CLOSEST; clk = meson8b_dwmac_register_clk(dwmac, "m250_div", &parent_name, 1, &clk_divider_ops, &clk_configs->m250_div.hw); -- cgit v1.2.3 From 314bd842d98e1035cc40b671a71e07f48420e58f Mon Sep 17 00:00:00 2001 From: Amit Cohen Date: Sun, 29 Dec 2019 13:40:22 +0200 Subject: mlxsw: spectrum_router: Skip loopback RIFs during MAC validation When a router interface (RIF) is created the MAC address of the backing netdev is verified to have the same MSBs as existing RIFs. This is required in order to avoid changing existing RIF MAC addresses that all share the same MSBs. Loopback RIFs are special in this regard as they do not have a MAC address, given they are only used to loop packets from the overlay to the underlay. Without this change, an error is returned when trying to create a RIF after the creation of a GRE tunnel that is represented by a loopback RIF. 'rif->dev->dev_addr' points to the GRE device's local IP, which does not share the same MSBs as physical interfaces. Adding an IP address to any physical interface results in: Error: mlxsw_spectrum: All router interface MAC addresses must have the same prefix. Fix this by skipping loopback RIFs during MAC validation. Fixes: 74bc99397438 ("mlxsw: spectrum_router: Veto unsupported RIF MAC addresses") Signed-off-by: Amit Cohen Signed-off-by: Ido Schimmel Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c index 08b7e9f964da..8290e82240fc 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c @@ -7079,6 +7079,9 @@ static int mlxsw_sp_router_port_check_rif_addr(struct mlxsw_sp *mlxsw_sp, for (i = 0; i < MLXSW_CORE_RES_GET(mlxsw_sp->core, MAX_RIFS); i++) { rif = mlxsw_sp->router->rifs[i]; + if (rif && rif->ops && + rif->ops->type == MLXSW_SP_RIF_TYPE_IPIP_LB) + continue; if (rif && rif->dev && rif->dev != dev && !ether_addr_equal_masked(rif->dev->dev_addr, dev_addr, mlxsw_sp->mac_mask)) { -- cgit v1.2.3 From acca789a358cc960be3937851d7de6591c79d6c2 Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Sun, 29 Dec 2019 13:40:23 +0200 Subject: mlxsw: spectrum: Use dedicated policer for VRRP packets Currently, VRRP packets and packets that hit exceptions during routing (e.g., MTU error) are policed using the same policer towards the CPU. This means, for example, that misconfiguration of the MTU on a routed interface can prevent VRRP packets from reaching the CPU, which in turn can cause the VRRP daemon to assume it is the Master router. Fix this by using a dedicated policer for VRRP packets. Fixes: 11566d34f895 ("mlxsw: spectrum: Add VRRP traps") Signed-off-by: Ido Schimmel Reported-by: Alex Veber Tested-by: Alex Veber Signed-off-by: David S. Miller --- drivers/net/ethernet/mellanox/mlxsw/reg.h | 1 + drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 9 +++++++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h index 5294a1622643..af30e8a76682 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/reg.h +++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h @@ -5472,6 +5472,7 @@ enum mlxsw_reg_htgt_trap_group { MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR, MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0, MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1, + MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP, __MLXSW_REG_HTGT_TRAP_GROUP_MAX, MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1 diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c index 556dca328bb5..f7fd5e8fbf96 100644 --- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c +++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c @@ -4542,8 +4542,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = { MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(IPIP_DECAP_ERROR, TRAP_TO_CPU, ROUTER_EXP, false), MLXSW_SP_RXL_MARK(DECAP_ECN0, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, ROUTER_EXP, false), - MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, ROUTER_EXP, false), + MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false), + MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false), /* PKT Sample trap */ MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU, false, SP_IP2ME, DISCARD), @@ -4626,6 +4626,10 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core) rate = 19 * 1024; burst_size = 12; break; + case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP: + rate = 360; + burst_size = 7; + break; default: continue; } @@ -4665,6 +4669,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core) case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM: case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0: + case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP: priority = 5; tc = 5; break; -- cgit v1.2.3 From 0caeaf6ad532f9be5a768a158627cb31921cc8b7 Mon Sep 17 00:00:00 2001 From: Rahul Lakkireddy Date: Mon, 30 Dec 2019 18:14:08 +0530 Subject: cxgb4/cxgb4vf: fix flow control display for auto negotiation As per 802.3-2005, Section Two, Annex 28B, Table 28B-2 [1], when _only_ Rx pause is enabled, both symmetric and asymmetric pause towards local device must be enabled. Also, firmware returns the local device's flow control pause params as part of advertised capabilities and negotiated params as part of current link attributes. So, fix up ethtool's flow control pause params fetch logic to read from acaps, instead of linkattr. [1] https://standards.ieee.org/standard/802_3-2005.html Fixes: c3168cabe1af ("cxgb4/cxgbvf: Handle 32-bit fw port capabilities") Signed-off-by: Surendra Mobiya Signed-off-by: Rahul Lakkireddy Signed-off-by: David S. Miller --- drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 1 + drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb4/t4_hw.c | 21 +++++++++++++-------- drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 4 ++-- drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h | 1 + drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c | 18 +++++++++++------- 6 files changed, 30 insertions(+), 19 deletions(-) (limited to 'drivers') diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h index a70ac2097892..becee29f5df7 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h @@ -504,6 +504,7 @@ struct link_config { enum cc_pause requested_fc; /* flow control user has requested */ enum cc_pause fc; /* actual link flow control */ + enum cc_pause advertised_fc; /* actual advertised flow control */ enum cc_fec requested_fec; /* Forward Error Correction: */ enum cc_fec fec; /* requested and actual in use */ diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c index 20ab3b6285a2..c837382ee522 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c +++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_ethtool.c @@ -807,8 +807,8 @@ static void get_pauseparam(struct net_device *dev, struct port_info *p = netdev_priv(dev); epause->autoneg = (p->link_cfg.requested_fc & PAUSE_AUTONEG) != 0; - epause->rx_pause = (p->link_cfg.fc & PAUSE_RX) != 0; - epause->tx_pause = (p->link_cfg.fc & PAUSE_TX) != 0; + epause->rx_pause = (p->link_cfg.advertised_fc & PAUSE_RX) != 0; + epause->tx_pause = (p->link_cfg.advertised_fc & PAUSE_TX) != 0; } static int set_pauseparam(struct net_device *dev, diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c index 19d18acfc9a6..844fdcf55118 100644 --- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c @@ -4089,7 +4089,8 @@ static inline fw_port_cap32_t cc_to_fwcap_pause(enum cc_pause cc_pause) if (cc_pause & PAUSE_TX) fw_pause |= FW_PORT_CAP32_802_3_PAUSE; else - fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR; + fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR | + FW_PORT_CAP32_802_3_PAUSE; } else if (cc_pause & PAUSE_TX) { fw_pause |= FW_PORT_CAP32_802_3_ASM_DIR; } @@ -8563,17 +8564,17 @@ static fw_port_cap32_t lstatus_to_fwcap(u32 lstatus) void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) { const struct fw_port_cmd *cmd = (const void *)rpl; - int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); - struct adapter *adapter = pi->adapter; + fw_port_cap32_t pcaps, acaps, lpacaps, linkattr; struct link_config *lc = &pi->link_cfg; - int link_ok, linkdnrc; - enum fw_port_type port_type; + struct adapter *adapter = pi->adapter; + unsigned int speed, fc, fec, adv_fc; enum fw_port_module_type mod_type; - unsigned int speed, fc, fec; - fw_port_cap32_t pcaps, acaps, lpacaps, linkattr; + int action, link_ok, linkdnrc; + enum fw_port_type port_type; /* Extract the various fields from the Port Information message. */ + action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); switch (action) { case FW_PORT_ACTION_GET_PORT_INFO: { u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype); @@ -8611,6 +8612,7 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) } fec = fwcap_to_cc_fec(acaps); + adv_fc = fwcap_to_cc_pause(acaps); fc = fwcap_to_cc_pause(linkattr); speed = fwcap_to_speed(linkattr); @@ -8667,7 +8669,9 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) } if (link_ok != lc->link_ok || speed != lc->speed || - fc != lc->fc || fec != lc->fec) { /* something changed */ + fc != lc->fc || adv_fc != lc->advertised_fc || + fec != lc->fec) { + /* something changed */ if (!link_ok && lc->link_ok) { lc->link_down_rc = linkdnrc; dev_warn_ratelimited(adapter->pdev_dev, @@ -8677,6 +8681,7 @@ void t4_handle_get_port_info(struct port_info *pi, const __be64 *rpl) } lc->link_ok = link_ok; lc->speed = speed; + lc->advertised_fc = adv_fc; lc->fc = fc; lc->fec = fec; diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c index f6fc0875d5b0..f4d41f968afa 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c @@ -1690,8 +1690,8 @@ static void cxgb4vf_get_pauseparam(struct net_device *dev, struct port_info *pi = netdev_priv(dev); pauseparam->autoneg = (pi->link_cfg.requested_fc & PAUSE_AUTONEG) != 0; - pauseparam->rx_pause = (pi->link_cfg.fc & PAUSE_RX) != 0; - pauseparam->tx_pause = (pi->link_cfg.fc & PAUSE_TX) != 0; + pauseparam->rx_pause = (pi->link_cfg.advertised_fc & PAUSE_RX) != 0; + pauseparam->tx_pause = (pi->link_cfg.advertised_fc & PAUSE_TX) != 0; } /* diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h index ccca67cf4487..57cfd10a99ec 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_common.h @@ -135,6 +135,7 @@ struct link_config { enum cc_pause requested_fc; /* flow control user has requested */ enum cc_pause fc; /* actual link flow control */ + enum cc_pause advertised_fc; /* actual advertised flow control */ enum cc_fec auto_fec; /* Forward Error Correction: */ enum cc_fec requested_fec; /* "automatic" (IEEE 802.3), */ diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c index 8a389d617a23..9d49ff211cc1 100644 --- a/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c +++ b/drivers/net/ethernet/chelsio/cxgb4vf/t4vf_hw.c @@ -1913,16 +1913,16 @@ static const char *t4vf_link_down_rc_str(unsigned char link_down_rc) static void t4vf_handle_get_port_info(struct port_info *pi, const struct fw_port_cmd *cmd) { - int action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); - struct adapter *adapter = pi->adapter; + fw_port_cap32_t pcaps, acaps, lpacaps, linkattr; struct link_config *lc = &pi->link_cfg; - int link_ok, linkdnrc; - enum fw_port_type port_type; + struct adapter *adapter = pi->adapter; + unsigned int speed, fc, fec, adv_fc; enum fw_port_module_type mod_type; - unsigned int speed, fc, fec; - fw_port_cap32_t pcaps, acaps, lpacaps, linkattr; + int action, link_ok, linkdnrc; + enum fw_port_type port_type; /* Extract the various fields from the Port Information message. */ + action = FW_PORT_CMD_ACTION_G(be32_to_cpu(cmd->action_to_len16)); switch (action) { case FW_PORT_ACTION_GET_PORT_INFO: { u32 lstatus = be32_to_cpu(cmd->u.info.lstatus_to_modtype); @@ -1982,6 +1982,7 @@ static void t4vf_handle_get_port_info(struct port_info *pi, } fec = fwcap_to_cc_fec(acaps); + adv_fc = fwcap_to_cc_pause(acaps); fc = fwcap_to_cc_pause(linkattr); speed = fwcap_to_speed(linkattr); @@ -2012,7 +2013,9 @@ static void t4vf_handle_get_port_info(struct port_info *pi, } if (link_ok != lc->link_ok || speed != lc->speed || - fc != lc->fc || fec != lc->fec) { /* something changed */ + fc != lc->fc || adv_fc != lc->advertised_fc || + fec != lc->fec) { + /* something changed */ if (!link_ok && lc->link_ok) { lc->link_down_rc = linkdnrc; dev_warn_ratelimited(adapter->pdev_dev, @@ -2022,6 +2025,7 @@ static void t4vf_handle_get_port_info(struct port_info *pi, } lc->link_ok = link_ok; lc->speed = speed; + lc->advertised_fc = adv_fc; lc->fc = fc; lc->fec = fec; -- cgit v1.2.3 From 9fcf024dd6fae082f05e8c1fcdae23972b2f6971 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 27 Dec 2019 02:59:54 +0200 Subject: net: dsa: sja1105: Take PTP egress timestamp by port, not mgmt slot The PTP egress timestamp N must be captured from register PTPEGR_TS[n], where n = 2 * PORT + TSREG. There are 10 PTPEGR_TS registers, 2 per port. We are only using TSREG=0. As opposed to the management slots, which are 4 in number (SJA1105_NUM_PORTS, minus the CPU port). Any management frame (which includes PTP frames) can be sent to any non-CPU port through any management slot. When the CPU port is not the last port (#4), there will be a mismatch between the slot and the port number. Luckily, the only mainline occurrence with this switch (arch/arm/boot/dts/ls1021a-tsn.dts) does have the CPU port as #4, so the issue did not manifest itself thus far. Fixes: 47ed985e97f5 ("net: dsa: sja1105: Add logic for TX timestamping") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 2 +- drivers/net/dsa/sja1105/sja1105_ptp.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index a51ac088c0bc..86bbab166633 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1855,7 +1855,7 @@ static netdev_tx_t sja1105_port_deferred_xmit(struct dsa_switch *ds, int port, if (!clone) goto out; - sja1105_ptp_txtstamp_skb(ds, slot, clone); + sja1105_ptp_txtstamp_skb(ds, port, clone); out: mutex_unlock(&priv->mgmt_lock); diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index 54258a25031d..c0fda7db6271 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -659,7 +659,7 @@ void sja1105_ptp_clock_unregister(struct dsa_switch *ds) ptp_data->clock = NULL; } -void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, +void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int port, struct sk_buff *skb) { struct sja1105_private *priv = ds->priv; @@ -679,7 +679,7 @@ void sja1105_ptp_txtstamp_skb(struct dsa_switch *ds, int slot, goto out; } - rc = sja1105_ptpegr_ts_poll(ds, slot, &ts); + rc = sja1105_ptpegr_ts_poll(ds, port, &ts); if (rc < 0) { dev_err(ds->dev, "timed out polling for tstamp\n"); kfree_skb(skb); -- cgit v1.2.3 From 5a47f588ee2366b2febdc822cdfdcf856cb0a777 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 27 Dec 2019 03:01:50 +0200 Subject: net: dsa: sja1105: Really make the PTP command read-write When activating tc-taprio offload on the switch ports, the TAS state machine will try to check whether it is running or not, but will find both the STARTED and STOPPED bits as false in the sja1105_tas_check_running function. So the function will return -EINVAL (an abnormal situation) and the kernel will keep printing this from the TAS FSM workqueue: [ 37.691971] sja1105 spi0.1: An operation returned -22 The reason is that the underlying function that gets called, sja1105_ptp_commit, does not actually do a SPI_READ, but a SPI_WRITE. So the command buffer remains initialized with zeroes instead of retrieving the hardware state. Fix that. Fixes: 41603d78b362 ("net: dsa: sja1105: Make the PTP command read-write") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_ptp.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.c b/drivers/net/dsa/sja1105/sja1105_ptp.c index c0fda7db6271..43ab7589d0d0 100644 --- a/drivers/net/dsa/sja1105/sja1105_ptp.c +++ b/drivers/net/dsa/sja1105/sja1105_ptp.c @@ -234,7 +234,7 @@ int sja1105_ptp_commit(struct dsa_switch *ds, struct sja1105_ptp_cmd *cmd, if (rw == SPI_WRITE) priv->info->ptp_cmd_packing(buf, cmd, PACK); - rc = sja1105_xfer_buf(priv, SPI_WRITE, regs->ptp_control, buf, + rc = sja1105_xfer_buf(priv, rw, regs->ptp_control, buf, SJA1105_SIZE_PTP_CMD); if (rw == SPI_READ) -- cgit v1.2.3 From d00bdc0a8839de9a5c9be5af2a79dbf8e0087689 Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 27 Dec 2019 03:03:54 +0200 Subject: net: dsa: sja1105: Remove restriction of zero base-time for taprio offload The check originates from the initial implementation which was not based on PTP time but on a standalone clock source. In the meantime we can now program the PTPSCHTM register at runtime with the dynamic base time (actually with a value that is 200 ns smaller, to avoid writing DELTA=0 in the Schedule Entry Points Parameters Table). And we also have logic for moving the actual base time in the future of the PHC's current time base, so the check for zero serves no purpose, since even if the user will specify zero, that's not what will end up in the static config table where the limitation is. Fixes: 86db36a347b4 ("net: dsa: sja1105: Implement state machine for TAS with PTP clock source") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_tas.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c index 26b925b5dace..fa6750d973d7 100644 --- a/drivers/net/dsa/sja1105/sja1105_tas.c +++ b/drivers/net/dsa/sja1105/sja1105_tas.c @@ -477,11 +477,6 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port, if (admin->cycle_time_extension) return -ENOTSUPP; - if (!ns_to_sja1105_delta(admin->base_time)) { - dev_err(ds->dev, "A base time of zero is not hardware-allowed\n"); - return -ERANGE; - } - for (i = 0; i < admin->num_entries; i++) { s64 delta_ns = admin->entries[i].interval; s64 delta_cycles = ns_to_sja1105_delta(delta_ns); -- cgit v1.2.3 From 54fa49ee88138756df0fcf867cb1849904710a8c Mon Sep 17 00:00:00 2001 From: Vladimir Oltean Date: Fri, 27 Dec 2019 03:11:13 +0200 Subject: net: dsa: sja1105: Reconcile the meaning of TPID and TPID2 for E/T and P/Q/R/S For first-generation switches (SJA1105E and SJA1105T): - TPID means C-Tag (typically 0x8100) - TPID2 means S-Tag (typically 0x88A8) While for the second generation switches (SJA1105P, SJA1105Q, SJA1105R, SJA1105S) it is the other way around: - TPID means S-Tag (typically 0x88A8) - TPID2 means C-Tag (typically 0x8100) In other words, E/T tags untagged traffic with TPID, and P/Q/R/S with TPID2. So the patch mentioned below fixed VLAN filtering for P/Q/R/S, but broke it for E/T. We strive for a common code path for all switches in the family, so just lie in the static config packing functions that TPID and TPID2 are at swapped bit offsets than they actually are, for P/Q/R/S. This will make both switches understand TPID to be ETH_P_8021Q and TPID2 to be ETH_P_8021AD. The meaning from the original E/T was chosen over P/Q/R/S because E/T is actually the one with public documentation available (UM10944.pdf). Fixes: f9a1a7646c0d ("net: dsa: sja1105: Reverse TPID and TPID2") Signed-off-by: Vladimir Oltean Signed-off-by: David S. Miller --- drivers/net/dsa/sja1105/sja1105_main.c | 8 ++++---- drivers/net/dsa/sja1105/sja1105_static_config.c | 7 +++++-- 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'drivers') diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c index 86bbab166633..1da5ac111499 100644 --- a/drivers/net/dsa/sja1105/sja1105_main.c +++ b/drivers/net/dsa/sja1105/sja1105_main.c @@ -1569,8 +1569,8 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled) if (enabled) { /* Enable VLAN filtering. */ - tpid = ETH_P_8021AD; - tpid2 = ETH_P_8021Q; + tpid = ETH_P_8021Q; + tpid2 = ETH_P_8021AD; } else { /* Disable VLAN filtering. */ tpid = ETH_P_SJA1105; @@ -1579,9 +1579,9 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled) table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS]; general_params = table->entries; - /* EtherType used to identify outer tagged (S-tag) VLAN traffic */ - general_params->tpid = tpid; /* EtherType used to identify inner tagged (C-tag) VLAN traffic */ + general_params->tpid = tpid; + /* EtherType used to identify outer tagged (S-tag) VLAN traffic */ general_params->tpid2 = tpid2; /* When VLAN filtering is on, we need to at least be able to * decode management traffic through the "backup plan". diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c index 0d03e13e9909..63d2311817c4 100644 --- a/drivers/net/dsa/sja1105/sja1105_static_config.c +++ b/drivers/net/dsa/sja1105/sja1105_static_config.c @@ -142,6 +142,9 @@ static size_t sja1105et_general_params_entry_packing(void *buf, void *entry_ptr, return size; } +/* TPID and TPID2 are intentionally reversed so that semantic + * compatibility with E/T is kept. + */ static size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr, enum packing_op op) @@ -166,9 +169,9 @@ sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr, sja1105_packing(buf, &entry->mirr_port, 141, 139, size, op); sja1105_packing(buf, &entry->vlmarker, 138, 107, size, op); sja1105_packing(buf, &entry->vlmask, 106, 75, size, op); - sja1105_packing(buf, &entry->tpid, 74, 59, size, op); + sja1105_packing(buf, &entry->tpid2, 74, 59, size, op); sja1105_packing(buf, &entry->ignore2stf, 58, 58, size, op); - sja1105_packing(buf, &entry->tpid2, 57, 42, size, op); + sja1105_packing(buf, &entry->tpid, 57, 42, size, op); sja1105_packing(buf, &entry->queue_ts, 41, 41, size, op); sja1105_packing(buf, &entry->egrmirrvid, 40, 29, size, op); sja1105_packing(buf, &entry->egrmirrpcp, 28, 26, size, op); -- cgit v1.2.3 From a33121e5487b424339636b25c35d3a180eaa5f5e Mon Sep 17 00:00:00 2001 From: Vladis Dronov Date: Fri, 27 Dec 2019 03:26:27 +0100 Subject: ptp: fix the race between the release of ptp_clock and cdev In a case when a ptp chardev (like /dev/ptp0) is open but an underlying device is removed, closing this file leads to a race. This reproduces easily in a kvm virtual machine: ts# cat openptp0.c int main() { ... fp = fopen("/dev/ptp0", "r"); ... sleep(10); } ts# uname -r 5.5.0-rc3-46cf053e ts# cat /proc/cmdline ... slub_debug=FZP ts# modprobe ptp_kvm ts# ./openptp0 & [1] 670 opened /dev/ptp0, sleeping 10s... ts# rmmod ptp_kvm ts# ls /dev/ptp* ls: cannot access '/dev/ptp*': No such file or directory ts# ...woken up [ 48.010809] general protection fault: 0000 [#1] SMP [ 48.012502] CPU: 6 PID: 658 Comm: openptp0 Not tainted 5.5.0-rc3-46cf053e #25 [ 48.014624] Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), ... [ 48.016270] RIP: 0010:module_put.part.0+0x7/0x80 [ 48.017939] RSP: 0018:ffffb3850073be00 EFLAGS: 00010202 [ 48.018339] RAX: 000000006b6b6b6b RBX: 6b6b6b6b6b6b6b6b RCX: ffff89a476c00ad0 [ 48.018936] RDX: fffff65a08d3ea08 RSI: 0000000000000247 RDI: 6b6b6b6b6b6b6b6b [ 48.019470] ... ^^^ a slub poison [ 48.023854] Call Trace: [ 48.024050] __fput+0x21f/0x240 [ 48.024288] task_work_run+0x79/0x90 [ 48.024555] do_exit+0x2af/0xab0 [ 48.024799] ? vfs_write+0x16a/0x190 [ 48.025082] do_group_exit+0x35/0x90 [ 48.025387] __x64_sys_exit_group+0xf/0x10 [ 48.025737] do_syscall_64+0x3d/0x130 [ 48.026056] entry_SYSCALL_64_after_hwframe+0x44/0xa9 [ 48.026479] RIP: 0033:0x7f53b12082f6 [ 48.026792] ... [ 48.030945] Modules linked in: ptp i6300esb watchdog [last unloaded: ptp_kvm] [ 48.045001] Fixing recursive fault but reboot is needed! This happens in: static void __fput(struct file *file) { ... if (file->f_op->release) file->f_op->release(inode, file); <<< cdev is kfree'd here if (unlikely(S_ISCHR(inode->i_mode) && inode->i_cdev != NULL && !(mode & FMODE_PATH))) { cdev_put(inode->i_cdev); <<< cdev fields are accessed here Namely: __fput() posix_clock_release() kref_put(&clk->kref, delete_clock) <<< the last reference delete_clock() delete_ptp_clock() kfree(ptp) <<< cdev is embedded in ptp cdev_put module_put(p->owner) <<< *p is kfree'd, bang! Here cdev is embedded in posix_clock which is embedded in ptp_clock. The race happens because ptp_clock's lifetime is controlled by two refcounts: kref and cdev.kobj in posix_clock. This is wrong. Make ptp_clock's sysfs device a parent of cdev with cdev_device_add() created especially for such cases. This way the parent device with its ptp_clock is not released until all references to the cdev are released. This adds a requirement that an initialized but not exposed struct device should be provided to posix_clock_register() by a caller instead of a simple dev_t. This approach was adopted from the commit 72139dfa2464 ("watchdog: Fix the race between the release of watchdog_core_data and cdev"). See details of the implementation in the commit 233ed09d7fda ("chardev: add helper function to register char devs with a struct device"). Link: https://lore.kernel.org/linux-fsdevel/20191125125342.6189-1-vdronov@redhat.com/T/#u Analyzed-by: Stephen Johnston Analyzed-by: Vern Lovejoy Signed-off-by: Vladis Dronov Acked-by: Richard Cochran Signed-off-by: David S. Miller --- drivers/ptp/ptp_clock.c | 31 ++++++++++++++----------------- drivers/ptp/ptp_private.h | 2 +- include/linux/posix-clock.h | 19 +++++++++++-------- kernel/time/posix-clock.c | 31 +++++++++++++------------------ 4 files changed, 39 insertions(+), 44 deletions(-) (limited to 'drivers') diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c index e60eab7f8a61..61fafe0374ce 100644 --- a/drivers/ptp/ptp_clock.c +++ b/drivers/ptp/ptp_clock.c @@ -166,9 +166,9 @@ static struct posix_clock_operations ptp_clock_ops = { .read = ptp_read, }; -static void delete_ptp_clock(struct posix_clock *pc) +static void ptp_clock_release(struct device *dev) { - struct ptp_clock *ptp = container_of(pc, struct ptp_clock, clock); + struct ptp_clock *ptp = container_of(dev, struct ptp_clock, dev); mutex_destroy(&ptp->tsevq_mux); mutex_destroy(&ptp->pincfg_mux); @@ -213,7 +213,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } ptp->clock.ops = ptp_clock_ops; - ptp->clock.release = delete_ptp_clock; ptp->info = info; ptp->devid = MKDEV(major, index); ptp->index = index; @@ -236,15 +235,6 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, if (err) goto no_pin_groups; - /* Create a new device in our class. */ - ptp->dev = device_create_with_groups(ptp_class, parent, ptp->devid, - ptp, ptp->pin_attr_groups, - "ptp%d", ptp->index); - if (IS_ERR(ptp->dev)) { - err = PTR_ERR(ptp->dev); - goto no_device; - } - /* Register a new PPS source. */ if (info->pps) { struct pps_source_info pps; @@ -260,8 +250,18 @@ struct ptp_clock *ptp_clock_register(struct ptp_clock_info *info, } } - /* Create a posix clock. */ - err = posix_clock_register(&ptp->clock, ptp->devid); + /* Initialize a new device of our class in our clock structure. */ + device_initialize(&ptp->dev); + ptp->dev.devt = ptp->devid; + ptp->dev.class = ptp_class; + ptp->dev.parent = parent; + ptp->dev.groups = ptp->pin_attr_groups; + ptp->dev.release = ptp_clock_release; + dev_set_drvdata(&ptp->dev, ptp); + dev_set_name(&ptp->dev, "ptp%d", ptp->index); + + /* Create a posix clock and link it to the device. */ + err = posix_clock_register(&ptp->clock, &ptp->dev); if (err) { pr_err("failed to create posix clock\n"); goto no_clock; @@ -273,8 +273,6 @@ no_clock: if (ptp->pps_source) pps_unregister_source(ptp->pps_source); no_pps: - device_destroy(ptp_class, ptp->devid); -no_device: ptp_cleanup_pin_groups(ptp); no_pin_groups: if (ptp->kworker) @@ -304,7 +302,6 @@ int ptp_clock_unregister(struct ptp_clock *ptp) if (ptp->pps_source) pps_unregister_source(ptp->pps_source); - device_destroy(ptp_class, ptp->devid); ptp_cleanup_pin_groups(ptp); posix_clock_unregister(&ptp->clock); diff --git a/drivers/ptp/ptp_private.h b/drivers/ptp/ptp_private.h index 9171d42468fd..6b97155148f1 100644 --- a/drivers/ptp/ptp_private.h +++ b/drivers/ptp/ptp_private.h @@ -28,7 +28,7 @@ struct timestamp_event_queue { struct ptp_clock { struct posix_clock clock; - struct device *dev; + struct device dev; struct ptp_clock_info *info; dev_t devid; int index; /* index into clocks.map */ diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index fe6cfdcfbc26..468328b1e1dd 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -69,29 +69,32 @@ struct posix_clock_operations { * * @ops: Functional interface to the clock * @cdev: Character device instance for this clock - * @kref: Reference count. + * @dev: Pointer to the clock's device. * @rwsem: Protects the 'zombie' field from concurrent access. * @zombie: If 'zombie' is true, then the hardware has disappeared. - * @release: A function to free the structure when the reference count reaches - * zero. May be NULL if structure is statically allocated. * * Drivers should embed their struct posix_clock within a private * structure, obtaining a reference to it during callbacks using * container_of(). + * + * Drivers should supply an initialized but not exposed struct device + * to posix_clock_register(). It is used to manage lifetime of the + * driver's private structure. It's 'release' field should be set to + * a release function for this private structure. */ struct posix_clock { struct posix_clock_operations ops; struct cdev cdev; - struct kref kref; + struct device *dev; struct rw_semaphore rwsem; bool zombie; - void (*release)(struct posix_clock *clk); }; /** * posix_clock_register() - register a new clock - * @clk: Pointer to the clock. Caller must provide 'ops' and 'release' - * @devid: Allocated device id + * @clk: Pointer to the clock. Caller must provide 'ops' field + * @dev: Pointer to the initialized device. Caller must provide + * 'release' field * * A clock driver calls this function to register itself with the * clock device subsystem. If 'clk' points to dynamically allocated @@ -100,7 +103,7 @@ struct posix_clock { * * Returns zero on success, non-zero otherwise. */ -int posix_clock_register(struct posix_clock *clk, dev_t devid); +int posix_clock_register(struct posix_clock *clk, struct device *dev); /** * posix_clock_unregister() - unregister a clock diff --git a/kernel/time/posix-clock.c b/kernel/time/posix-clock.c index ec960bb939fd..200fb2d3be99 100644 --- a/kernel/time/posix-clock.c +++ b/kernel/time/posix-clock.c @@ -14,8 +14,6 @@ #include "posix-timers.h" -static void delete_clock(struct kref *kref); - /* * Returns NULL if the posix_clock instance attached to 'fp' is old and stale. */ @@ -125,7 +123,7 @@ static int posix_clock_open(struct inode *inode, struct file *fp) err = 0; if (!err) { - kref_get(&clk->kref); + get_device(clk->dev); fp->private_data = clk; } out: @@ -141,7 +139,7 @@ static int posix_clock_release(struct inode *inode, struct file *fp) if (clk->ops.release) err = clk->ops.release(clk); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); fp->private_data = NULL; @@ -161,38 +159,35 @@ static const struct file_operations posix_clock_file_operations = { #endif }; -int posix_clock_register(struct posix_clock *clk, dev_t devid) +int posix_clock_register(struct posix_clock *clk, struct device *dev) { int err; - kref_init(&clk->kref); init_rwsem(&clk->rwsem); cdev_init(&clk->cdev, &posix_clock_file_operations); + err = cdev_device_add(&clk->cdev, dev); + if (err) { + pr_err("%s unable to add device %d:%d\n", + dev_name(dev), MAJOR(dev->devt), MINOR(dev->devt)); + return err; + } clk->cdev.owner = clk->ops.owner; - err = cdev_add(&clk->cdev, devid, 1); + clk->dev = dev; - return err; + return 0; } EXPORT_SYMBOL_GPL(posix_clock_register); -static void delete_clock(struct kref *kref) -{ - struct posix_clock *clk = container_of(kref, struct posix_clock, kref); - - if (clk->release) - clk->release(clk); -} - void posix_clock_unregister(struct posix_clock *clk) { - cdev_del(&clk->cdev); + cdev_device_del(&clk->cdev, clk->dev); down_write(&clk->rwsem); clk->zombie = true; up_write(&clk->rwsem); - kref_put(&clk->kref, delete_clock); + put_device(clk->dev); } EXPORT_SYMBOL_GPL(posix_clock_unregister); -- cgit v1.2.3