From 1ecb7d27b1af6705e9a4e94415b4d8cc8cf2fbfb Mon Sep 17 00:00:00 2001 From: Cristian Marussi Date: Wed, 17 Aug 2022 18:27:27 +0100 Subject: firmware: arm_scmi: Improve checks in the info_get operations SCMI protocols abstract and expose a number of protocol specific resources like clocks, sensors and so on. Information about such specific domain resources are generally exposed via an `info_get` protocol operation. Improve the sanity check on these operations where needed. Link: https://lore.kernel.org/r/20220817172731.1185305-3-cristian.marussi@arm.com Signed-off-by: Cristian Marussi Signed-off-by: Sudeep Holla --- include/linux/scmi_protocol.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scmi_protocol.h b/include/linux/scmi_protocol.h index a193884ecf2b..4f765bc788ff 100644 --- a/include/linux/scmi_protocol.h +++ b/include/linux/scmi_protocol.h @@ -84,7 +84,7 @@ struct scmi_protocol_handle; struct scmi_clk_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); - const struct scmi_clock_info *(*info_get) + const struct scmi_clock_info __must_check *(*info_get) (const struct scmi_protocol_handle *ph, u32 clk_id); int (*rate_get)(const struct scmi_protocol_handle *ph, u32 clk_id, u64 *rate); @@ -466,7 +466,7 @@ enum scmi_sensor_class { */ struct scmi_sensor_proto_ops { int (*count_get)(const struct scmi_protocol_handle *ph); - const struct scmi_sensor_info *(*info_get) + const struct scmi_sensor_info __must_check *(*info_get) (const struct scmi_protocol_handle *ph, u32 sensor_id); int (*trip_point_config)(const struct scmi_protocol_handle *ph, u32 sensor_id, u8 trip_id, u64 trip_value); -- cgit v1.2.3 From 40bfe7a86d84cf08ac6a8fe2f0c8bf7a43edd110 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 24 Aug 2022 17:32:56 +0200 Subject: of/device: Fix up of_dma_configure_id() stub Since the stub version of of_dma_configure_id() was added in commit a081bd4af4ce ("of/device: Add input id to of_dma_configure()"), it has not matched the signature of the full function, leading to build failure reports when code using this function is built on !OF configurations. Fixes: a081bd4af4ce ("of/device: Add input id to of_dma_configure()") Cc: stable@vger.kernel.org Signed-off-by: Thierry Reding Reviewed-by: Frank Rowand Acked-by: Lorenzo Pieralisi Link: https://lore.kernel.org/r/20220824153256.1437483-1-thierry.reding@gmail.com Signed-off-by: Rob Herring --- include/linux/of_device.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 1d7992a02e36..1a803e4335d3 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -101,8 +101,9 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } static inline int of_dma_configure_id(struct device *dev, - struct device_node *np, - bool force_dma) + struct device_node *np, + bool force_dma, + const u32 *id) { return 0; } -- cgit v1.2.3 From 25af7406df5915f04d5f1c8f081dabb0ead1cdcc Mon Sep 17 00:00:00 2001 From: Isaac Manjarres Date: Thu, 18 Aug 2022 18:28:51 +0100 Subject: ARM: 9229/1: amba: Fix use-after-free in amba_read_periphid() After commit f2d3b9a46e0e ("ARM: 9220/1: amba: Remove deferred device addition"), it became possible for amba_read_periphid() to be invoked concurrently from two threads for a particular AMBA device. Consider the case where a thread (T0) is registering an AMBA driver, and searching for all of the devices it can match with on the AMBA bus. Suppose that another thread (T1) is executing the deferred probe work, and is searching through all of the AMBA drivers on the bus for a driver that matches a particular AMBA device. Assume that both threads begin operating on the same AMBA device and the device's peripheral ID is still unknown. In this scenario, the amba_match() function will be invoked for the same AMBA device by both threads, which means amba_read_periphid() can also be invoked by both threads, and both threads will be able to manipulate the AMBA device's pclk pointer without any synchronization. It's possible that one thread will initialize the pclk pointer, then the other thread will re-initialize it, overwriting the previous value, and both will race to free the same pclk, resulting in a use-after-free for whichever thread frees the pclk last. Add a lock per AMBA device to synchronize the handling with detecting the peripheral ID to avoid the use-after-free scenario. The following KFENCE bug report helped detect this problem: ================================================================== BUG: KFENCE: use-after-free read in clk_disable+0x14/0x34 Use-after-free read at 0x(ptrval) (in kfence-#19): clk_disable+0x14/0x34 amba_read_periphid+0xdc/0x134 amba_match+0x3c/0x84 __driver_attach+0x20/0x158 bus_for_each_dev+0x74/0xc0 bus_add_driver+0x154/0x1e8 driver_register+0x88/0x11c do_one_initcall+0x8c/0x2fc kernel_init_freeable+0x190/0x220 kernel_init+0x10/0x108 ret_from_fork+0x14/0x3c 0x0 kfence-#19: 0x(ptrval)-0x(ptrval), size=36, cache=kmalloc-64 allocated by task 8 on cpu 0 at 11.629931s: clk_hw_create_clk+0x38/0x134 amba_get_enable_pclk+0x10/0x68 amba_read_periphid+0x28/0x134 amba_match+0x3c/0x84 __device_attach_driver+0x2c/0xc4 bus_for_each_drv+0x80/0xd0 __device_attach+0xb0/0x1f0 bus_probe_device+0x88/0x90 deferred_probe_work_func+0x8c/0xc0 process_one_work+0x23c/0x690 worker_thread+0x34/0x488 kthread+0xd4/0xfc ret_from_fork+0x14/0x3c 0x0 freed by task 8 on cpu 0 at 11.630095s: amba_read_periphid+0xec/0x134 amba_match+0x3c/0x84 __device_attach_driver+0x2c/0xc4 bus_for_each_drv+0x80/0xd0 __device_attach+0xb0/0x1f0 bus_probe_device+0x88/0x90 deferred_probe_work_func+0x8c/0xc0 process_one_work+0x23c/0x690 worker_thread+0x34/0x488 kthread+0xd4/0xfc ret_from_fork+0x14/0x3c 0x0 Cc: Saravana Kannan Cc: patches@armlinux.org.uk Fixes: f2d3b9a46e0e ("ARM: 9220/1: amba: Remove deferred device addition") Reported-by: Guenter Roeck Tested-by: Guenter Roeck Signed-off-by: Isaac J. Manjarres Signed-off-by: Russell King (Oracle) --- drivers/amba/bus.c | 8 +++++++- include/linux/amba/bus.h | 1 + 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/amba/bus.c b/drivers/amba/bus.c index 32b0e0b930c1..110a535648d2 100644 --- a/drivers/amba/bus.c +++ b/drivers/amba/bus.c @@ -209,6 +209,7 @@ static int amba_match(struct device *dev, struct device_driver *drv) struct amba_device *pcdev = to_amba_device(dev); struct amba_driver *pcdrv = to_amba_driver(drv); + mutex_lock(&pcdev->periphid_lock); if (!pcdev->periphid) { int ret = amba_read_periphid(pcdev); @@ -218,11 +219,14 @@ static int amba_match(struct device *dev, struct device_driver *drv) * permanent failure in reading pid and cid, simply map it to * -EPROBE_DEFER. */ - if (ret) + if (ret) { + mutex_unlock(&pcdev->periphid_lock); return -EPROBE_DEFER; + } dev_set_uevent_suppress(dev, false); kobject_uevent(&dev->kobj, KOBJ_ADD); } + mutex_unlock(&pcdev->periphid_lock); /* When driver_override is set, only bind to the matching driver */ if (pcdev->driver_override) @@ -532,6 +536,7 @@ static void amba_device_release(struct device *dev) if (d->res.parent) release_resource(&d->res); + mutex_destroy(&d->periphid_lock); kfree(d); } @@ -584,6 +589,7 @@ static void amba_device_initialize(struct amba_device *dev, const char *name) dev->dev.dma_mask = &dev->dev.coherent_dma_mask; dev->dev.dma_parms = &dev->dma_parms; dev->res.name = dev_name(&dev->dev); + mutex_init(&dev->periphid_lock); } /** diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index e94cdf235f1d..5001e14c5c06 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -67,6 +67,7 @@ struct amba_device { struct clk *pclk; struct device_dma_parameters dma_parms; unsigned int periphid; + struct mutex periphid_lock; unsigned int cid; struct amba_cs_uci_id uci; unsigned int irq[AMBA_NR_IRQS]; -- cgit v1.2.3 From ac56a0b48da86fd1b4389632fb7c4c8a5d86eefa Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 26 Aug 2022 15:39:28 +0100 Subject: rxrpc: Fix ICMP/ICMP6 error handling Because rxrpc pretends to be a tunnel on top of a UDP/UDP6 socket, allowing it to siphon off UDP packets early in the handling of received UDP packets thereby avoiding the packet going through the UDP receive queue, it doesn't get ICMP packets through the UDP ->sk_error_report() callback. In fact, it doesn't appear that there's any usable option for getting hold of ICMP packets. Fix this by adding a new UDP encap hook to distribute error messages for UDP tunnels. If the hook is set, then the tunnel driver will be able to see ICMP packets. The hook provides the offset into the packet of the UDP header of the original packet that caused the notification. An alternative would be to call the ->error_handler() hook - but that requires that the skbuff be cloned (as ip_icmp_error() or ipv6_cmp_error() do, though isn't really necessary or desirable in rxrpc's case is we want to parse them there and then, not queue them). Changes ======= ver #3) - Fixed an uninitialised variable. ver #2) - Fixed some missing CONFIG_AF_RXRPC_IPV6 conditionals. Fixes: 5271953cad31 ("rxrpc: Use the UDP encap_rcv hook") Signed-off-by: David Howells --- include/linux/udp.h | 1 + include/net/udp_tunnel.h | 4 + net/ipv4/udp.c | 2 + net/ipv4/udp_tunnel_core.c | 1 + net/ipv6/udp.c | 5 +- net/rxrpc/ar-internal.h | 1 + net/rxrpc/local_object.c | 1 + net/rxrpc/peer_event.c | 293 +++++++++++++++++++++++++++++++++++++++------ 8 files changed, 270 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index 254a2654400f..e96da4157d04 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -70,6 +70,7 @@ struct udp_sock { * For encapsulation sockets. */ int (*encap_rcv)(struct sock *sk, struct sk_buff *skb); + void (*encap_err_rcv)(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); int (*encap_err_lookup)(struct sock *sk, struct sk_buff *skb); void (*encap_destroy)(struct sock *sk); diff --git a/include/net/udp_tunnel.h b/include/net/udp_tunnel.h index afc7ce713657..72394f441dad 100644 --- a/include/net/udp_tunnel.h +++ b/include/net/udp_tunnel.h @@ -67,6 +67,9 @@ static inline int udp_sock_create(struct net *net, typedef int (*udp_tunnel_encap_rcv_t)(struct sock *sk, struct sk_buff *skb); typedef int (*udp_tunnel_encap_err_lookup_t)(struct sock *sk, struct sk_buff *skb); +typedef void (*udp_tunnel_encap_err_rcv_t)(struct sock *sk, + struct sk_buff *skb, + unsigned int udp_offset); typedef void (*udp_tunnel_encap_destroy_t)(struct sock *sk); typedef struct sk_buff *(*udp_tunnel_gro_receive_t)(struct sock *sk, struct list_head *head, @@ -80,6 +83,7 @@ struct udp_tunnel_sock_cfg { __u8 encap_type; udp_tunnel_encap_rcv_t encap_rcv; udp_tunnel_encap_err_lookup_t encap_err_lookup; + udp_tunnel_encap_err_rcv_t encap_err_rcv; udp_tunnel_encap_destroy_t encap_destroy; udp_tunnel_gro_receive_t gro_receive; udp_tunnel_gro_complete_t gro_complete; diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 34eda973bbf1..cd72158e953a 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -783,6 +783,8 @@ int __udp4_lib_err(struct sk_buff *skb, u32 info, struct udp_table *udptable) */ if (tunnel) { /* ...not for tunnels though: we don't have a sending socket */ + if (udp_sk(sk)->encap_err_rcv) + udp_sk(sk)->encap_err_rcv(sk, skb, iph->ihl << 2); goto out; } if (!inet->recverr) { diff --git a/net/ipv4/udp_tunnel_core.c b/net/ipv4/udp_tunnel_core.c index 8efaf8c3fe2a..8242c8947340 100644 --- a/net/ipv4/udp_tunnel_core.c +++ b/net/ipv4/udp_tunnel_core.c @@ -72,6 +72,7 @@ void setup_udp_tunnel_sock(struct net *net, struct socket *sock, udp_sk(sk)->encap_type = cfg->encap_type; udp_sk(sk)->encap_rcv = cfg->encap_rcv; + udp_sk(sk)->encap_err_rcv = cfg->encap_err_rcv; udp_sk(sk)->encap_err_lookup = cfg->encap_err_lookup; udp_sk(sk)->encap_destroy = cfg->encap_destroy; udp_sk(sk)->gro_receive = cfg->gro_receive; diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index 16c176e7c69a..3366d6a77ff2 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -616,8 +616,11 @@ int __udp6_lib_err(struct sk_buff *skb, struct inet6_skb_parm *opt, } /* Tunnels don't have an application socket: don't pass errors back */ - if (tunnel) + if (tunnel) { + if (udp_sk(sk)->encap_err_rcv) + udp_sk(sk)->encap_err_rcv(sk, skb, offset); goto out; + } if (!np->recverr) { if (!harderr || sk->sk_state != TCP_ESTABLISHED) diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h index 571436064cd6..62c70709d798 100644 --- a/net/rxrpc/ar-internal.h +++ b/net/rxrpc/ar-internal.h @@ -982,6 +982,7 @@ void rxrpc_send_keepalive(struct rxrpc_peer *); /* * peer_event.c */ +void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, unsigned int udp_offset); void rxrpc_error_report(struct sock *); void rxrpc_peer_keepalive_worker(struct work_struct *); diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c index 96ecb7356c0f..79bb02eb67b2 100644 --- a/net/rxrpc/local_object.c +++ b/net/rxrpc/local_object.c @@ -137,6 +137,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net) tuncfg.encap_type = UDP_ENCAP_RXRPC; tuncfg.encap_rcv = rxrpc_input_packet; + tuncfg.encap_err_rcv = rxrpc_encap_err_rcv; tuncfg.sk_user_data = local; setup_udp_tunnel_sock(net, local->socket, &tuncfg); diff --git a/net/rxrpc/peer_event.c b/net/rxrpc/peer_event.c index be032850ae8c..32561e9567fe 100644 --- a/net/rxrpc/peer_event.c +++ b/net/rxrpc/peer_event.c @@ -16,22 +16,105 @@ #include #include #include +#include #include "ar-internal.h" +static void rxrpc_adjust_mtu(struct rxrpc_peer *, unsigned int); static void rxrpc_store_error(struct rxrpc_peer *, struct sock_exterr_skb *); static void rxrpc_distribute_error(struct rxrpc_peer *, int, enum rxrpc_call_completion); /* - * Find the peer associated with an ICMP packet. + * Find the peer associated with an ICMPv4 packet. */ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, - const struct sk_buff *skb, + struct sk_buff *skb, + unsigned int udp_offset, + unsigned int *info, struct sockaddr_rxrpc *srx) { - struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + struct iphdr *ip, *ip0 = ip_hdr(skb); + struct icmphdr *icmp = icmp_hdr(skb); + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); - _enter(""); + _enter("%u,%u,%u", ip0->protocol, icmp->type, icmp->code); + + switch (icmp->type) { + case ICMP_DEST_UNREACH: + *info = ntohs(icmp->un.frag.mtu); + fallthrough; + case ICMP_TIME_EXCEEDED: + case ICMP_PARAMETERPROB: + ip = (struct iphdr *)((void *)icmp + 8); + break; + default: + return NULL; + } + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + + /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice + * versa? + */ + switch (srx->transport.family) { + case AF_INET: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, + sizeof(struct in_addr)); + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case AF_INET6: + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, &ip->daddr, + sizeof(struct in_addr)); + break; +#endif + + default: + WARN_ON_ONCE(1); + return NULL; + } + + _net("ICMP {%pISp}", &srx->transport); + return rxrpc_lookup_peer_rcu(local, srx); +} + +#ifdef CONFIG_AF_RXRPC_IPV6 +/* + * Find the peer associated with an ICMPv6 packet. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_icmp6_rcu(struct rxrpc_local *local, + struct sk_buff *skb, + unsigned int udp_offset, + unsigned int *info, + struct sockaddr_rxrpc *srx) +{ + struct icmp6hdr *icmp = icmp6_hdr(skb); + struct ipv6hdr *ip, *ip0 = ipv6_hdr(skb); + struct udphdr *udp = (struct udphdr *)(skb->data + udp_offset); + + _enter("%u,%u,%u", ip0->nexthdr, icmp->icmp6_type, icmp->icmp6_code); + + switch (icmp->icmp6_type) { + case ICMPV6_DEST_UNREACH: + *info = ntohl(icmp->icmp6_mtu); + fallthrough; + case ICMPV6_PKT_TOOBIG: + case ICMPV6_TIME_EXCEED: + case ICMPV6_PARAMPROB: + ip = (struct ipv6hdr *)((void *)icmp + 8); + break; + default: + return NULL; + } memset(srx, 0, sizeof(*srx)); srx->transport_type = local->srx.transport_type; @@ -41,6 +124,165 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, /* Can we see an ICMP4 packet on an ICMP6 listening socket? and vice * versa? */ + switch (srx->transport.family) { + case AF_INET: + _net("Rx ICMP6 on v4 sock"); + srx->transport_len = sizeof(srx->transport.sin); + srx->transport.family = AF_INET; + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin.sin_addr, + &ip->daddr.s6_addr32[3], sizeof(struct in_addr)); + break; + case AF_INET6: + _net("Rx ICMP6"); + srx->transport.sin.sin_port = udp->dest; + memcpy(&srx->transport.sin6.sin6_addr, &ip->daddr, + sizeof(struct in6_addr)); + break; + default: + WARN_ON_ONCE(1); + return NULL; + } + + _net("ICMP {%pISp}", &srx->transport); + return rxrpc_lookup_peer_rcu(local, srx); +} +#endif /* CONFIG_AF_RXRPC_IPV6 */ + +/* + * Handle an error received on the local endpoint as a tunnel. + */ +void rxrpc_encap_err_rcv(struct sock *sk, struct sk_buff *skb, + unsigned int udp_offset) +{ + struct sock_extended_err ee; + struct sockaddr_rxrpc srx; + struct rxrpc_local *local; + struct rxrpc_peer *peer; + unsigned int info = 0; + int err; + u8 version = ip_hdr(skb)->version; + u8 type = icmp_hdr(skb)->type; + u8 code = icmp_hdr(skb)->code; + + rcu_read_lock(); + local = rcu_dereference_sk_user_data(sk); + if (unlikely(!local)) { + rcu_read_unlock(); + return; + } + + rxrpc_new_skb(skb, rxrpc_skb_received); + + switch (ip_hdr(skb)->version) { + case IPVERSION: + peer = rxrpc_lookup_peer_icmp_rcu(local, skb, udp_offset, + &info, &srx); + break; +#ifdef CONFIG_AF_RXRPC_IPV6 + case 6: + peer = rxrpc_lookup_peer_icmp6_rcu(local, skb, udp_offset, + &info, &srx); + break; +#endif + default: + rcu_read_unlock(); + return; + } + + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (!peer) { + rcu_read_unlock(); + return; + } + + memset(&ee, 0, sizeof(ee)); + + switch (version) { + case IPVERSION: + switch (type) { + case ICMP_DEST_UNREACH: + switch (code) { + case ICMP_FRAG_NEEDED: + rxrpc_adjust_mtu(peer, info); + rcu_read_unlock(); + rxrpc_put_peer(peer); + return; + default: + break; + } + + err = EHOSTUNREACH; + if (code <= NR_ICMP_UNREACH) { + /* Might want to do something different with + * non-fatal errors + */ + //harderr = icmp_err_convert[code].fatal; + err = icmp_err_convert[code].errno; + } + break; + + case ICMP_TIME_EXCEEDED: + err = EHOSTUNREACH; + break; + default: + err = EPROTO; + break; + } + + ee.ee_origin = SO_EE_ORIGIN_ICMP; + ee.ee_type = type; + ee.ee_code = code; + ee.ee_errno = err; + break; + +#ifdef CONFIG_AF_RXRPC_IPV6 + case 6: + switch (type) { + case ICMPV6_PKT_TOOBIG: + rxrpc_adjust_mtu(peer, info); + rcu_read_unlock(); + rxrpc_put_peer(peer); + return; + } + + icmpv6_err_convert(type, code, &err); + + if (err == EACCES) + err = EHOSTUNREACH; + + ee.ee_origin = SO_EE_ORIGIN_ICMP6; + ee.ee_type = type; + ee.ee_code = code; + ee.ee_errno = err; + break; +#endif + } + + trace_rxrpc_rx_icmp(peer, &ee, &srx); + + rxrpc_distribute_error(peer, err, RXRPC_CALL_NETWORK_ERROR); + rcu_read_unlock(); + rxrpc_put_peer(peer); +} + +/* + * Find the peer associated with a local error. + */ +static struct rxrpc_peer *rxrpc_lookup_peer_local_rcu(struct rxrpc_local *local, + const struct sk_buff *skb, + struct sockaddr_rxrpc *srx) +{ + struct sock_exterr_skb *serr = SKB_EXT_ERR(skb); + + _enter(""); + + memset(srx, 0, sizeof(*srx)); + srx->transport_type = local->srx.transport_type; + srx->transport_len = local->srx.transport_len; + srx->transport.family = local->srx.transport.family; + switch (srx->transport.family) { case AF_INET: srx->transport_len = sizeof(srx->transport.sin); @@ -104,10 +346,8 @@ static struct rxrpc_peer *rxrpc_lookup_peer_icmp_rcu(struct rxrpc_local *local, /* * Handle an MTU/fragmentation problem. */ -static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, struct sock_exterr_skb *serr) +static void rxrpc_adjust_mtu(struct rxrpc_peer *peer, unsigned int mtu) { - u32 mtu = serr->ee.ee_info; - _net("Rx ICMP Fragmentation Needed (%d)", mtu); /* wind down the local interface MTU */ @@ -148,7 +388,7 @@ void rxrpc_error_report(struct sock *sk) struct sock_exterr_skb *serr; struct sockaddr_rxrpc srx; struct rxrpc_local *local; - struct rxrpc_peer *peer; + struct rxrpc_peer *peer = NULL; struct sk_buff *skb; rcu_read_lock(); @@ -172,41 +412,20 @@ void rxrpc_error_report(struct sock *sk) } rxrpc_new_skb(skb, rxrpc_skb_received); serr = SKB_EXT_ERR(skb); - if (!skb->len && serr->ee.ee_origin == SO_EE_ORIGIN_TIMESTAMPING) { - _leave("UDP empty message"); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - return; - } - peer = rxrpc_lookup_peer_icmp_rcu(local, skb, &srx); - if (peer && !rxrpc_get_peer_maybe(peer)) - peer = NULL; - if (!peer) { - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - _leave(" [no peer]"); - return; - } - - trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); - - if ((serr->ee.ee_origin == SO_EE_ORIGIN_ICMP && - serr->ee.ee_type == ICMP_DEST_UNREACH && - serr->ee.ee_code == ICMP_FRAG_NEEDED)) { - rxrpc_adjust_mtu(peer, serr); - rcu_read_unlock(); - rxrpc_free_skb(skb, rxrpc_skb_freed); - rxrpc_put_peer(peer); - _leave(" [MTU update]"); - return; + if (serr->ee.ee_origin == SO_EE_ORIGIN_LOCAL) { + peer = rxrpc_lookup_peer_local_rcu(local, skb, &srx); + if (peer && !rxrpc_get_peer_maybe(peer)) + peer = NULL; + if (peer) { + trace_rxrpc_rx_icmp(peer, &serr->ee, &srx); + rxrpc_store_error(peer, serr); + } } - rxrpc_store_error(peer, serr); rcu_read_unlock(); rxrpc_free_skb(skb, rxrpc_skb_freed); rxrpc_put_peer(peer); - _leave(""); } -- cgit v1.2.3 From 3261400639463a853ba2b3be8bd009c2a8089775 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 31 Aug 2022 23:38:09 +0000 Subject: tcp: TX zerocopy should not sense pfmemalloc status We got a recent syzbot report [1] showing a possible misuse of pfmemalloc page status in TCP zerocopy paths. Indeed, for pages coming from user space or other layers, using page_is_pfmemalloc() is moot, and possibly could give false positives. There has been attempts to make page_is_pfmemalloc() more robust, but not using it in the first place in this context is probably better, removing cpu cycles. Note to stable teams : You need to backport 84ce071e38a6 ("net: introduce __skb_fill_page_desc_noacc") as a prereq. Race is more probable after commit c07aea3ef4d4 ("mm: add a signature in struct page") because page_is_pfmemalloc() is now using low order bit from page->lru.next, which can change more often than page->index. Low order bit should never be set for lru.next (when used as an anchor in LRU list), so KCSAN report is mostly a false positive. Backporting to older kernel versions seems not necessary. [1] BUG: KCSAN: data-race in lru_add_fn / tcp_build_frag write to 0xffffea0004a1d2c8 of 8 bytes by task 18600 on cpu 0: __list_add include/linux/list.h:73 [inline] list_add include/linux/list.h:88 [inline] lruvec_add_folio include/linux/mm_inline.h:105 [inline] lru_add_fn+0x440/0x520 mm/swap.c:228 folio_batch_move_lru+0x1e1/0x2a0 mm/swap.c:246 folio_batch_add_and_move mm/swap.c:263 [inline] folio_add_lru+0xf1/0x140 mm/swap.c:490 filemap_add_folio+0xf8/0x150 mm/filemap.c:948 __filemap_get_folio+0x510/0x6d0 mm/filemap.c:1981 pagecache_get_page+0x26/0x190 mm/folio-compat.c:104 grab_cache_page_write_begin+0x2a/0x30 mm/folio-compat.c:116 ext4_da_write_begin+0x2dd/0x5f0 fs/ext4/inode.c:2988 generic_perform_write+0x1d4/0x3f0 mm/filemap.c:3738 ext4_buffered_write_iter+0x235/0x3e0 fs/ext4/file.c:270 ext4_file_write_iter+0x2e3/0x1210 call_write_iter include/linux/fs.h:2187 [inline] new_sync_write fs/read_write.c:491 [inline] vfs_write+0x468/0x760 fs/read_write.c:578 ksys_write+0xe8/0x1a0 fs/read_write.c:631 __do_sys_write fs/read_write.c:643 [inline] __se_sys_write fs/read_write.c:640 [inline] __x64_sys_write+0x3e/0x50 fs/read_write.c:640 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd read to 0xffffea0004a1d2c8 of 8 bytes by task 18611 on cpu 1: page_is_pfmemalloc include/linux/mm.h:1740 [inline] __skb_fill_page_desc include/linux/skbuff.h:2422 [inline] skb_fill_page_desc include/linux/skbuff.h:2443 [inline] tcp_build_frag+0x613/0xb20 net/ipv4/tcp.c:1018 do_tcp_sendpages+0x3e8/0xaf0 net/ipv4/tcp.c:1075 tcp_sendpage_locked net/ipv4/tcp.c:1140 [inline] tcp_sendpage+0x89/0xb0 net/ipv4/tcp.c:1150 inet_sendpage+0x7f/0xc0 net/ipv4/af_inet.c:833 kernel_sendpage+0x184/0x300 net/socket.c:3561 sock_sendpage+0x5a/0x70 net/socket.c:1054 pipe_to_sendpage+0x128/0x160 fs/splice.c:361 splice_from_pipe_feed fs/splice.c:415 [inline] __splice_from_pipe+0x222/0x4d0 fs/splice.c:559 splice_from_pipe fs/splice.c:594 [inline] generic_splice_sendpage+0x89/0xc0 fs/splice.c:743 do_splice_from fs/splice.c:764 [inline] direct_splice_actor+0x80/0xa0 fs/splice.c:931 splice_direct_to_actor+0x305/0x620 fs/splice.c:886 do_splice_direct+0xfb/0x180 fs/splice.c:974 do_sendfile+0x3bf/0x910 fs/read_write.c:1249 __do_sys_sendfile64 fs/read_write.c:1317 [inline] __se_sys_sendfile64 fs/read_write.c:1303 [inline] __x64_sys_sendfile64+0x10c/0x150 fs/read_write.c:1303 do_syscall_x64 arch/x86/entry/common.c:50 [inline] do_syscall_64+0x2b/0x70 arch/x86/entry/common.c:80 entry_SYSCALL_64_after_hwframe+0x63/0xcd value changed: 0x0000000000000000 -> 0xffffea0004a1d288 Reported by Kernel Concurrency Sanitizer on: CPU: 1 PID: 18611 Comm: syz-executor.4 Not tainted 6.0.0-rc2-syzkaller-00248-ge022620b5d05-dirty #0 Hardware name: Google Google Compute Engine/Google Compute Engine, BIOS Google 07/22/2022 Fixes: c07aea3ef4d4 ("mm: add a signature in struct page") Reported-by: syzbot Signed-off-by: Eric Dumazet Cc: Shakeel Butt Reviewed-by: Shakeel Butt Signed-off-by: David S. Miller --- include/linux/skbuff.h | 21 +++++++++++++++++++++ net/core/datagram.c | 2 +- net/ipv4/tcp.c | 2 +- 3 files changed, 23 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ca8afa382bf2..18e163a3460d 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2444,6 +2444,27 @@ static inline void skb_fill_page_desc(struct sk_buff *skb, int i, skb_shinfo(skb)->nr_frags = i + 1; } +/** + * skb_fill_page_desc_noacc - initialise a paged fragment in an skb + * @skb: buffer containing fragment to be initialised + * @i: paged fragment index to initialise + * @page: the page to use for this fragment + * @off: the offset to the data with @page + * @size: the length of the data + * + * Variant of skb_fill_page_desc() which does not deal with + * pfmemalloc, if page is not owned by us. + */ +static inline void skb_fill_page_desc_noacc(struct sk_buff *skb, int i, + struct page *page, int off, + int size) +{ + struct skb_shared_info *shinfo = skb_shinfo(skb); + + __skb_fill_page_desc_noacc(shinfo, i, page, off, size); + shinfo->nr_frags = i + 1; +} + void skb_add_rx_frag(struct sk_buff *skb, int i, struct page *page, int off, int size, unsigned int truesize); diff --git a/net/core/datagram.c b/net/core/datagram.c index 7255531f63ae..e4ff2db40c98 100644 --- a/net/core/datagram.c +++ b/net/core/datagram.c @@ -677,7 +677,7 @@ int __zerocopy_sg_from_iter(struct msghdr *msg, struct sock *sk, page_ref_sub(last_head, refs); refs = 0; } - skb_fill_page_desc(skb, frag++, head, start, size); + skb_fill_page_desc_noacc(skb, frag++, head, start, size); } if (refs) page_ref_sub(last_head, refs); diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c index e5011c136fdb..6cdfce6f2867 100644 --- a/net/ipv4/tcp.c +++ b/net/ipv4/tcp.c @@ -1015,7 +1015,7 @@ new_segment: skb_frag_size_add(&skb_shinfo(skb)->frags[i - 1], copy); } else { get_page(page); - skb_fill_page_desc(skb, i, page, offset, copy); + skb_fill_page_desc_noacc(skb, i, page, offset, copy); } if (!(flags & MSG_NO_SHARED_FRAGS)) -- cgit v1.2.3 From b30f7c8eb0780e1479a9882526e838664271f4c9 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 1 Sep 2022 13:07:32 +0100 Subject: spi: mux: Fix mux interaction with fast path optimisations The spi-mux driver is rather too clever and attempts to resubmit any message that is submitted to it to the parent controller with some adjusted callbacks. This does not play at all nicely with the fast path which now sets flags on the message indicating that it's being handled through the fast path, we see async messages flagged as being on the fast path. Ideally the spi-mux code would duplicate the message but that's rather invasive and a bit fragile in that it relies on the mux knowing which fields in the message to copy. Instead teach the core that there are controllers which can't cope with the fast path and have the mux flag itself as being such a controller, ensuring that messages going via the mux don't get partially handled via the fast path. This will reduce the performance of any spi-mux connected device since we'll now always use the thread for both the actual controller and the mux controller instead of just the actual controller but given that we were always hitting the slow path anyway it's hopefully not too much of an additional cost and it allows us to keep the fast path. Fixes: ae7d2346dc89 ("spi: Don't use the message queue if possible in spi_sync") Reported-by: Casper Andersson Tested-by: Casper Andersson Signed-off-by: Mark Brown Link: https://lore.kernel.org/r/20220901120732.49245-1-broonie@kernel.org Signed-off-by: Mark Brown --- drivers/spi/spi-mux.c | 1 + drivers/spi/spi.c | 2 +- include/linux/spi/spi.h | 2 ++ 3 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/drivers/spi/spi-mux.c b/drivers/spi/spi-mux.c index f5d32ec4634e..0709e987bd5a 100644 --- a/drivers/spi/spi-mux.c +++ b/drivers/spi/spi-mux.c @@ -161,6 +161,7 @@ static int spi_mux_probe(struct spi_device *spi) ctlr->num_chipselect = mux_control_states(priv->mux); ctlr->bus_num = -1; ctlr->dev.of_node = spi->dev.of_node; + ctlr->must_async = true; ret = devm_spi_register_controller(&spi->dev, ctlr); if (ret) diff --git a/drivers/spi/spi.c b/drivers/spi/spi.c index 8f97a3eacdea..5cbe090f7676 100644 --- a/drivers/spi/spi.c +++ b/drivers/spi/spi.c @@ -4033,7 +4033,7 @@ static int __spi_sync(struct spi_device *spi, struct spi_message *message) * guard against reentrancy from a different context. The io_mutex * will catch those cases. */ - if (READ_ONCE(ctlr->queue_empty)) { + if (READ_ONCE(ctlr->queue_empty) && !ctlr->must_async) { message->actual_length = 0; message->status = -EINPROGRESS; diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e6c73d5ff1a8..f089ee1ead58 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -469,6 +469,7 @@ extern struct spi_device *spi_new_ancillary_device(struct spi_device *spi, u8 ch * SPI_TRANS_FAIL_NO_START. * @queue_empty: signal green light for opportunistically skipping the queue * for spi_sync transfers. + * @must_async: disable all fast paths in the core * * Each SPI controller can communicate with one or more @spi_device * children. These make a small bus, sharing MOSI, MISO and SCK signals @@ -690,6 +691,7 @@ struct spi_controller { /* Flag for enabling opportunistic skipping of the queue in spi_sync */ bool queue_empty; + bool must_async; }; static inline void *spi_controller_get_devdata(struct spi_controller *ctlr) -- cgit v1.2.3 From 2aec909912da55a6e469fd6ee8412080a5433ed2 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 29 Aug 2022 11:46:38 +0200 Subject: wifi: use struct_group to copy addresses We sometimes copy all the addresses from the 802.11 header for the AAD, which may cause complaints from fortify checks. Use struct_group() to avoid the compiler warnings/errors. Change-Id: Ic3ea389105e7813b22095b295079eecdabde5045 Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 8 +++++--- net/mac80211/wpa.c | 4 ++-- net/wireless/lib80211_crypt_ccmp.c | 2 +- 3 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 55e6f4ad0ca6..b6e6d5b40774 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -310,9 +310,11 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2) struct ieee80211_hdr { __le16 frame_control; __le16 duration_id; - u8 addr1[ETH_ALEN]; - u8 addr2[ETH_ALEN]; - u8 addr3[ETH_ALEN]; + struct_group(addrs, + u8 addr1[ETH_ALEN]; + u8 addr2[ETH_ALEN]; + u8 addr3[ETH_ALEN]; + ); __le16 seq_ctrl; u8 addr4[ETH_ALEN]; } __packed __aligned(2); diff --git a/net/mac80211/wpa.c b/net/mac80211/wpa.c index 93ec2f349748..20f742b5503b 100644 --- a/net/mac80211/wpa.c +++ b/net/mac80211/wpa.c @@ -351,7 +351,7 @@ static u8 ccmp_gcmp_aad(struct sk_buff *skb, u8 *aad) * FC | A1 | A2 | A3 | SC | [A4] | [QC] */ put_unaligned_be16(len_a, &aad[0]); put_unaligned(mask_fc, (__le16 *)&aad[2]); - memcpy(&aad[4], &hdr->addr1, 3 * ETH_ALEN); + memcpy(&aad[4], &hdr->addrs, 3 * ETH_ALEN); /* Mask Seq#, leave Frag# */ aad[22] = *((u8 *) &hdr->seq_ctrl) & 0x0f; @@ -792,7 +792,7 @@ static void bip_aad(struct sk_buff *skb, u8 *aad) IEEE80211_FCTL_MOREDATA); put_unaligned(mask_fc, (__le16 *) &aad[0]); /* A1 || A2 || A3 */ - memcpy(aad + 2, &hdr->addr1, 3 * ETH_ALEN); + memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); } diff --git a/net/wireless/lib80211_crypt_ccmp.c b/net/wireless/lib80211_crypt_ccmp.c index 6a5f08f7491e..cca5e1cf089e 100644 --- a/net/wireless/lib80211_crypt_ccmp.c +++ b/net/wireless/lib80211_crypt_ccmp.c @@ -136,7 +136,7 @@ static int ccmp_init_iv_and_aad(const struct ieee80211_hdr *hdr, pos = (u8 *) hdr; aad[0] = pos[0] & 0x8f; aad[1] = pos[1] & 0xc7; - memcpy(aad + 2, hdr->addr1, 3 * ETH_ALEN); + memcpy(aad + 2, &hdr->addrs, 3 * ETH_ALEN); pos = (u8 *) & hdr->seq_ctrl; aad[20] = pos[0] & 0x0f; aad[21] = 0; /* all bits masked */ -- cgit v1.2.3 From dec9b2f1e0455a151a7293c367da22ab973f713e Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Sep 2022 16:59:15 +0200 Subject: debugfs: add debugfs_lookup_and_remove() There is a very common pattern of using debugfs_remove(debufs_lookup(..)) which results in a dentry leak of the dentry that was looked up. Instead of having to open-code the correct pattern of calling dput() on the dentry, create debugfs_lookup_and_remove() to handle this pattern automatically and properly without any memory leaks. Cc: stable Reported-by: Kuyo Chang Tested-by: Kuyo Chang Link: https://lore.kernel.org/r/YxIaQ8cSinDR881k@kroah.com Signed-off-by: Greg Kroah-Hartman --- fs/debugfs/inode.c | 22 ++++++++++++++++++++++ include/linux/debugfs.h | 6 ++++++ 2 files changed, 28 insertions(+) (limited to 'include/linux') diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 3dcf0b8b4e93..232cfdf095ae 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -744,6 +744,28 @@ void debugfs_remove(struct dentry *dentry) } EXPORT_SYMBOL_GPL(debugfs_remove); +/** + * debugfs_lookup_and_remove - lookup a directory or file and recursively remove it + * @name: a pointer to a string containing the name of the item to look up. + * @parent: a pointer to the parent dentry of the item. + * + * This is the equlivant of doing something like + * debugfs_remove(debugfs_lookup(..)) but with the proper reference counting + * handled for the directory being looked up. + */ +void debugfs_lookup_and_remove(const char *name, struct dentry *parent) +{ + struct dentry *dentry; + + dentry = debugfs_lookup(name, parent); + if (!dentry) + return; + + debugfs_remove(dentry); + dput(dentry); +} +EXPORT_SYMBOL_GPL(debugfs_lookup_and_remove); + /** * debugfs_rename - rename a file/directory in the debugfs filesystem * @old_dir: a pointer to the parent dentry for the renamed object. This diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c869f1e73d75..f60674692d36 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -91,6 +91,8 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); #define debugfs_remove_recursive debugfs_remove +void debugfs_lookup_and_remove(const char *name, struct dentry *parent); + const struct file_operations *debugfs_real_fops(const struct file *filp); int debugfs_file_get(struct dentry *dentry); @@ -225,6 +227,10 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } +static inline void debugfs_lookup_and_remove(const char *name, + struct dentry *parent) +{ } + const struct file_operations *debugfs_real_fops(const struct file *filp); static inline int debugfs_file_get(struct dentry *dentry) -- cgit v1.2.3 From 9ca05b0f27de928be121cccf07735819dc9e1ed3 Mon Sep 17 00:00:00 2001 From: Maher Sanalla Date: Mon, 29 Aug 2022 12:02:27 +0300 Subject: RDMA/mlx5: Rely on RoCE fw cap instead of devlink when setting profile When the RDMA auxiliary driver probes, it sets its profile based on devlink driverinit value. The latter might not be in sync with FW yet (In case devlink reload is not performed), thus causing a mismatch between RDMA driver and FW. This results in the following FW syndrome when the RDMA driver tries to adjust RoCE state, which fails the probe: "0xC1F678 | modify_nic_vport_context: roce_en set on a vport that doesn't support roce" To prevent this, select the PF profile based on FW RoCE capability instead of relying on devlink driverinit value. To provide backward compatibility of the RoCE disable feature, on older FW's where roce_rw is not set (FW RoCE capability is read-only), keep the current behavior e.g., rely on devlink driverinit value. Fixes: fbfa97b4d79f ("net/mlx5: Disable roce at HCA level") Reviewed-by: Shay Drory Reviewed-by: Michael Guralnik Reviewed-by: Saeed Mahameed Signed-off-by: Maher Sanalla Link: https://lore.kernel.org/r/cb34ce9a1df4a24c135cb804db87f7d2418bd6cc.1661763459.git.leonro@nvidia.com Signed-off-by: Leon Romanovsky --- drivers/infiniband/hw/mlx5/main.c | 2 +- drivers/net/ethernet/mellanox/mlx5/core/main.c | 23 +++++++++++++++++++++-- include/linux/mlx5/driver.h | 19 ++++++++++--------- 3 files changed, 32 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c index fc94a1b25485..883d7c60143e 100644 --- a/drivers/infiniband/hw/mlx5/main.c +++ b/drivers/infiniband/hw/mlx5/main.c @@ -4336,7 +4336,7 @@ static int mlx5r_probe(struct auxiliary_device *adev, dev->mdev = mdev; dev->num_ports = num_ports; - if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_is_roce_init_enabled(mdev)) + if (ll == IB_LINK_LAYER_ETHERNET && !mlx5_get_roce_state(mdev)) profile = &raw_eth_profile; else profile = &pf_profile; diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c index bec8d6d0b5f6..4870a88cecb7 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c @@ -494,6 +494,24 @@ static int max_uc_list_get_devlink_param(struct mlx5_core_dev *dev) return err; } +bool mlx5_is_roce_on(struct mlx5_core_dev *dev) +{ + struct devlink *devlink = priv_to_devlink(dev); + union devlink_param_value val; + int err; + + err = devlink_param_driverinit_value_get(devlink, + DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, + &val); + + if (!err) + return val.vbool; + + mlx5_core_dbg(dev, "Failed to get param. err = %d\n", err); + return MLX5_CAP_GEN(dev, roce); +} +EXPORT_SYMBOL(mlx5_is_roce_on); + static int handle_hca_cap_2(struct mlx5_core_dev *dev, void *set_ctx) { void *set_hca_cap; @@ -597,7 +615,8 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) MLX5_CAP_GEN_MAX(dev, num_total_dynamic_vf_msix)); if (MLX5_CAP_GEN(dev, roce_rw_supported)) - MLX5_SET(cmd_hca_cap, set_hca_cap, roce, mlx5_is_roce_init_enabled(dev)); + MLX5_SET(cmd_hca_cap, set_hca_cap, roce, + mlx5_is_roce_on(dev)); max_uc_list = max_uc_list_get_devlink_param(dev); if (max_uc_list > 0) @@ -623,7 +642,7 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx) */ static bool is_roce_fw_disabled(struct mlx5_core_dev *dev) { - return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_init_enabled(dev)) || + return (MLX5_CAP_GEN(dev, roce_rw_supported) && !mlx5_is_roce_on(dev)) || (!MLX5_CAP_GEN(dev, roce_rw_supported) && !MLX5_CAP_GEN(dev, roce)); } diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 96b16fbe1aa4..d6338fb449c8 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1279,16 +1279,17 @@ enum { MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, }; -static inline bool mlx5_is_roce_init_enabled(struct mlx5_core_dev *dev) +bool mlx5_is_roce_on(struct mlx5_core_dev *dev); + +static inline bool mlx5_get_roce_state(struct mlx5_core_dev *dev) { - struct devlink *devlink = priv_to_devlink(dev); - union devlink_param_value val; - int err; - - err = devlink_param_driverinit_value_get(devlink, - DEVLINK_PARAM_GENERIC_ID_ENABLE_ROCE, - &val); - return err ? MLX5_CAP_GEN(dev, roce) : val.vbool; + if (MLX5_CAP_GEN(dev, roce_rw_supported)) + return MLX5_CAP_GEN(dev, roce); + + /* If RoCE cap is read-only in FW, get RoCE state from devlink + * in order to support RoCE enable/disable feature + */ + return mlx5_is_roce_on(dev); } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 8409fe92d88c332923130149fe209d1c882b286e Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 27 Aug 2022 15:03:43 +0200 Subject: PCI: Move PCI_VENDOR_ID_MICROSOFT/PCI_DEVICE_ID_HYPERV_VIDEO definitions to pci_ids.h There are already three places in kernel which define PCI_VENDOR_ID_MICROSOFT and two for PCI_DEVICE_ID_HYPERV_VIDEO and there's a need to use these from core VMBus code. Move the defines where they belong. No functional change. Reviewed-by: Michael Kelley Acked-by: Bjorn Helgaas # pci_ids.h Signed-off-by: Vitaly Kuznetsov Link: https://lore.kernel.org/r/20220827130345.1320254-2-vkuznets@redhat.com Signed-off-by: Wei Liu --- drivers/gpu/drm/hyperv/hyperv_drm_drv.c | 3 --- drivers/net/ethernet/microsoft/mana/gdma_main.c | 4 ---- drivers/video/fbdev/hyperv_fb.c | 4 ---- include/linux/pci_ids.h | 3 +++ 4 files changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c index fc8b4e045f5d..f84d39762a72 100644 --- a/drivers/gpu/drm/hyperv/hyperv_drm_drv.c +++ b/drivers/gpu/drm/hyperv/hyperv_drm_drv.c @@ -23,9 +23,6 @@ #define DRIVER_MAJOR 1 #define DRIVER_MINOR 0 -#define PCI_VENDOR_ID_MICROSOFT 0x1414 -#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353 - DEFINE_DRM_GEM_FOPS(hv_fops); static struct drm_driver hyperv_driver = { diff --git a/drivers/net/ethernet/microsoft/mana/gdma_main.c b/drivers/net/ethernet/microsoft/mana/gdma_main.c index 5f9240182351..00d8198072ae 100644 --- a/drivers/net/ethernet/microsoft/mana/gdma_main.c +++ b/drivers/net/ethernet/microsoft/mana/gdma_main.c @@ -1465,10 +1465,6 @@ static void mana_gd_shutdown(struct pci_dev *pdev) pci_disable_device(pdev); } -#ifndef PCI_VENDOR_ID_MICROSOFT -#define PCI_VENDOR_ID_MICROSOFT 0x1414 -#endif - static const struct pci_device_id mana_id_table[] = { { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_PF_DEVICE_ID) }, { PCI_DEVICE(PCI_VENDOR_ID_MICROSOFT, MANA_VF_DEVICE_ID) }, diff --git a/drivers/video/fbdev/hyperv_fb.c b/drivers/video/fbdev/hyperv_fb.c index 886c564787f1..b58b445bb529 100644 --- a/drivers/video/fbdev/hyperv_fb.c +++ b/drivers/video/fbdev/hyperv_fb.c @@ -74,10 +74,6 @@ #define SYNTHVID_DEPTH_WIN8 32 #define SYNTHVID_FB_SIZE_WIN8 (8 * 1024 * 1024) -#define PCI_VENDOR_ID_MICROSOFT 0x1414 -#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353 - - enum pipe_msg_type { PIPE_MSG_INVALID, PIPE_MSG_DATA, diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index 6feade66efdb..15b49e655ce3 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -2079,6 +2079,9 @@ #define PCI_DEVICE_ID_ICE_1712 0x1712 #define PCI_DEVICE_ID_VT1724 0x1724 +#define PCI_VENDOR_ID_MICROSOFT 0x1414 +#define PCI_DEVICE_ID_HYPERV_VIDEO 0x5353 + #define PCI_VENDOR_ID_OXSEMI 0x1415 #define PCI_DEVICE_ID_OXSEMI_12PCI840 0x8403 #define PCI_DEVICE_ID_OXSEMI_PCIe840 0xC000 -- cgit v1.2.3 From 9fc18f6d56d5b79d527c17a8100a0965d18345cf Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 21 Aug 2022 16:06:44 +0200 Subject: dma-mapping: mark dma_supported static Now that the remaining users in drivers are gone, this function can be marked static. Signed-off-by: Christoph Hellwig --- include/linux/dma-mapping.h | 5 ----- kernel/dma/mapping.c | 3 +-- 2 files changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 25a30906289d..0ee20b764000 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -139,7 +139,6 @@ int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); bool dma_can_mmap(struct device *dev); -int dma_supported(struct device *dev, u64 mask); bool dma_pci_p2pdma_supported(struct device *dev); int dma_set_mask(struct device *dev, u64 mask); int dma_set_coherent_mask(struct device *dev, u64 mask); @@ -248,10 +247,6 @@ static inline bool dma_can_mmap(struct device *dev) { return false; } -static inline int dma_supported(struct device *dev, u64 mask) -{ - return 0; -} static inline bool dma_pci_p2pdma_supported(struct device *dev) { return false; diff --git a/kernel/dma/mapping.c b/kernel/dma/mapping.c index 49cbf3e33de7..27f272381cf2 100644 --- a/kernel/dma/mapping.c +++ b/kernel/dma/mapping.c @@ -707,7 +707,7 @@ int dma_mmap_noncontiguous(struct device *dev, struct vm_area_struct *vma, } EXPORT_SYMBOL_GPL(dma_mmap_noncontiguous); -int dma_supported(struct device *dev, u64 mask) +static int dma_supported(struct device *dev, u64 mask) { const struct dma_map_ops *ops = get_dma_ops(dev); @@ -721,7 +721,6 @@ int dma_supported(struct device *dev, u64 mask) return 1; return ops->dma_supported(dev, mask); } -EXPORT_SYMBOL(dma_supported); bool dma_pci_p2pdma_supported(struct device *dev) { -- cgit v1.2.3 From 283945017cbf685546ba7d065f254ad77eb888b1 Mon Sep 17 00:00:00 2001 From: Yuan Can Date: Mon, 15 Aug 2022 01:33:39 +0000 Subject: iommu: Remove comment of dev_has_feat in struct doc dev_has_feat has been removed from iommu_ops in commit 309c56e84602 ("iommu: remove the unused dev_has_feat method"), remove its description in the struct doc. Signed-off-by: Yuan Can Link: https://lore.kernel.org/r/20220815013339.2552-1-yuancan@huawei.com Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index ea30f00dc145..a004e87e0e1d 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -212,7 +212,7 @@ struct iommu_iotlb_gather { * @of_xlate: add OF master IDs to iommu grouping * @is_attach_deferred: Check if domain attach should be deferred from iommu * driver init to device driver init (default no) - * @dev_has/enable/disable_feat: per device entries to check/enable/disable + * @dev_enable/disable_feat: per device entries to enable/disable * iommu specific features. * @sva_bind: Bind process address space to device * @sva_unbind: Unbind process address space from device -- cgit v1.2.3 From 359ad15763762c713a51300134e784a72eb9cb80 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 16:26:49 +0100 Subject: iommu: Retire iommu_capable() With all callers now converted to the device-specific version, retire the old bus-based interface, and give drivers the chance to indicate accurate per-instance capabilities. Signed-off-by: Robin Murphy Reviewed-by: Lu Baolu Link: https://lore.kernel.org/r/d8bd8777d06929ad8f49df7fc80e1b9af32a41b5.1660574547.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 2 +- drivers/iommu/fsl_pamu_domain.c | 2 +- drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/iommu.c | 11 +---------- drivers/iommu/s390-iommu.c | 2 +- include/linux/iommu.h | 8 +------- 9 files changed, 9 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index 65b8e4fd8217..94220eb5bff3 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -2251,7 +2251,7 @@ static phys_addr_t amd_iommu_iova_to_phys(struct iommu_domain *dom, return ops->iova_to_phys(ops, iova); } -static bool amd_iommu_capable(enum iommu_cap cap) +static bool amd_iommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index d32b02336411..ab919ec43c4d 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -1992,7 +1992,7 @@ static const struct iommu_flush_ops arm_smmu_flush_ops = { }; /* IOMMU API */ -static bool arm_smmu_capable(enum iommu_cap cap) +static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index dfa82df00342..ce036a053fb8 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -1330,7 +1330,7 @@ static phys_addr_t arm_smmu_iova_to_phys(struct iommu_domain *domain, return ops->iova_to_phys(ops, iova); } -static bool arm_smmu_capable(enum iommu_cap cap) +static bool arm_smmu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 17235116d3bb..66ca47f2e57f 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -493,7 +493,7 @@ static phys_addr_t qcom_iommu_iova_to_phys(struct iommu_domain *domain, return ret; } -static bool qcom_iommu_capable(enum iommu_cap cap) +static bool qcom_iommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 011f9ab7f743..08fd9089e3ba 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -178,7 +178,7 @@ static phys_addr_t fsl_pamu_iova_to_phys(struct iommu_domain *domain, return iova; } -static bool fsl_pamu_capable(enum iommu_cap cap) +static bool fsl_pamu_capable(struct device *dev, enum iommu_cap cap) { return cap == IOMMU_CAP_CACHE_COHERENCY; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 7cca030a508e..da63b358ef4a 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -4429,7 +4429,7 @@ static bool intel_iommu_enforce_cache_coherency(struct iommu_domain *domain) return true; } -static bool intel_iommu_capable(enum iommu_cap cap) +static bool intel_iommu_capable(struct device *dev, enum iommu_cap cap) { if (cap == IOMMU_CAP_CACHE_COHERENCY) return true; diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 12e49dcf91bd..55d242f16824 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1868,19 +1868,10 @@ bool device_iommu_capable(struct device *dev, enum iommu_cap cap) if (!ops->capable) return false; - return ops->capable(cap); + return ops->capable(dev, cap); } EXPORT_SYMBOL_GPL(device_iommu_capable); -bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) -{ - if (!bus->iommu_ops || !bus->iommu_ops->capable) - return false; - - return bus->iommu_ops->capable(cap); -} -EXPORT_SYMBOL_GPL(iommu_capable); - /** * iommu_set_fault_handler() - set a fault handler for an iommu domain * @domain: iommu domain diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index c898bcbbce11..cd0ee89d63e0 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -39,7 +39,7 @@ static struct s390_domain *to_s390_domain(struct iommu_domain *dom) return container_of(dom, struct s390_domain, domain); } -static bool s390_iommu_capable(enum iommu_cap cap) +static bool s390_iommu_capable(struct device *dev, enum iommu_cap cap) { switch (cap) { case IOMMU_CAP_CACHE_COHERENCY: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a004e87e0e1d..0013a1befe7b 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -227,7 +227,7 @@ struct iommu_iotlb_gather { * @owner: Driver module providing these ops */ struct iommu_ops { - bool (*capable)(enum iommu_cap); + bool (*capable)(struct device *dev, enum iommu_cap); /* Domain allocation and freeing by the iommu driver */ struct iommu_domain *(*domain_alloc)(unsigned iommu_domain_type); @@ -420,7 +420,6 @@ extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); -extern bool iommu_capable(struct bus_type *bus, enum iommu_cap cap); extern struct iommu_domain *iommu_domain_alloc(struct bus_type *bus); extern struct iommu_group *iommu_group_get_by_id(int id); extern void iommu_domain_free(struct iommu_domain *domain); @@ -697,11 +696,6 @@ static inline bool device_iommu_capable(struct device *dev, enum iommu_cap cap) return false; } -static inline bool iommu_capable(struct bus_type *bus, enum iommu_cap cap) -{ - return false; -} - static inline struct iommu_domain *iommu_domain_alloc(struct bus_type *bus) { return NULL; -- cgit v1.2.3 From 29e932295bfaba792d29e66e8be0637ff3994724 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Mon, 15 Aug 2022 17:20:17 +0100 Subject: iommu: Clean up bus_set_iommu() Clean up the remaining trivial bus_set_iommu() callsites along with the implementation. Now drivers only have to know and care about iommu_device instances, phew! Reviewed-by: Kevin Tian Tested-by: Matthew Rosato # s390 Tested-by: Niklas Schnelle # s390 Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/ea383d5f4d74ffe200ab61248e5de6e95846180a.1660572783.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- drivers/iommu/arm/arm-smmu/qcom_iommu.c | 4 ---- drivers/iommu/fsl_pamu_domain.c | 4 ---- drivers/iommu/intel/iommu.c | 2 -- drivers/iommu/iommu.c | 24 ------------------------ drivers/iommu/msm_iommu.c | 2 -- drivers/iommu/rockchip-iommu.c | 2 -- drivers/iommu/s390-iommu.c | 6 ------ drivers/iommu/sprd-iommu.c | 5 ----- drivers/iommu/sun50i-iommu.c | 2 -- include/linux/iommu.h | 1 - 10 files changed, 52 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/arm/arm-smmu/qcom_iommu.c b/drivers/iommu/arm/arm-smmu/qcom_iommu.c index 66ca47f2e57f..3869c3ecda8c 100644 --- a/drivers/iommu/arm/arm-smmu/qcom_iommu.c +++ b/drivers/iommu/arm/arm-smmu/qcom_iommu.c @@ -837,8 +837,6 @@ static int qcom_iommu_device_probe(struct platform_device *pdev) goto err_pm_disable; } - bus_set_iommu(&platform_bus_type, &qcom_iommu_ops); - if (qcom_iommu->local_base) { pm_runtime_get_sync(dev); writel_relaxed(0xffffffff, qcom_iommu->local_base + SMMU_INTR_SEL_NS); @@ -856,8 +854,6 @@ static int qcom_iommu_device_remove(struct platform_device *pdev) { struct qcom_iommu_dev *qcom_iommu = platform_get_drvdata(pdev); - bus_set_iommu(&platform_bus_type, NULL); - pm_runtime_force_suspend(&pdev->dev); platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&qcom_iommu->iommu); diff --git a/drivers/iommu/fsl_pamu_domain.c b/drivers/iommu/fsl_pamu_domain.c index 08fd9089e3ba..fa20f4b03e12 100644 --- a/drivers/iommu/fsl_pamu_domain.c +++ b/drivers/iommu/fsl_pamu_domain.c @@ -476,11 +476,7 @@ int __init pamu_domain_init(void) if (ret) { iommu_device_sysfs_remove(&pamu_iommu); pr_err("Can't register iommu device\n"); - return ret; } - bus_set_iommu(&platform_bus_type, &fsl_pamu_ops); - bus_set_iommu(&pci_bus_type, &fsl_pamu_ops); - return ret; } diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 5d3c220a0308..1ff58d25b713 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3890,7 +3890,6 @@ static int __init probe_acpi_namespace_devices(void) continue; } - pn->dev->bus->iommu_ops = &intel_iommu_ops; ret = iommu_probe_device(pn->dev); if (ret) break; @@ -4023,7 +4022,6 @@ int __init intel_iommu_init(void) } up_read(&dmar_global_lock); - bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 422d4e9f980b..83688db121f0 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -1826,30 +1826,6 @@ int bus_iommu_probe(struct bus_type *bus) return ret; } -/** - * bus_set_iommu - set iommu-callbacks for the bus - * @bus: bus. - * @ops: the callbacks provided by the iommu-driver - * - * This function is called by an iommu driver to set the iommu methods - * used for a particular bus. Drivers for devices on that bus can use - * the iommu-api after these ops are registered. - * This special function is needed because IOMMUs are usually devices on - * the bus itself, so the iommu drivers are not initialized when the bus - * is set up. With this function the iommu-driver can set the iommu-ops - * afterwards. - */ -int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops) -{ - if (bus->iommu_ops && ops && bus->iommu_ops != ops) - return -EBUSY; - - bus->iommu_ops = ops; - - return 0; -} -EXPORT_SYMBOL_GPL(bus_set_iommu); - bool iommu_present(struct bus_type *bus) { return bus->iommu_ops != NULL; diff --git a/drivers/iommu/msm_iommu.c b/drivers/iommu/msm_iommu.c index 6a24aa804ea3..16179a9a7283 100644 --- a/drivers/iommu/msm_iommu.c +++ b/drivers/iommu/msm_iommu.c @@ -792,8 +792,6 @@ static int msm_iommu_probe(struct platform_device *pdev) goto fail; } - bus_set_iommu(&platform_bus_type, &msm_iommu_ops); - pr_info("device mapped at %p, irq %d with %d ctx banks\n", iommu->base, iommu->irq, iommu->ncb); diff --git a/drivers/iommu/rockchip-iommu.c b/drivers/iommu/rockchip-iommu.c index ab57c4b8fade..a3fc59b814ab 100644 --- a/drivers/iommu/rockchip-iommu.c +++ b/drivers/iommu/rockchip-iommu.c @@ -1300,8 +1300,6 @@ static int rk_iommu_probe(struct platform_device *pdev) if (!dma_dev) dma_dev = &pdev->dev; - bus_set_iommu(&platform_bus_type, &rk_iommu_ops); - pm_runtime_enable(dev); for (i = 0; i < iommu->num_irq; i++) { diff --git a/drivers/iommu/s390-iommu.c b/drivers/iommu/s390-iommu.c index ac4dab6d79e2..3c071782f6f1 100644 --- a/drivers/iommu/s390-iommu.c +++ b/drivers/iommu/s390-iommu.c @@ -390,9 +390,3 @@ static const struct iommu_ops s390_iommu_ops = { .free = s390_domain_free, } }; - -static int __init s390_iommu_init(void) -{ - return bus_set_iommu(&pci_bus_type, &s390_iommu_ops); -} -subsys_initcall(s390_iommu_init); diff --git a/drivers/iommu/sprd-iommu.c b/drivers/iommu/sprd-iommu.c index 511959c8a14d..fadd2c907222 100644 --- a/drivers/iommu/sprd-iommu.c +++ b/drivers/iommu/sprd-iommu.c @@ -496,9 +496,6 @@ static int sprd_iommu_probe(struct platform_device *pdev) if (ret) goto remove_sysfs; - if (!iommu_present(&platform_bus_type)) - bus_set_iommu(&platform_bus_type, &sprd_iommu_ops); - ret = sprd_iommu_clk_enable(sdev); if (ret) goto unregister_iommu; @@ -534,8 +531,6 @@ static int sprd_iommu_remove(struct platform_device *pdev) iommu_group_put(sdev->group); sdev->group = NULL; - bus_set_iommu(&platform_bus_type, NULL); - platform_set_drvdata(pdev, NULL); iommu_device_sysfs_remove(&sdev->iommu); iommu_device_unregister(&sdev->iommu); diff --git a/drivers/iommu/sun50i-iommu.c b/drivers/iommu/sun50i-iommu.c index a84c63518773..cd9b74ee24de 100644 --- a/drivers/iommu/sun50i-iommu.c +++ b/drivers/iommu/sun50i-iommu.c @@ -965,8 +965,6 @@ static int sun50i_iommu_probe(struct platform_device *pdev) if (ret < 0) goto err_unregister; - bus_set_iommu(&platform_bus_type, &sun50i_iommu_ops); - return 0; err_unregister: diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 0013a1befe7b..35810f87ae74 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -416,7 +416,6 @@ static inline const struct iommu_ops *dev_iommu_ops(struct device *dev) return dev->iommu->iommu_dev->ops; } -extern int bus_set_iommu(struct bus_type *bus, const struct iommu_ops *ops); extern int bus_iommu_probe(struct bus_type *bus); extern bool iommu_present(struct bus_type *bus); extern bool device_iommu_capable(struct device *dev, enum iommu_cap cap); -- cgit v1.2.3 From fa49364cd5e65797014688cba623541083b3e5b0 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Aug 2022 18:28:04 +0100 Subject: iommu/dma: Move public interfaces to linux/iommu.h The iommu-dma layer is now mostly encapsulated by iommu_dma_ops, with only a couple more public interfaces left pertaining to MSI integration. Since these depend on the main IOMMU API header anyway, move their declarations there, taking the opportunity to update the half-baked comments to proper kerneldoc along the way. Signed-off-by: Robin Murphy Acked-by: Catalin Marinas Acked-by: Marc Zyngier Link: https://lore.kernel.org/r/9cd99738f52094e6bed44bfee03fa4f288d20695.1660668998.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- arch/arm64/mm/dma-mapping.c | 2 +- drivers/iommu/dma-iommu.c | 15 +++++++++++++-- drivers/irqchip/irq-gic-v2m.c | 2 +- drivers/irqchip/irq-gic-v3-its.c | 2 +- drivers/irqchip/irq-gic-v3-mbi.c | 2 +- drivers/irqchip/irq-ls-scfg-msi.c | 2 +- drivers/vfio/vfio_iommu_type1.c | 1 - include/linux/dma-iommu.h | 40 --------------------------------------- include/linux/iommu.h | 36 +++++++++++++++++++++++++++++++++++ 9 files changed, 54 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 599cf81f5685..7d7e9a046305 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 17dd683b2fce..6809b33ac9df 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -1633,6 +1633,13 @@ out_free_page: return NULL; } +/** + * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU domain + * @desc: MSI descriptor, will store the MSI page + * @msi_addr: MSI target address to be mapped + * + * Return: 0 on success or negative error code if the mapping failed. + */ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) { struct device *dev = msi_desc_to_dev(desc); @@ -1661,8 +1668,12 @@ int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) return 0; } -void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg) +/** + * iommu_dma_compose_msi_msg() - Apply translation to an MSI message + * @desc: MSI descriptor prepared by iommu_dma_prepare_msi() + * @msg: MSI message containing target physical address + */ +void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) { struct device *dev = msi_desc_to_dev(desc); const struct iommu_domain *domain = iommu_get_domain_for_dev(dev); diff --git a/drivers/irqchip/irq-gic-v2m.c b/drivers/irqchip/irq-gic-v2m.c index b249d4df899e..6e1ac330d7a6 100644 --- a/drivers/irqchip/irq-gic-v2m.c +++ b/drivers/irqchip/irq-gic-v2m.c @@ -13,7 +13,7 @@ #define pr_fmt(fmt) "GICv2m: " fmt #include -#include +#include #include #include #include diff --git a/drivers/irqchip/irq-gic-v3-its.c b/drivers/irqchip/irq-gic-v3-its.c index 5ff09de6c48f..e7d8d4208ee6 100644 --- a/drivers/irqchip/irq-gic-v3-its.c +++ b/drivers/irqchip/irq-gic-v3-its.c @@ -11,9 +11,9 @@ #include #include #include -#include #include #include +#include #include #include #include diff --git a/drivers/irqchip/irq-gic-v3-mbi.c b/drivers/irqchip/irq-gic-v3-mbi.c index a2163d32f17d..e1efdec9e9ac 100644 --- a/drivers/irqchip/irq-gic-v3-mbi.c +++ b/drivers/irqchip/irq-gic-v3-mbi.c @@ -6,7 +6,7 @@ #define pr_fmt(fmt) "GICv3: " fmt -#include +#include #include #include #include diff --git a/drivers/irqchip/irq-ls-scfg-msi.c b/drivers/irqchip/irq-ls-scfg-msi.c index b4927e425f7b..527c90e0920e 100644 --- a/drivers/irqchip/irq-ls-scfg-msi.c +++ b/drivers/irqchip/irq-ls-scfg-msi.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -18,7 +19,6 @@ #include #include #include -#include #define MSI_IRQS_PER_MSIR 32 #define MSI_MSIR_OFFSET 4 diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index db516c90a977..2f41e32c640c 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -37,7 +37,6 @@ #include #include #include -#include #include #include "vfio.h" diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 24607dc3c2ac..e83de4f1f3d6 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -15,27 +15,10 @@ /* Domain management interface for IOMMU drivers */ int iommu_get_dma_cookie(struct iommu_domain *domain); -int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); void iommu_put_dma_cookie(struct iommu_domain *domain); -/* Setup call for arch DMA mapping code */ -void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); int iommu_dma_init_fq(struct iommu_domain *domain); -/* The DMA API isn't _quite_ the whole story, though... */ -/* - * iommu_dma_prepare_msi() - Map the MSI page in the IOMMU device - * - * The MSI page will be stored in @desc. - * - * Return: 0 on success otherwise an error describing the failure. - */ -int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); - -/* Update the MSI message if required. */ -void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg); - void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, @@ -46,15 +29,8 @@ extern bool iommu_dma_forcedac; #else /* CONFIG_IOMMU_DMA */ struct iommu_domain; -struct msi_desc; -struct msi_msg; struct device; -static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, - u64 dma_limit) -{ -} - static inline int iommu_dma_init_fq(struct iommu_domain *domain) { return -EINVAL; @@ -65,26 +41,10 @@ static inline int iommu_get_dma_cookie(struct iommu_domain *domain) return -ENODEV; } -static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) -{ - return -ENODEV; -} - static inline void iommu_put_dma_cookie(struct iommu_domain *domain) { } -static inline int iommu_dma_prepare_msi(struct msi_desc *desc, - phys_addr_t msi_addr) -{ - return 0; -} - -static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, - struct msi_msg *msg) -{ -} - static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) { } diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 35810f87ae74..a325532aeab5 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -1063,4 +1063,40 @@ void iommu_debugfs_setup(void); static inline void iommu_debugfs_setup(void) {} #endif +#ifdef CONFIG_IOMMU_DMA +#include + +/* Setup call for arch DMA mapping code */ +void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit); + +int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base); + +int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr); +void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg); + +#else /* CONFIG_IOMMU_DMA */ + +struct msi_desc; +struct msi_msg; + +static inline void iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 dma_limit) +{ +} + +static inline int iommu_get_msi_cookie(struct iommu_domain *domain, dma_addr_t base) +{ + return -ENODEV; +} + +static inline int iommu_dma_prepare_msi(struct msi_desc *desc, phys_addr_t msi_addr) +{ + return 0; +} + +static inline void iommu_dma_compose_msi_msg(struct msi_desc *desc, struct msi_msg *msg) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ + #endif /* __LINUX_IOMMU_H */ -- cgit v1.2.3 From aaac38f614871df252aa7459647bf68d42f7c3e7 Mon Sep 17 00:00:00 2001 From: Vasant Hegde Date: Thu, 25 Aug 2022 06:39:36 +0000 Subject: iommu/amd: Initial support for AMD IOMMU v2 page table Introduce IO page table framework support for AMD IOMMU v2 page table. This patch implements 4 level page table within iommu amd driver and supports 4K/2M/1G page sizes. Signed-off-by: Vasant Hegde Link: https://lore.kernel.org/r/20220825063939.8360-7-vasant.hegde@amd.com Signed-off-by: Joerg Roedel --- drivers/iommu/amd/Makefile | 2 +- drivers/iommu/amd/amd_iommu_types.h | 5 +- drivers/iommu/amd/io_pgtable_v2.c | 415 ++++++++++++++++++++++++++++++++++++ drivers/iommu/io-pgtable.c | 1 + include/linux/io-pgtable.h | 2 + 5 files changed, 423 insertions(+), 2 deletions(-) create mode 100644 drivers/iommu/amd/io_pgtable_v2.c (limited to 'include/linux') diff --git a/drivers/iommu/amd/Makefile b/drivers/iommu/amd/Makefile index a935f8f4b974..773d8aa00283 100644 --- a/drivers/iommu/amd/Makefile +++ b/drivers/iommu/amd/Makefile @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-only -obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o +obj-$(CONFIG_AMD_IOMMU) += iommu.o init.o quirks.o io_pgtable.o io_pgtable_v2.o obj-$(CONFIG_AMD_IOMMU_DEBUGFS) += debugfs.o obj-$(CONFIG_AMD_IOMMU_V2) += iommu_v2.o diff --git a/drivers/iommu/amd/amd_iommu_types.h b/drivers/iommu/amd/amd_iommu_types.h index 5b1019dab328..660c1f044912 100644 --- a/drivers/iommu/amd/amd_iommu_types.h +++ b/drivers/iommu/amd/amd_iommu_types.h @@ -276,6 +276,8 @@ * 512GB Pages are not supported due to a hardware bug */ #define AMD_IOMMU_PGSIZES ((~0xFFFUL) & ~(2ULL << 38)) +/* 4K, 2MB, 1G page sizes are supported */ +#define AMD_IOMMU_PGSIZES_V2 (PAGE_SIZE | (1ULL << 21) | (1ULL << 30)) /* Bit value definition for dte irq remapping fields*/ #define DTE_IRQ_PHYS_ADDR_MASK (((1ULL << 45)-1) << 6) @@ -526,7 +528,8 @@ struct amd_io_pgtable { struct io_pgtable iop; int mode; u64 *root; - atomic64_t pt_root; /* pgtable root and pgtable mode */ + atomic64_t pt_root; /* pgtable root and pgtable mode */ + u64 *pgd; /* v2 pgtable pgd pointer */ }; /* diff --git a/drivers/iommu/amd/io_pgtable_v2.c b/drivers/iommu/amd/io_pgtable_v2.c new file mode 100644 index 000000000000..8638ddf6fb3b --- /dev/null +++ b/drivers/iommu/amd/io_pgtable_v2.c @@ -0,0 +1,415 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * CPU-agnostic AMD IO page table v2 allocator. + * + * Copyright (C) 2022 Advanced Micro Devices, Inc. + * Author: Suravee Suthikulpanit + * Author: Vasant Hegde + */ + +#define pr_fmt(fmt) "AMD-Vi: " fmt +#define dev_fmt(fmt) pr_fmt(fmt) + +#include +#include +#include + +#include + +#include "amd_iommu_types.h" +#include "amd_iommu.h" + +#define IOMMU_PAGE_PRESENT BIT_ULL(0) /* Is present */ +#define IOMMU_PAGE_RW BIT_ULL(1) /* Writeable */ +#define IOMMU_PAGE_USER BIT_ULL(2) /* Userspace addressable */ +#define IOMMU_PAGE_PWT BIT_ULL(3) /* Page write through */ +#define IOMMU_PAGE_PCD BIT_ULL(4) /* Page cache disabled */ +#define IOMMU_PAGE_ACCESS BIT_ULL(5) /* Was accessed (updated by IOMMU) */ +#define IOMMU_PAGE_DIRTY BIT_ULL(6) /* Was written to (updated by IOMMU) */ +#define IOMMU_PAGE_PSE BIT_ULL(7) /* Page Size Extensions */ +#define IOMMU_PAGE_NX BIT_ULL(63) /* No execute */ + +#define MAX_PTRS_PER_PAGE 512 + +#define IOMMU_PAGE_SIZE_2M BIT_ULL(21) +#define IOMMU_PAGE_SIZE_1G BIT_ULL(30) + + +static inline int get_pgtable_level(void) +{ + /* 5 level page table is not supported */ + return PAGE_MODE_4_LEVEL; +} + +static inline bool is_large_pte(u64 pte) +{ + return (pte & IOMMU_PAGE_PSE); +} + +static inline void *alloc_pgtable_page(void) +{ + return (void *)get_zeroed_page(GFP_KERNEL); +} + +static inline u64 set_pgtable_attr(u64 *page) +{ + u64 prot; + + prot = IOMMU_PAGE_PRESENT | IOMMU_PAGE_RW | IOMMU_PAGE_USER; + prot |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + return (iommu_virt_to_phys(page) | prot); +} + +static inline void *get_pgtable_pte(u64 pte) +{ + return iommu_phys_to_virt(pte & PM_ADDR_MASK); +} + +static u64 set_pte_attr(u64 paddr, u64 pg_size, int prot) +{ + u64 pte; + + pte = __sme_set(paddr & PM_ADDR_MASK); + pte |= IOMMU_PAGE_PRESENT | IOMMU_PAGE_USER; + pte |= IOMMU_PAGE_ACCESS | IOMMU_PAGE_DIRTY; + + if (prot & IOMMU_PROT_IW) + pte |= IOMMU_PAGE_RW; + + /* Large page */ + if (pg_size == IOMMU_PAGE_SIZE_1G || pg_size == IOMMU_PAGE_SIZE_2M) + pte |= IOMMU_PAGE_PSE; + + return pte; +} + +static inline u64 get_alloc_page_size(u64 size) +{ + if (size >= IOMMU_PAGE_SIZE_1G) + return IOMMU_PAGE_SIZE_1G; + + if (size >= IOMMU_PAGE_SIZE_2M) + return IOMMU_PAGE_SIZE_2M; + + return PAGE_SIZE; +} + +static inline int page_size_to_level(u64 pg_size) +{ + if (pg_size == IOMMU_PAGE_SIZE_1G) + return PAGE_MODE_3_LEVEL; + if (pg_size == IOMMU_PAGE_SIZE_2M) + return PAGE_MODE_2_LEVEL; + + return PAGE_MODE_1_LEVEL; +} + +static inline void free_pgtable_page(u64 *pt) +{ + free_page((unsigned long)pt); +} + +static void free_pgtable(u64 *pt, int level) +{ + u64 *p; + int i; + + for (i = 0; i < MAX_PTRS_PER_PAGE; i++) { + /* PTE present? */ + if (!IOMMU_PTE_PRESENT(pt[i])) + continue; + + if (is_large_pte(pt[i])) + continue; + + /* + * Free the next level. No need to look at l1 tables here since + * they can only contain leaf PTEs; just free them directly. + */ + p = get_pgtable_pte(pt[i]); + if (level > 2) + free_pgtable(p, level - 1); + else + free_pgtable_page(p); + } + + free_pgtable_page(pt); +} + +/* Allocate page table */ +static u64 *v2_alloc_pte(u64 *pgd, unsigned long iova, + unsigned long pg_size, bool *updated) +{ + u64 *pte, *page; + int level, end_level; + + level = get_pgtable_level() - 1; + end_level = page_size_to_level(pg_size); + pte = &pgd[PM_LEVEL_INDEX(level, iova)]; + iova = PAGE_SIZE_ALIGN(iova, PAGE_SIZE); + + while (level >= end_level) { + u64 __pte, __npte; + + __pte = *pte; + + if (IOMMU_PTE_PRESENT(__pte) && is_large_pte(__pte)) { + /* Unmap large pte */ + cmpxchg64(pte, *pte, 0ULL); + *updated = true; + continue; + } + + if (!IOMMU_PTE_PRESENT(__pte)) { + page = alloc_pgtable_page(); + if (!page) + return NULL; + + __npte = set_pgtable_attr(page); + /* pte could have been changed somewhere. */ + if (cmpxchg64(pte, __pte, __npte) != __pte) + free_pgtable_page(page); + else if (IOMMU_PTE_PRESENT(__pte)) + *updated = true; + + continue; + } + + level -= 1; + pte = get_pgtable_pte(__pte); + pte = &pte[PM_LEVEL_INDEX(level, iova)]; + } + + /* Tear down existing pte entries */ + if (IOMMU_PTE_PRESENT(*pte)) { + u64 *__pte; + + *updated = true; + __pte = get_pgtable_pte(*pte); + cmpxchg64(pte, *pte, 0ULL); + if (pg_size == IOMMU_PAGE_SIZE_1G) + free_pgtable(__pte, end_level - 1); + else if (pg_size == IOMMU_PAGE_SIZE_2M) + free_pgtable_page(__pte); + } + + return pte; +} + +/* + * This function checks if there is a PTE for a given dma address. + * If there is one, it returns the pointer to it. + */ +static u64 *fetch_pte(struct amd_io_pgtable *pgtable, + unsigned long iova, unsigned long *page_size) +{ + u64 *pte; + int level; + + level = get_pgtable_level() - 1; + pte = &pgtable->pgd[PM_LEVEL_INDEX(level, iova)]; + /* Default page size is 4K */ + *page_size = PAGE_SIZE; + + while (level) { + /* Not present */ + if (!IOMMU_PTE_PRESENT(*pte)) + return NULL; + + /* Walk to the next level */ + pte = get_pgtable_pte(*pte); + pte = &pte[PM_LEVEL_INDEX(level - 1, iova)]; + + /* Large page */ + if (is_large_pte(*pte)) { + if (level == PAGE_MODE_3_LEVEL) + *page_size = IOMMU_PAGE_SIZE_1G; + else if (level == PAGE_MODE_2_LEVEL) + *page_size = IOMMU_PAGE_SIZE_2M; + else + return NULL; /* Wrongly set PSE bit in PTE */ + + break; + } + + level -= 1; + } + + return pte; +} + +static int iommu_v2_map_pages(struct io_pgtable_ops *ops, unsigned long iova, + phys_addr_t paddr, size_t pgsize, size_t pgcount, + int prot, gfp_t gfp, size_t *mapped) +{ + struct protection_domain *pdom = io_pgtable_ops_to_domain(ops); + struct io_pgtable_cfg *cfg = &pdom->iop.iop.cfg; + u64 *pte; + unsigned long map_size; + unsigned long mapped_size = 0; + unsigned long o_iova = iova; + size_t size = pgcount << __ffs(pgsize); + int count = 0; + int ret = 0; + bool updated = false; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize) || !pgcount) + return -EINVAL; + + if (!(prot & IOMMU_PROT_MASK)) + return -EINVAL; + + while (mapped_size < size) { + map_size = get_alloc_page_size(pgsize); + pte = v2_alloc_pte(pdom->iop.pgd, iova, map_size, &updated); + if (!pte) { + ret = -EINVAL; + goto out; + } + + *pte = set_pte_attr(paddr, map_size, prot); + + count++; + iova += map_size; + paddr += map_size; + mapped_size += map_size; + } + +out: + if (updated) { + if (count > 1) + amd_iommu_flush_tlb(&pdom->domain, 0); + else + amd_iommu_flush_page(&pdom->domain, 0, o_iova); + } + + if (mapped) + *mapped += mapped_size; + + return ret; +} + +static unsigned long iommu_v2_unmap_pages(struct io_pgtable_ops *ops, + unsigned long iova, + size_t pgsize, size_t pgcount, + struct iommu_iotlb_gather *gather) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + struct io_pgtable_cfg *cfg = &pgtable->iop.cfg; + unsigned long unmap_size; + unsigned long unmapped = 0; + size_t size = pgcount << __ffs(pgsize); + u64 *pte; + + if (WARN_ON(!pgsize || (pgsize & cfg->pgsize_bitmap) != pgsize || !pgcount)) + return 0; + + while (unmapped < size) { + pte = fetch_pte(pgtable, iova, &unmap_size); + if (!pte) + return unmapped; + + *pte = 0ULL; + + iova = (iova & ~(unmap_size - 1)) + unmap_size; + unmapped += unmap_size; + } + + return unmapped; +} + +static phys_addr_t iommu_v2_iova_to_phys(struct io_pgtable_ops *ops, unsigned long iova) +{ + struct amd_io_pgtable *pgtable = io_pgtable_ops_to_data(ops); + unsigned long offset_mask, pte_pgsize; + u64 *pte, __pte; + + pte = fetch_pte(pgtable, iova, &pte_pgsize); + if (!pte || !IOMMU_PTE_PRESENT(*pte)) + return 0; + + offset_mask = pte_pgsize - 1; + __pte = __sme_clr(*pte & PM_ADDR_MASK); + + return (__pte & ~offset_mask) | (iova & offset_mask); +} + +/* + * ---------------------------------------------------- + */ +static void v2_tlb_flush_all(void *cookie) +{ +} + +static void v2_tlb_flush_walk(unsigned long iova, size_t size, + size_t granule, void *cookie) +{ +} + +static void v2_tlb_add_page(struct iommu_iotlb_gather *gather, + unsigned long iova, size_t granule, + void *cookie) +{ +} + +static const struct iommu_flush_ops v2_flush_ops = { + .tlb_flush_all = v2_tlb_flush_all, + .tlb_flush_walk = v2_tlb_flush_walk, + .tlb_add_page = v2_tlb_add_page, +}; + +static void v2_free_pgtable(struct io_pgtable *iop) +{ + struct protection_domain *pdom; + struct amd_io_pgtable *pgtable = container_of(iop, struct amd_io_pgtable, iop); + + pdom = container_of(pgtable, struct protection_domain, iop); + if (!(pdom->flags & PD_IOMMUV2_MASK)) + return; + + /* + * Make changes visible to IOMMUs. No need to clear gcr3 entry + * as gcr3 table is already freed. + */ + amd_iommu_domain_update(pdom); + + /* Free page table */ + free_pgtable(pgtable->pgd, get_pgtable_level()); +} + +static struct io_pgtable *v2_alloc_pgtable(struct io_pgtable_cfg *cfg, void *cookie) +{ + struct amd_io_pgtable *pgtable = io_pgtable_cfg_to_data(cfg); + struct protection_domain *pdom = (struct protection_domain *)cookie; + int ret; + + pgtable->pgd = alloc_pgtable_page(); + if (!pgtable->pgd) + return NULL; + + ret = amd_iommu_domain_set_gcr3(&pdom->domain, 0, iommu_virt_to_phys(pgtable->pgd)); + if (ret) + goto err_free_pgd; + + pgtable->iop.ops.map_pages = iommu_v2_map_pages; + pgtable->iop.ops.unmap_pages = iommu_v2_unmap_pages; + pgtable->iop.ops.iova_to_phys = iommu_v2_iova_to_phys; + + cfg->pgsize_bitmap = AMD_IOMMU_PGSIZES_V2, + cfg->ias = IOMMU_IN_ADDR_BIT_SIZE, + cfg->oas = IOMMU_OUT_ADDR_BIT_SIZE, + cfg->tlb = &v2_flush_ops; + + return &pgtable->iop; + +err_free_pgd: + free_pgtable_page(pgtable->pgd); + + return NULL; +} + +struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns = { + .alloc = v2_alloc_pgtable, + .free = v2_free_pgtable, +}; diff --git a/drivers/iommu/io-pgtable.c b/drivers/iommu/io-pgtable.c index f4bfcef98297..ac02a45ed798 100644 --- a/drivers/iommu/io-pgtable.c +++ b/drivers/iommu/io-pgtable.c @@ -27,6 +27,7 @@ io_pgtable_init_table[IO_PGTABLE_NUM_FMTS] = { #endif #ifdef CONFIG_AMD_IOMMU [AMD_IOMMU_V1] = &io_pgtable_amd_iommu_v1_init_fns, + [AMD_IOMMU_V2] = &io_pgtable_amd_iommu_v2_init_fns, #endif }; diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h index ca98aeadcc80..ffe616db9409 100644 --- a/include/linux/io-pgtable.h +++ b/include/linux/io-pgtable.h @@ -16,6 +16,7 @@ enum io_pgtable_fmt { ARM_V7S, ARM_MALI_LPAE, AMD_IOMMU_V1, + AMD_IOMMU_V2, APPLE_DART, IO_PGTABLE_NUM_FMTS, }; @@ -260,6 +261,7 @@ extern struct io_pgtable_init_fns io_pgtable_arm_64_lpae_s2_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_v7s_init_fns; extern struct io_pgtable_init_fns io_pgtable_arm_mali_lpae_init_fns; extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v1_init_fns; +extern struct io_pgtable_init_fns io_pgtable_amd_iommu_v2_init_fns; extern struct io_pgtable_init_fns io_pgtable_apple_dart_init_fns; #endif /* __IO_PGTABLE_H */ -- cgit v1.2.3 From e77cab77f2cb3a1ca2ba8df4af45bb35617ac16d Mon Sep 17 00:00:00 2001 From: Ilpo Järvinen Date: Thu, 1 Sep 2022 17:39:32 +0300 Subject: serial: Create uart_xmit_advance() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A very common pattern in the drivers is to advance xmit tail index and do bookkeeping of Tx'ed characters. Create uart_xmit_advance() to handle it. Reviewed-by: Andy Shevchenko Cc: stable Signed-off-by: Ilpo Järvinen Link: https://lore.kernel.org/r/20220901143934.8850-2-ilpo.jarvinen@linux.intel.com Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 6e4f4765d209..1eaea9fe44d8 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -624,6 +624,23 @@ struct uart_state { /* number of characters left in xmit buffer before we ask for more */ #define WAKEUP_CHARS 256 +/** + * uart_xmit_advance - Advance xmit buffer and account Tx'ed chars + * @up: uart_port structure describing the port + * @chars: number of characters sent + * + * This function advances the tail of circular xmit buffer by the number of + * @chars transmitted and handles accounting of transmitted bytes (into + * @up's icount.tx). + */ +static inline void uart_xmit_advance(struct uart_port *up, unsigned int chars) +{ + struct circ_buf *xmit = &up->state->xmit; + + xmit->tail = (xmit->tail + chars) & (UART_XMIT_SIZE - 1); + up->icount.tx += chars; +} + struct module; struct tty_driver; -- cgit v1.2.3 From 2f79cdfe58c13949bbbb65ba5926abfe9561d0ec Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 31 Aug 2022 09:46:12 -0700 Subject: fs: only do a memory barrier for the first set_buffer_uptodate() Commit d4252071b97d ("add barriers to buffer_uptodate and set_buffer_uptodate") added proper memory barriers to the buffer head BH_Uptodate bit, so that anybody who tests a buffer for being up-to-date will be guaranteed to actually see initialized state. However, that commit didn't _just_ add the memory barrier, it also ended up dropping the "was it already set" logic that the BUFFER_FNS() macro had. That's conceptually the right thing for a generic "this is a memory barrier" operation, but in the case of the buffer contents, we really only care about the memory barrier for the _first_ time we set the bit, in that the only memory ordering protection we need is to avoid anybody seeing uninitialized memory contents. Any other access ordering wouldn't be about the BH_Uptodate bit anyway, and would require some other proper lock (typically BH_Lock or the folio lock). A reader that races with somebody invalidating the buffer head isn't an issue wrt the memory ordering, it's a serialization issue. Now, you'd think that the buffer head operations don't matter in this day and age (and I certainly thought so), but apparently some loads still end up being heavy users of buffer heads. In particular, the kernel test robot reported that not having this bit access optimization in place caused a noticeable direct IO performance regression on ext4: fxmark.ssd_ext4_no_jnl_DWTL_54_directio.works/sec -26.5% regression although you presumably need a fast disk and a lot of cores to actually notice. Link: https://lore.kernel.org/all/Yw8L7HTZ%2FdE2%2Fo9C@xsang-OptiPlex-9020/ Reported-by: kernel test robot Tested-by: Fengwei Yin Cc: Mikulas Patocka Cc: Matthew Wilcox (Oracle) Cc: stable@kernel.org Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 089c9ade4325..df518c429667 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -137,6 +137,17 @@ BUFFER_FNS(Defer_Completion, defer_completion) static __always_inline void set_buffer_uptodate(struct buffer_head *bh) { + /* + * If somebody else already set this uptodate, they will + * have done the memory barrier, and a reader will thus + * see *some* valid buffer state. + * + * Any other serialization (with IO errors or whatever that + * might clear the bit) has to come from other state (eg BH_Lock). + */ + if (test_bit(BH_Uptodate, &bh->b_state)) + return; + /* * make it consistent with folio_mark_uptodate * pairs with smp_load_acquire in buffer_uptodate -- cgit v1.2.3 From f2042ed21da7f8886c93efefff61f93e6d57e9bd Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 16 Aug 2022 18:28:05 +0100 Subject: iommu/dma: Make header private Now that dma-iommu.h only contains internal interfaces, make it private to the IOMMU subsytem. Signed-off-by: Robin Murphy Link: https://lore.kernel.org/r/b237e06c56a101f77af142a54b629b27aa179d22.1660668998.git.robin.murphy@arm.com [ joro : re-add stub for iommu_dma_get_resv_regions ] Signed-off-by: Joerg Roedel --- MAINTAINERS | 2 +- drivers/acpi/viot.c | 1 - drivers/gpu/drm/exynos/exynos_drm_dma.c | 1 - drivers/iommu/amd/iommu.c | 2 +- drivers/iommu/apple-dart.c | 3 +- drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c | 2 +- drivers/iommu/arm/arm-smmu/arm-smmu.c | 2 +- drivers/iommu/dma-iommu.c | 3 +- drivers/iommu/dma-iommu.h | 42 +++++++++++++++++++++++ drivers/iommu/intel/iommu.c | 2 +- drivers/iommu/iommu.c | 3 +- drivers/iommu/virtio-iommu.c | 3 +- include/linux/dma-iommu.h | 53 ----------------------------- 13 files changed, 55 insertions(+), 64 deletions(-) create mode 100644 drivers/iommu/dma-iommu.h delete mode 100644 include/linux/dma-iommu.h (limited to 'include/linux') diff --git a/MAINTAINERS b/MAINTAINERS index d30f26e07cd3..09abd5444fe7 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -10626,8 +10626,8 @@ L: iommu@lists.linux.dev S: Maintained T: git git://git.kernel.org/pub/scm/linux/kernel/git/joro/iommu.git F: drivers/iommu/dma-iommu.c +F: drivers/iommu/dma-iommu.h F: drivers/iommu/iova.c -F: include/linux/dma-iommu.h F: include/linux/iova.h IOMMU SUBSYSTEM diff --git a/drivers/acpi/viot.c b/drivers/acpi/viot.c index 6132092dab2a..ed752cbbe636 100644 --- a/drivers/acpi/viot.c +++ b/drivers/acpi/viot.c @@ -19,7 +19,6 @@ #define pr_fmt(fmt) "ACPI: VIOT: " fmt #include -#include #include #include #include diff --git a/drivers/gpu/drm/exynos/exynos_drm_dma.c b/drivers/gpu/drm/exynos/exynos_drm_dma.c index bf33c3084cb4..a971590b8132 100644 --- a/drivers/gpu/drm/exynos/exynos_drm_dma.c +++ b/drivers/gpu/drm/exynos/exynos_drm_dma.c @@ -4,7 +4,6 @@ // Author: Inki Dae // Author: Andrzej Hajda -#include #include #include #include diff --git a/drivers/iommu/amd/iommu.c b/drivers/iommu/amd/iommu.c index da18f40f9abc..b46b0e8ecbdf 100644 --- a/drivers/iommu/amd/iommu.c +++ b/drivers/iommu/amd/iommu.c @@ -18,7 +18,6 @@ #include #include #include -#include #include #include #include @@ -40,6 +39,7 @@ #include #include "amd_iommu.h" +#include "../dma-iommu.h" #include "../irq_remapping.h" #define CMD_SET_TYPE(cmd, t) ((cmd)->data[1] |= ((t) << 28)) diff --git a/drivers/iommu/apple-dart.c b/drivers/iommu/apple-dart.c index 437aed674fba..79643b84cb14 100644 --- a/drivers/iommu/apple-dart.c +++ b/drivers/iommu/apple-dart.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include @@ -33,6 +32,8 @@ #include #include +#include "dma-iommu.h" + #define DART_MAX_STREAMS 16 #define DART_MAX_TTBR 4 #define MAX_DARTS_PER_DEVICE 2 diff --git a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c index 241efb7482e9..b788a38d8fdf 100644 --- a/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c +++ b/drivers/iommu/arm/arm-smmu-v3/arm-smmu-v3.c @@ -14,7 +14,6 @@ #include #include #include -#include #include #include #include @@ -29,6 +28,7 @@ #include #include "arm-smmu-v3.h" +#include "../../dma-iommu.h" #include "../../iommu-sva-lib.h" static bool disable_bypass = true; diff --git a/drivers/iommu/arm/arm-smmu/arm-smmu.c b/drivers/iommu/arm/arm-smmu/arm-smmu.c index 4049980d4914..6c1114a4d6cc 100644 --- a/drivers/iommu/arm/arm-smmu/arm-smmu.c +++ b/drivers/iommu/arm/arm-smmu/arm-smmu.c @@ -21,7 +21,6 @@ #include #include #include -#include #include #include #include @@ -40,6 +39,7 @@ #include #include "arm-smmu.h" +#include "../../dma-iommu.h" /* * Apparently, some Qualcomm arm64 platforms which appear to expose their SMMU diff --git a/drivers/iommu/dma-iommu.c b/drivers/iommu/dma-iommu.c index 6809b33ac9df..9297b741f5e8 100644 --- a/drivers/iommu/dma-iommu.c +++ b/drivers/iommu/dma-iommu.c @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -30,6 +29,8 @@ #include #include +#include "dma-iommu.h" + struct iommu_dma_msi_page { struct list_head list; dma_addr_t iova; diff --git a/drivers/iommu/dma-iommu.h b/drivers/iommu/dma-iommu.h new file mode 100644 index 000000000000..942790009292 --- /dev/null +++ b/drivers/iommu/dma-iommu.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0-only */ +/* + * Copyright (C) 2014-2015 ARM Ltd. + */ +#ifndef __DMA_IOMMU_H +#define __DMA_IOMMU_H + +#include + +#ifdef CONFIG_IOMMU_DMA + +int iommu_get_dma_cookie(struct iommu_domain *domain); +void iommu_put_dma_cookie(struct iommu_domain *domain); + +int iommu_dma_init_fq(struct iommu_domain *domain); + +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); + +extern bool iommu_dma_forcedac; + +#else /* CONFIG_IOMMU_DMA */ + +static inline int iommu_dma_init_fq(struct iommu_domain *domain) +{ + return -EINVAL; +} + +static inline int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + return -ENODEV; +} + +static inline void iommu_put_dma_cookie(struct iommu_domain *domain) +{ +} + +static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ +#endif /* __DMA_IOMMU_H */ diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1ff58d25b713..3bbd865910a6 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -15,7 +15,6 @@ #include #include -#include #include #include #include @@ -26,6 +25,7 @@ #include #include "iommu.h" +#include "../dma-iommu.h" #include "../irq_remapping.h" #include "../iommu-sva-lib.h" #include "pasid.h" diff --git a/drivers/iommu/iommu.c b/drivers/iommu/iommu.c index 83688db121f0..c864cbb16523 100644 --- a/drivers/iommu/iommu.c +++ b/drivers/iommu/iommu.c @@ -8,7 +8,6 @@ #include #include -#include #include #include #include @@ -30,6 +29,8 @@ #include #include +#include "dma-iommu.h" + static struct kset *iommu_group_kset; static DEFINE_IDA(iommu_group_ida); diff --git a/drivers/iommu/virtio-iommu.c b/drivers/iommu/virtio-iommu.c index 31ab9d622b67..5ee24dc6f5ae 100644 --- a/drivers/iommu/virtio-iommu.c +++ b/drivers/iommu/virtio-iommu.c @@ -8,7 +8,6 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include #include #include #include @@ -23,6 +22,8 @@ #include +#include "dma-iommu.h" + #define MSI_IOVA_BASE 0x8000000 #define MSI_IOVA_LENGTH 0x100000 diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h deleted file mode 100644 index e83de4f1f3d6..000000000000 --- a/include/linux/dma-iommu.h +++ /dev/null @@ -1,53 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0-only */ -/* - * Copyright (C) 2014-2015 ARM Ltd. - */ -#ifndef __DMA_IOMMU_H -#define __DMA_IOMMU_H - -#include -#include - -#ifdef CONFIG_IOMMU_DMA -#include -#include -#include - -/* Domain management interface for IOMMU drivers */ -int iommu_get_dma_cookie(struct iommu_domain *domain); -void iommu_put_dma_cookie(struct iommu_domain *domain); - -int iommu_dma_init_fq(struct iommu_domain *domain); - -void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); - -void iommu_dma_free_cpu_cached_iovas(unsigned int cpu, - struct iommu_domain *domain); - -extern bool iommu_dma_forcedac; - -#else /* CONFIG_IOMMU_DMA */ - -struct iommu_domain; -struct device; - -static inline int iommu_dma_init_fq(struct iommu_domain *domain) -{ - return -EINVAL; -} - -static inline int iommu_get_dma_cookie(struct iommu_domain *domain) -{ - return -ENODEV; -} - -static inline void iommu_put_dma_cookie(struct iommu_domain *domain) -{ -} - -static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) -{ -} - -#endif /* CONFIG_IOMMU_DMA */ -#endif /* __DMA_IOMMU_H */ -- cgit v1.2.3 From 9cd4f1434479f1ac25c440c421fbf52069079914 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Sun, 11 Sep 2022 11:18:45 +0800 Subject: iommu/vt-d: Fix possible recursive locking in intel_iommu_init() The global rwsem dmar_global_lock was introduced by commit 3a5670e8ac932 ("iommu/vt-d: Introduce a rwsem to protect global data structures"). It is used to protect DMAR related global data from DMAR hotplug operations. The dmar_global_lock used in the intel_iommu_init() might cause recursive locking issue, for example, intel_iommu_get_resv_regions() is taking the dmar_global_lock from within a section where intel_iommu_init() already holds it via probe_acpi_namespace_devices(). Using dmar_global_lock in intel_iommu_init() could be relaxed since it is unlikely that any IO board must be hot added before the IOMMU subsystem is initialized. This eliminates the possible recursive locking issue by moving down DMAR hotplug support after the IOMMU is initialized and removing the uses of dmar_global_lock in intel_iommu_init(). Fixes: d5692d4af08cd ("iommu/vt-d: Fix suspicious RCU usage in probe_acpi_namespace_devices()") Reported-by: Robin Murphy Signed-off-by: Lu Baolu Reviewed-by: Kevin Tian Link: https://lore.kernel.org/r/894db0ccae854b35c73814485569b634237b5538.1657034828.git.robin.murphy@arm.com Link: https://lore.kernel.org/r/20220718235325.3952426-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 7 +++++++ drivers/iommu/intel/iommu.c | 27 ++------------------------- include/linux/dmar.h | 4 +++- 3 files changed, 12 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 5a8f780e7ffd..497c912ad9e1 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2349,6 +2349,13 @@ static int dmar_device_hotplug(acpi_handle handle, bool insert) if (!dmar_in_use()) return 0; + /* + * It's unlikely that any I/O board is hot added before the IOMMU + * subsystem is initialized. + */ + if (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled) + return -EOPNOTSUPP; + if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) { tmp = handle; } else { diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 64d30895a4c8..1f2cd43cf9bc 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3019,13 +3019,7 @@ static int __init init_dmars(void) #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { - /* - * Call dmar_alloc_hwirq() with dmar_global_lock held, - * could cause possible lock race condition. - */ - up_write(&dmar_global_lock); ret = intel_svm_enable_prq(iommu); - down_write(&dmar_global_lock); if (ret) goto free_iommu; } @@ -3938,7 +3932,6 @@ int __init intel_iommu_init(void) force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || platform_optin_force_iommu(); - down_write(&dmar_global_lock); if (dmar_table_init()) { if (force_on) panic("tboot: Failed to initialize DMAR table\n"); @@ -3951,16 +3944,6 @@ int __init intel_iommu_init(void) goto out_free_dmar; } - up_write(&dmar_global_lock); - - /* - * The bus notifier takes the dmar_global_lock, so lockdep will - * complain later when we register it under the lock. - */ - dmar_register_bus_notifier(); - - down_write(&dmar_global_lock); - if (!no_iommu) intel_iommu_debugfs_init(); @@ -4005,11 +3988,9 @@ int __init intel_iommu_init(void) pr_err("Initialization failed\n"); goto out_free_dmar; } - up_write(&dmar_global_lock); init_iommu_pm_ops(); - down_read(&dmar_global_lock); for_each_active_iommu(iommu, drhd) { /* * The flush queue implementation does not perform @@ -4027,13 +4008,11 @@ int __init intel_iommu_init(void) "%s", iommu->name); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); } - up_read(&dmar_global_lock); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); - down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); @@ -4044,17 +4023,15 @@ int __init intel_iommu_init(void) iommu_disable_protect_mem_regions(iommu); } - up_read(&dmar_global_lock); - - pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); intel_iommu_enabled = 1; + dmar_register_bus_notifier(); + pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); return 0; out_free_dmar: intel_iommu_free_dmars(); - up_write(&dmar_global_lock); return ret; } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index d81a51978d01..8917a32173c4 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -65,6 +65,7 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; +extern int intel_iommu_enabled; #define for_each_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ @@ -88,7 +89,8 @@ extern struct list_head dmar_drhd_units; static inline bool dmar_rcu_check(void) { return rwsem_is_locked(&dmar_global_lock) || - system_state == SYSTEM_BOOTING; + system_state == SYSTEM_BOOTING || + (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled); } #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) -- cgit v1.2.3 From 4b9d1bc7911c9d9159c4881455c584cde99fbb19 Mon Sep 17 00:00:00 2001 From: Jiangshan Yi Date: Tue, 6 Sep 2022 13:49:01 +0800 Subject: Input: hp_sdc: fix spelling typo in comment Fix spelling typo in comment. Reported-by: k2ci Signed-off-by: Jiangshan Yi Signed-off-by: Helge Deller --- include/linux/hp_sdc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hp_sdc.h b/include/linux/hp_sdc.h index 6f1dee7e67e0..9be8704e2d38 100644 --- a/include/linux/hp_sdc.h +++ b/include/linux/hp_sdc.h @@ -180,7 +180,7 @@ switch (val) { \ #define HP_SDC_CMD_SET_IM 0x40 /* 010xxxxx == set irq mask */ -/* The documents provided do not explicitly state that all registers betweem +/* The documents provided do not explicitly state that all registers between * 0x01 and 0x1f inclusive can be read by sending their register index as a * command, but this is implied and appears to be the case. */ -- cgit v1.2.3 From 7ebb5f8e001037efbdf18afabbbebf71bd98825e Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Wed, 21 Sep 2022 10:40:53 +0800 Subject: Revert "iommu/vt-d: Fix possible recursive locking in intel_iommu_init()" This reverts commit 9cd4f1434479f1ac25c440c421fbf52069079914. Some issues were reported on the original commit. Some thunderbolt devices don't work anymore due to the following DMA fault. DMAR: DRHD: handling fault status reg 2 DMAR: [INTR-REMAP] Request device [09:00.0] fault index 0x8080 [fault reason 0x25] Blocked a compatibility format interrupt request Bring it back for now to avoid functional regression. Fixes: 9cd4f1434479f ("iommu/vt-d: Fix possible recursive locking in intel_iommu_init()") Link: https://lore.kernel.org/linux-iommu/485A6EA5-6D58-42EA-B298-8571E97422DE@getmailspring.com/ Link: https://bugzilla.kernel.org/show_bug.cgi?id=216497 Cc: Mika Westerberg Cc: # 5.19.x Reported-and-tested-by: George Hilliard Signed-off-by: Lu Baolu Link: https://lore.kernel.org/r/20220920081701.3453504-1-baolu.lu@linux.intel.com Signed-off-by: Joerg Roedel --- drivers/iommu/intel/dmar.c | 7 ------- drivers/iommu/intel/iommu.c | 27 +++++++++++++++++++++++++-- include/linux/dmar.h | 4 +--- 3 files changed, 26 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/drivers/iommu/intel/dmar.c b/drivers/iommu/intel/dmar.c index 497c912ad9e1..5a8f780e7ffd 100644 --- a/drivers/iommu/intel/dmar.c +++ b/drivers/iommu/intel/dmar.c @@ -2349,13 +2349,6 @@ static int dmar_device_hotplug(acpi_handle handle, bool insert) if (!dmar_in_use()) return 0; - /* - * It's unlikely that any I/O board is hot added before the IOMMU - * subsystem is initialized. - */ - if (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled) - return -EOPNOTSUPP; - if (dmar_detect_dsm(handle, DMAR_DSM_FUNC_DRHD)) { tmp = handle; } else { diff --git a/drivers/iommu/intel/iommu.c b/drivers/iommu/intel/iommu.c index 1f2cd43cf9bc..64d30895a4c8 100644 --- a/drivers/iommu/intel/iommu.c +++ b/drivers/iommu/intel/iommu.c @@ -3019,7 +3019,13 @@ static int __init init_dmars(void) #ifdef CONFIG_INTEL_IOMMU_SVM if (pasid_supported(iommu) && ecap_prs(iommu->ecap)) { + /* + * Call dmar_alloc_hwirq() with dmar_global_lock held, + * could cause possible lock race condition. + */ + up_write(&dmar_global_lock); ret = intel_svm_enable_prq(iommu); + down_write(&dmar_global_lock); if (ret) goto free_iommu; } @@ -3932,6 +3938,7 @@ int __init intel_iommu_init(void) force_on = (!intel_iommu_tboot_noforce && tboot_force_iommu()) || platform_optin_force_iommu(); + down_write(&dmar_global_lock); if (dmar_table_init()) { if (force_on) panic("tboot: Failed to initialize DMAR table\n"); @@ -3944,6 +3951,16 @@ int __init intel_iommu_init(void) goto out_free_dmar; } + up_write(&dmar_global_lock); + + /* + * The bus notifier takes the dmar_global_lock, so lockdep will + * complain later when we register it under the lock. + */ + dmar_register_bus_notifier(); + + down_write(&dmar_global_lock); + if (!no_iommu) intel_iommu_debugfs_init(); @@ -3988,9 +4005,11 @@ int __init intel_iommu_init(void) pr_err("Initialization failed\n"); goto out_free_dmar; } + up_write(&dmar_global_lock); init_iommu_pm_ops(); + down_read(&dmar_global_lock); for_each_active_iommu(iommu, drhd) { /* * The flush queue implementation does not perform @@ -4008,11 +4027,13 @@ int __init intel_iommu_init(void) "%s", iommu->name); iommu_device_register(&iommu->iommu, &intel_iommu_ops, NULL); } + up_read(&dmar_global_lock); bus_set_iommu(&pci_bus_type, &intel_iommu_ops); if (si_domain && !hw_pass_through) register_memory_notifier(&intel_iommu_memory_nb); + down_read(&dmar_global_lock); if (probe_acpi_namespace_devices()) pr_warn("ACPI name space devices didn't probe correctly\n"); @@ -4023,15 +4044,17 @@ int __init intel_iommu_init(void) iommu_disable_protect_mem_regions(iommu); } + up_read(&dmar_global_lock); - intel_iommu_enabled = 1; - dmar_register_bus_notifier(); pr_info("Intel(R) Virtualization Technology for Directed I/O\n"); + intel_iommu_enabled = 1; + return 0; out_free_dmar: intel_iommu_free_dmars(); + up_write(&dmar_global_lock); return ret; } diff --git a/include/linux/dmar.h b/include/linux/dmar.h index 8917a32173c4..d81a51978d01 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -65,7 +65,6 @@ struct dmar_pci_notify_info { extern struct rw_semaphore dmar_global_lock; extern struct list_head dmar_drhd_units; -extern int intel_iommu_enabled; #define for_each_drhd_unit(drhd) \ list_for_each_entry_rcu(drhd, &dmar_drhd_units, list, \ @@ -89,8 +88,7 @@ extern int intel_iommu_enabled; static inline bool dmar_rcu_check(void) { return rwsem_is_locked(&dmar_global_lock) || - system_state == SYSTEM_BOOTING || - (IS_ENABLED(CONFIG_INTEL_IOMMU) && !intel_iommu_enabled); + system_state == SYSTEM_BOOTING; } #define dmar_rcu_dereference(p) rcu_dereference_check((p), dmar_rcu_check()) -- cgit v1.2.3 From d7f06bdd6ee87fbefa05af5f57361d85e7715b11 Mon Sep 17 00:00:00 2001 From: Phil Auld Date: Tue, 6 Sep 2022 16:35:42 -0400 Subject: drivers/base: Fix unsigned comparison to -1 in CPUMAP_FILE_MAX_BYTES As PAGE_SIZE is unsigned long, -1 > PAGE_SIZE when NR_CPUS <= 3. This leads to very large file sizes: topology$ ls -l total 0 -r--r--r-- 1 root root 18446744073709551615 Sep 5 11:59 core_cpus -r--r--r-- 1 root root 4096 Sep 5 11:59 core_cpus_list -r--r--r-- 1 root root 4096 Sep 5 10:58 core_id -r--r--r-- 1 root root 18446744073709551615 Sep 5 10:10 core_siblings -r--r--r-- 1 root root 4096 Sep 5 11:59 core_siblings_list -r--r--r-- 1 root root 18446744073709551615 Sep 5 11:59 die_cpus -r--r--r-- 1 root root 4096 Sep 5 11:59 die_cpus_list -r--r--r-- 1 root root 4096 Sep 5 11:59 die_id -r--r--r-- 1 root root 18446744073709551615 Sep 5 11:59 package_cpus -r--r--r-- 1 root root 4096 Sep 5 11:59 package_cpus_list -r--r--r-- 1 root root 4096 Sep 5 10:58 physical_package_id -r--r--r-- 1 root root 18446744073709551615 Sep 5 10:10 thread_siblings -r--r--r-- 1 root root 4096 Sep 5 11:59 thread_siblings_list Adjust the inequality to catch the case when NR_CPUS is configured to a small value. Fixes: 7ee951acd31a ("drivers/base: fix userspace break from using bin_attributes for cpumap and cpulist") Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Yury Norov Cc: stable@vger.kernel.org Cc: feng xiangjun Reported-by: feng xiangjun Signed-off-by: Phil Auld Signed-off-by: Yury Norov Link: https://lore.kernel.org/r/20220906203542.1796629-1-pauld@redhat.com Signed-off-by: Greg Kroah-Hartman --- include/linux/cpumask.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index bd047864c7ac..e8ad12b5b9d2 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -1127,9 +1127,10 @@ cpumap_print_list_to_buf(char *buf, const struct cpumask *mask, * cover a worst-case of every other cpu being on one of two nodes for a * very large NR_CPUS. * - * Use PAGE_SIZE as a minimum for smaller configurations. + * Use PAGE_SIZE as a minimum for smaller configurations while avoiding + * unsigned comparison to -1. */ -#define CPUMAP_FILE_MAX_BYTES ((((NR_CPUS * 9)/32 - 1) > PAGE_SIZE) \ +#define CPUMAP_FILE_MAX_BYTES (((NR_CPUS * 9)/32 > PAGE_SIZE) \ ? (NR_CPUS * 9)/32 - 1 : PAGE_SIZE) #define CPULIST_FILE_MAX_BYTES (((NR_CPUS * 7)/2 > PAGE_SIZE) ? (NR_CPUS * 7)/2 : PAGE_SIZE) -- cgit v1.2.3 From 4f58330fcc8482aa90674e1f40f601e82f18ed4a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Tue, 13 Sep 2022 12:47:20 +0100 Subject: iommu/iova: Fix module config properly IOMMU_IOVA is intended to be an optional library for users to select as and when they desire. Since it can be a module now, this means that built-in code which has chosen not to select it should not fail to link if it happens to have selected as a module by someone else. Replace IS_ENABLED() with IS_REACHABLE() to do the right thing. CC: Thierry Reding Reported-by: John Garry Fixes: 15bbdec3931e ("iommu: Make the iova library a module") Signed-off-by: Robin Murphy Reviewed-by: Thierry Reding Link: https://lore.kernel.org/r/548c2f683ca379aface59639a8f0cccc3a1ac050.1663069227.git.robin.murphy@arm.com Signed-off-by: Joerg Roedel --- include/linux/iova.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index c6ba6d95d79c..83c00fac2acb 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,7 +75,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -#if IS_ENABLED(CONFIG_IOMMU_IOVA) +#if IS_REACHABLE(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); -- cgit v1.2.3