summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--arch/arm/mach-mvebu/coherency.c62
-rw-r--r--arch/arm/mach-mvebu/coherency_ll.S77
2 files changed, 106 insertions, 33 deletions
diff --git a/arch/arm/mach-mvebu/coherency.c b/arch/arm/mach-mvebu/coherency.c
index d5a975b6a590..477202fd39cc 100644
--- a/arch/arm/mach-mvebu/coherency.c
+++ b/arch/arm/mach-mvebu/coherency.c
@@ -29,8 +29,10 @@
#include <linux/slab.h>
#include <linux/mbus.h>
#include <linux/clk.h>
+#include <linux/pci.h>
#include <asm/smp_plat.h>
#include <asm/cacheflush.h>
+#include <asm/mach/map.h>
#include "armada-370-xp.h"
#include "coherency.h"
#include "mvebu-soc-id.h"
@@ -274,8 +276,8 @@ static struct dma_map_ops mvebu_hwcc_dma_ops = {
.set_dma_mask = arm_dma_set_mask,
};
-static int mvebu_hwcc_platform_notifier(struct notifier_block *nb,
- unsigned long event, void *__dev)
+static int mvebu_hwcc_notifier(struct notifier_block *nb,
+ unsigned long event, void *__dev)
{
struct device *dev = __dev;
@@ -286,8 +288,8 @@ static int mvebu_hwcc_platform_notifier(struct notifier_block *nb,
return NOTIFY_OK;
}
-static struct notifier_block mvebu_hwcc_platform_nb = {
- .notifier_call = mvebu_hwcc_platform_notifier,
+static struct notifier_block mvebu_hwcc_nb = {
+ .notifier_call = mvebu_hwcc_notifier,
};
static void __init armada_370_coherency_init(struct device_node *np)
@@ -308,9 +310,47 @@ static void __init armada_370_coherency_init(struct device_node *np)
set_cpu_coherent();
}
+/*
+ * This ioremap hook is used on Armada 375/38x to ensure that PCIe
+ * memory areas are mapped as MT_UNCACHED instead of MT_DEVICE. This
+ * is needed as a workaround for a deadlock issue between the PCIe
+ * interface and the cache controller.
+ */
+static void __iomem *
+armada_pcie_wa_ioremap_caller(phys_addr_t phys_addr, size_t size,
+ unsigned int mtype, void *caller)
+{
+ struct resource pcie_mem;
+
+ mvebu_mbus_get_pcie_mem_aperture(&pcie_mem);
+
+ if (pcie_mem.start <= phys_addr && (phys_addr + size) <= pcie_mem.end)
+ mtype = MT_UNCACHED;
+
+ return __arm_ioremap_caller(phys_addr, size, mtype, caller);
+}
+
static void __init armada_375_380_coherency_init(struct device_node *np)
{
+ struct device_node *cache_dn;
+
coherency_cpu_base = of_iomap(np, 0);
+ arch_ioremap_caller = armada_pcie_wa_ioremap_caller;
+
+ /*
+ * Add the PL310 property "arm,io-coherent". This makes sure the
+ * outer sync operation is not used, which allows to
+ * workaround the system erratum that causes deadlocks when
+ * doing PCIe in an SMP situation on Armada 375 and Armada
+ * 38x.
+ */
+ for_each_compatible_node(cache_dn, NULL, "arm,pl310-cache") {
+ struct property *p;
+
+ p = kzalloc(sizeof(*p), GFP_KERNEL);
+ p->name = kstrdup("arm,io-coherent", GFP_KERNEL);
+ of_add_property(cache_dn, p);
+ }
}
static int coherency_type(void)
@@ -375,9 +415,21 @@ static int __init coherency_late_init(void)
}
bus_register_notifier(&platform_bus_type,
- &mvebu_hwcc_platform_nb);
+ &mvebu_hwcc_nb);
return 0;
}
postcore_initcall(coherency_late_init);
+
+#if IS_ENABLED(CONFIG_PCI)
+static int __init coherency_pci_init(void)
+{
+ if (coherency_available())
+ bus_register_notifier(&pci_bus_type,
+ &mvebu_hwcc_nb);
+ return 0;
+}
+
+arch_initcall(coherency_pci_init);
+#endif
diff --git a/arch/arm/mach-mvebu/coherency_ll.S b/arch/arm/mach-mvebu/coherency_ll.S
index 6828f9f157b0..510c29e079ca 100644
--- a/arch/arm/mach-mvebu/coherency_ll.S
+++ b/arch/arm/mach-mvebu/coherency_ll.S
@@ -24,52 +24,69 @@
#include <asm/cp15.h>
.text
-/* Returns with the coherency address in r1 (r0 is untouched)*/
+/* Returns the coherency base address in r1 (r0 is untouched) */
ENTRY(ll_get_coherency_base)
mrc p15, 0, r1, c1, c0, 0
tst r1, #CR_M @ Check MMU bit enabled
bne 1f
- /* use physical address of the coherency register */
+ /*
+ * MMU is disabled, use the physical address of the coherency
+ * base address.
+ */
adr r1, 3f
ldr r3, [r1]
ldr r1, [r1, r3]
b 2f
1:
- /* use virtual address of the coherency register */
+ /*
+ * MMU is enabled, use the virtual address of the coherency
+ * base address.
+ */
ldr r1, =coherency_base
ldr r1, [r1]
2:
mov pc, lr
ENDPROC(ll_get_coherency_base)
-/* Returns with the CPU ID in r3 (r0 is untouched)*/
-ENTRY(ll_get_cpuid)
+/*
+ * Returns the coherency CPU mask in r3 (r0 is untouched). This
+ * coherency CPU mask can be used with the coherency fabric
+ * configuration and control registers. Note that the mask is already
+ * endian-swapped as appropriate so that the calling functions do not
+ * have to care about endianness issues while accessing the coherency
+ * fabric registers
+ */
+ENTRY(ll_get_coherency_cpumask)
mrc 15, 0, r3, cr0, cr0, 5
and r3, r3, #15
mov r2, #(1 << 24)
lsl r3, r2, r3
-ARM_BE8(rev r1, r1)
+ARM_BE8(rev r3, r3)
mov pc, lr
-ENDPROC(ll_get_cpuid)
+ENDPROC(ll_get_coherency_cpumask)
-/* ll_add_cpu_to_smp_group, ll_enable_coherency and
- * ll_disable_coherency use strex/ldrex whereas MMU can be off. The
- * Armada XP SoC has an exclusive monitor that can track transactions
- * to Device and/or SO and as such also when MMU is disabled the
- * exclusive transactions will be functional
+/*
+ * ll_add_cpu_to_smp_group(), ll_enable_coherency() and
+ * ll_disable_coherency() use the strex/ldrex instructions while the
+ * MMU can be disabled. The Armada XP SoC has an exclusive monitor
+ * that tracks transactions to Device and/or SO memory and thanks to
+ * that, exclusive transactions are functional even when the MMU is
+ * disabled.
*/
ENTRY(ll_add_cpu_to_smp_group)
/*
- * r0 being untouched in ll_get_coherency_base and
- * ll_get_cpuid, we can use it to save lr modifing it with the
- * following bl
+ * As r0 is not modified by ll_get_coherency_base() and
+ * ll_get_coherency_cpumask(), we use it to temporarly save lr
+ * and avoid it being modified by the branch and link
+ * calls. This function is used very early in the secondary
+ * CPU boot, and no stack is available at this point.
*/
- mov r0, lr
+ mov r0, lr
bl ll_get_coherency_base
- bl ll_get_cpuid
- mov lr, r0
+ bl ll_get_coherency_cpumask
+ mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CFG_REG_OFFSET
1:
ldrex r2, [r0]
@@ -82,13 +99,15 @@ ENDPROC(ll_add_cpu_to_smp_group)
ENTRY(ll_enable_coherency)
/*
- * r0 being untouched in ll_get_coherency_base and
- * ll_get_cpuid, we can use it to save lr modifing it with the
- * following bl
+ * As r0 is not modified by ll_get_coherency_base() and
+ * ll_get_coherency_cpumask(), we use it to temporarly save lr
+ * and avoid it being modified by the branch and link
+ * calls. This function is used very early in the secondary
+ * CPU boot, and no stack is available at this point.
*/
mov r0, lr
bl ll_get_coherency_base
- bl ll_get_cpuid
+ bl ll_get_coherency_cpumask
mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
1:
@@ -104,14 +123,16 @@ ENDPROC(ll_enable_coherency)
ENTRY(ll_disable_coherency)
/*
- * r0 being untouched in ll_get_coherency_base and
- * ll_get_cpuid, we can use it to save lr modifing it with the
- * following bl
+ * As r0 is not modified by ll_get_coherency_base() and
+ * ll_get_coherency_cpumask(), we use it to temporarly save lr
+ * and avoid it being modified by the branch and link
+ * calls. This function is used very early in the secondary
+ * CPU boot, and no stack is available at this point.
*/
- mov r0, lr
+ mov r0, lr
bl ll_get_coherency_base
- bl ll_get_cpuid
- mov lr, r0
+ bl ll_get_coherency_cpumask
+ mov lr, r0
add r0, r1, #ARMADA_XP_CFB_CTL_REG_OFFSET
1:
ldrex r2, [r0]