diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-03 16:15:17 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2013-04-03 16:15:17 -0700 |
commit | 66ade474237745a57b7e87da9a93c7ec69fd52bb (patch) | |
tree | a3e08728d8584fde056ec51f4b2249e3dd6032c5 /arch | |
parent | cbfa0e7204159515e63321142bcc2d6dcb854045 (diff) | |
parent | 4e1db26a0b42e2b6e27c05d68adcc01709c2eed2 (diff) | |
download | linux-66ade474237745a57b7e87da9a93c7ec69fd52bb.tar.bz2 |
Merge branch 'fixes' of git://git.linaro.org/people/rmk/linux-arm
Pull ARM fixes from Russell King:
"Another round of ARM fixes, which include:
- Fixing a problem with LPAE mapping sections
- Reporting of some hwcaps on Krait CPUs
- Avoiding repetitive warnings in the breakpoint code
- Fixing a build error noticed on Dove platforms with PJ4 CPUs
- Fix masking of level 2 cache revision.
- Fixing timer-based udelay()
- A larger fix for an erratum causing people major grief with Cortex
A15 CPUs"
* 'fixes' of git://git.linaro.org/people/rmk/linux-arm:
ARM: 7690/1: mm: fix CONFIG_LPAE typos
ARM: 7689/1: add unwind annotations to ftrace asm
ARM: 7685/1: delay: use private ticks_per_jiffy field for timer-based delay ops
ARM: 7684/1: errata: Workaround for Cortex-A15 erratum 798181 (TLBI/DSB operations)
ARM: 7682/1: cache-l2x0: fix masking of RTL revision numbering and set_debug init
ARM: iWMMXt: always enable iWMMXt support with PJ4 CPUs
ARM: 7681/1: hw_breakpoint: use warn_once to avoid spam from reset_ctrl_regs()
ARM: 7678/1: Work around faulty ISAR0 register in some Krait CPUs
ARM: 7680/1: Detect support for SDIV/UDIV from ISAR0 register
ARM: 7679/1: Clear IDIVT hwcap if CONFIG_ARM_THUMB=n
ARM: 7677/1: LPAE: Fix mapping in alloc_init_section for unaligned addresses
ARM: KVM: vgic: take distributor lock on sync_hwstate path
ARM: KVM: vgic: force EOIed LRs to the empty state
Diffstat (limited to 'arch')
-rw-r--r-- | arch/arm/Kconfig | 14 | ||||
-rw-r--r-- | arch/arm/include/asm/delay.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/highmem.h | 7 | ||||
-rw-r--r-- | arch/arm/include/asm/mmu_context.h | 2 | ||||
-rw-r--r-- | arch/arm/include/asm/tlbflush.h | 15 | ||||
-rw-r--r-- | arch/arm/kernel/entry-common.S | 12 | ||||
-rw-r--r-- | arch/arm/kernel/head.S | 2 | ||||
-rw-r--r-- | arch/arm/kernel/hw_breakpoint.c | 6 | ||||
-rw-r--r-- | arch/arm/kernel/setup.c | 24 | ||||
-rw-r--r-- | arch/arm/kernel/smp.c | 3 | ||||
-rw-r--r-- | arch/arm/kernel/smp_tlb.c | 66 | ||||
-rw-r--r-- | arch/arm/kvm/vgic.c | 35 | ||||
-rw-r--r-- | arch/arm/lib/delay.c | 8 | ||||
-rw-r--r-- | arch/arm/mm/cache-l2x0.c | 11 | ||||
-rw-r--r-- | arch/arm/mm/context.c | 3 | ||||
-rw-r--r-- | arch/arm/mm/mmu.c | 73 | ||||
-rw-r--r-- | arch/arm/mm/proc-v7.S | 19 |
17 files changed, 230 insertions, 72 deletions
diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 13b739469c51..1cacda426a0e 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -1183,9 +1183,9 @@ config ARM_NR_BANKS default 8 config IWMMXT - bool "Enable iWMMXt support" + bool "Enable iWMMXt support" if !CPU_PJ4 depends on CPU_XSCALE || CPU_XSC3 || CPU_MOHAWK || CPU_PJ4 - default y if PXA27x || PXA3xx || ARCH_MMP + default y if PXA27x || PXA3xx || ARCH_MMP || CPU_PJ4 help Enable support for iWMMXt context switching at run time if running on a CPU that supports it. @@ -1439,6 +1439,16 @@ config ARM_ERRATA_775420 to deadlock. This workaround puts DSB before executing ISB if an abort may occur on cache maintenance. +config ARM_ERRATA_798181 + bool "ARM errata: TLBI/DSB failure on Cortex-A15" + depends on CPU_V7 && SMP + help + On Cortex-A15 (r0p0..r3p2) the TLBI*IS/DSB operations are not + adequately shooting down all use of the old entries. This + option enables the Linux kernel workaround for this erratum + which sends an IPI to the CPUs that are running the same ASID + as the one being invalidated. + endmenu source "arch/arm/common/Kconfig" diff --git a/arch/arm/include/asm/delay.h b/arch/arm/include/asm/delay.h index 720799fd3a81..dff714d886d5 100644 --- a/arch/arm/include/asm/delay.h +++ b/arch/arm/include/asm/delay.h @@ -24,7 +24,7 @@ extern struct arm_delay_ops { void (*delay)(unsigned long); void (*const_udelay)(unsigned long); void (*udelay)(unsigned long); - bool const_clock; + unsigned long ticks_per_jiffy; } arm_delay_ops; #define __delay(n) arm_delay_ops.delay(n) diff --git a/arch/arm/include/asm/highmem.h b/arch/arm/include/asm/highmem.h index 8c5e828f484d..91b99abe7a95 100644 --- a/arch/arm/include/asm/highmem.h +++ b/arch/arm/include/asm/highmem.h @@ -41,6 +41,13 @@ extern void kunmap_high(struct page *page); #endif #endif +/* + * Needed to be able to broadcast the TLB invalidation for kmap. + */ +#ifdef CONFIG_ARM_ERRATA_798181 +#undef ARCH_NEEDS_KMAP_HIGH_GET +#endif + #ifdef ARCH_NEEDS_KMAP_HIGH_GET extern void *kmap_high_get(struct page *page); #else diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 863a6611323c..a7b85e0d0cc1 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -27,6 +27,8 @@ void __check_vmalloc_seq(struct mm_struct *mm); void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk); #define init_new_context(tsk,mm) ({ atomic64_set(&mm->context.id, 0); 0; }) +DECLARE_PER_CPU(atomic64_t, active_asids); + #else /* !CONFIG_CPU_HAS_ASID */ #ifdef CONFIG_MMU diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index 4db8c8820f0d..9e9c041358ca 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -450,6 +450,21 @@ static inline void local_flush_bp_all(void) isb(); } +#ifdef CONFIG_ARM_ERRATA_798181 +static inline void dummy_flush_tlb_a15_erratum(void) +{ + /* + * Dummy TLBIMVAIS. Using the unmapped address 0 and ASID 0. + */ + asm("mcr p15, 0, %0, c8, c3, 1" : : "r" (0)); + dsb(); +} +#else +static inline void dummy_flush_tlb_a15_erratum(void) +{ +} +#endif + /* * flush_pmd_entry * diff --git a/arch/arm/kernel/entry-common.S b/arch/arm/kernel/entry-common.S index 3248cde504ed..fefd7f971437 100644 --- a/arch/arm/kernel/entry-common.S +++ b/arch/arm/kernel/entry-common.S @@ -276,7 +276,13 @@ ENDPROC(ftrace_graph_caller_old) */ .macro mcount_enter +/* + * This pad compensates for the push {lr} at the call site. Note that we are + * unable to unwind through a function which does not otherwise save its lr. + */ + UNWIND(.pad #4) stmdb sp!, {r0-r3, lr} + UNWIND(.save {r0-r3, lr}) .endm .macro mcount_get_lr reg @@ -289,6 +295,7 @@ ENDPROC(ftrace_graph_caller_old) .endm ENTRY(__gnu_mcount_nc) +UNWIND(.fnstart) #ifdef CONFIG_DYNAMIC_FTRACE mov ip, lr ldmia sp!, {lr} @@ -296,17 +303,22 @@ ENTRY(__gnu_mcount_nc) #else __mcount #endif +UNWIND(.fnend) ENDPROC(__gnu_mcount_nc) #ifdef CONFIG_DYNAMIC_FTRACE ENTRY(ftrace_caller) +UNWIND(.fnstart) __ftrace_caller +UNWIND(.fnend) ENDPROC(ftrace_caller) #endif #ifdef CONFIG_FUNCTION_GRAPH_TRACER ENTRY(ftrace_graph_caller) +UNWIND(.fnstart) __ftrace_graph_caller +UNWIND(.fnend) ENDPROC(ftrace_graph_caller) #endif diff --git a/arch/arm/kernel/head.S b/arch/arm/kernel/head.S index e0eb9a1cae77..8bac553fe213 100644 --- a/arch/arm/kernel/head.S +++ b/arch/arm/kernel/head.S @@ -267,7 +267,7 @@ __create_page_tables: addne r6, r6, #1 << SECTION_SHIFT strne r6, [r3] -#if defined(CONFIG_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) +#if defined(CONFIG_ARM_LPAE) && defined(CONFIG_CPU_ENDIAN_BE8) sub r4, r4, #4 @ Fixup page table pointer @ for 64-bit descriptors #endif diff --git a/arch/arm/kernel/hw_breakpoint.c b/arch/arm/kernel/hw_breakpoint.c index 96093b75ab90..5dc1aa6f0f7d 100644 --- a/arch/arm/kernel/hw_breakpoint.c +++ b/arch/arm/kernel/hw_breakpoint.c @@ -966,7 +966,7 @@ static void reset_ctrl_regs(void *unused) } if (err) { - pr_warning("CPU %d debug is powered down!\n", cpu); + pr_warn_once("CPU %d debug is powered down!\n", cpu); cpumask_or(&debug_err_mask, &debug_err_mask, cpumask_of(cpu)); return; } @@ -987,7 +987,7 @@ clear_vcr: isb(); if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { - pr_warning("CPU %d failed to disable vector catch\n", cpu); + pr_warn_once("CPU %d failed to disable vector catch\n", cpu); return; } @@ -1007,7 +1007,7 @@ clear_vcr: } if (cpumask_intersects(&debug_err_mask, cpumask_of(cpu))) { - pr_warning("CPU %d failed to clear debug register pairs\n", cpu); + pr_warn_once("CPU %d failed to clear debug register pairs\n", cpu); return; } diff --git a/arch/arm/kernel/setup.c b/arch/arm/kernel/setup.c index 3f6cbb2e3eda..d343a6c3a6d1 100644 --- a/arch/arm/kernel/setup.c +++ b/arch/arm/kernel/setup.c @@ -353,6 +353,23 @@ void __init early_print(const char *str, ...) printk("%s", buf); } +static void __init cpuid_init_hwcaps(void) +{ + unsigned int divide_instrs; + + if (cpu_architecture() < CPU_ARCH_ARMv7) + return; + + divide_instrs = (read_cpuid_ext(CPUID_EXT_ISAR0) & 0x0f000000) >> 24; + + switch (divide_instrs) { + case 2: + elf_hwcap |= HWCAP_IDIVA; + case 1: + elf_hwcap |= HWCAP_IDIVT; + } +} + static void __init feat_v6_fixup(void) { int id = read_cpuid_id(); @@ -483,8 +500,11 @@ static void __init setup_processor(void) snprintf(elf_platform, ELF_PLATFORM_SIZE, "%s%c", list->elf_name, ENDIANNESS); elf_hwcap = list->elf_hwcap; + + cpuid_init_hwcaps(); + #ifndef CONFIG_ARM_THUMB - elf_hwcap &= ~HWCAP_THUMB; + elf_hwcap &= ~(HWCAP_THUMB | HWCAP_IDIVT); #endif feat_v6_fixup(); @@ -524,7 +544,7 @@ int __init arm_add_memory(phys_addr_t start, phys_addr_t size) size -= start & ~PAGE_MASK; bank->start = PAGE_ALIGN(start); -#ifndef CONFIG_LPAE +#ifndef CONFIG_ARM_LPAE if (bank->start + size < bank->start) { printk(KERN_CRIT "Truncating memory at 0x%08llx to fit in " "32-bit physical address space\n", (long long)start); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 79078edbb9bc..1f2ccccaf009 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -673,9 +673,6 @@ static int cpufreq_callback(struct notifier_block *nb, if (freq->flags & CPUFREQ_CONST_LOOPS) return NOTIFY_OK; - if (arm_delay_ops.const_clock) - return NOTIFY_OK; - if (!per_cpu(l_p_j_ref, cpu)) { per_cpu(l_p_j_ref, cpu) = per_cpu(cpu_data, cpu).loops_per_jiffy; diff --git a/arch/arm/kernel/smp_tlb.c b/arch/arm/kernel/smp_tlb.c index bd0300531399..e82e1d248772 100644 --- a/arch/arm/kernel/smp_tlb.c +++ b/arch/arm/kernel/smp_tlb.c @@ -12,6 +12,7 @@ #include <asm/smp_plat.h> #include <asm/tlbflush.h> +#include <asm/mmu_context.h> /**********************************************************************/ @@ -69,12 +70,72 @@ static inline void ipi_flush_bp_all(void *ignored) local_flush_bp_all(); } +#ifdef CONFIG_ARM_ERRATA_798181 +static int erratum_a15_798181(void) +{ + unsigned int midr = read_cpuid_id(); + + /* Cortex-A15 r0p0..r3p2 affected */ + if ((midr & 0xff0ffff0) != 0x410fc0f0 || midr > 0x413fc0f2) + return 0; + return 1; +} +#else +static int erratum_a15_798181(void) +{ + return 0; +} +#endif + +static void ipi_flush_tlb_a15_erratum(void *arg) +{ + dmb(); +} + +static void broadcast_tlb_a15_erratum(void) +{ + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + smp_call_function_many(cpu_online_mask, ipi_flush_tlb_a15_erratum, + NULL, 1); +} + +static void broadcast_tlb_mm_a15_erratum(struct mm_struct *mm) +{ + int cpu; + cpumask_t mask = { CPU_BITS_NONE }; + + if (!erratum_a15_798181()) + return; + + dummy_flush_tlb_a15_erratum(); + for_each_online_cpu(cpu) { + if (cpu == smp_processor_id()) + continue; + /* + * We only need to send an IPI if the other CPUs are running + * the same ASID as the one being invalidated. There is no + * need for locking around the active_asids check since the + * switch_mm() function has at least one dmb() (as required by + * this workaround) in case a context switch happens on + * another CPU after the condition below. + */ + if (atomic64_read(&mm->context.id) == + atomic64_read(&per_cpu(active_asids, cpu))) + cpumask_set_cpu(cpu, &mask); + } + smp_call_function_many(&mask, ipi_flush_tlb_a15_erratum, NULL, 1); +} + void flush_tlb_all(void) { if (tlb_ops_need_broadcast()) on_each_cpu(ipi_flush_tlb_all, NULL, 1); else local_flush_tlb_all(); + broadcast_tlb_a15_erratum(); } void flush_tlb_mm(struct mm_struct *mm) @@ -83,6 +144,7 @@ void flush_tlb_mm(struct mm_struct *mm) on_each_cpu_mask(mm_cpumask(mm), ipi_flush_tlb_mm, mm, 1); else local_flush_tlb_mm(mm); + broadcast_tlb_mm_a15_erratum(mm); } void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) @@ -95,6 +157,7 @@ void flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) &ta, 1); } else local_flush_tlb_page(vma, uaddr); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_page(unsigned long kaddr) @@ -105,6 +168,7 @@ void flush_tlb_kernel_page(unsigned long kaddr) on_each_cpu(ipi_flush_tlb_kernel_page, &ta, 1); } else local_flush_tlb_kernel_page(kaddr); + broadcast_tlb_a15_erratum(); } void flush_tlb_range(struct vm_area_struct *vma, @@ -119,6 +183,7 @@ void flush_tlb_range(struct vm_area_struct *vma, &ta, 1); } else local_flush_tlb_range(vma, start, end); + broadcast_tlb_mm_a15_erratum(vma->vm_mm); } void flush_tlb_kernel_range(unsigned long start, unsigned long end) @@ -130,6 +195,7 @@ void flush_tlb_kernel_range(unsigned long start, unsigned long end) on_each_cpu(ipi_flush_tlb_kernel_range, &ta, 1); } else local_flush_tlb_kernel_range(start, end); + broadcast_tlb_a15_erratum(); } void flush_bp_all(void) diff --git a/arch/arm/kvm/vgic.c b/arch/arm/kvm/vgic.c index c9a17316e9fe..0e4cfe123b38 100644 --- a/arch/arm/kvm/vgic.c +++ b/arch/arm/kvm/vgic.c @@ -883,8 +883,7 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) lr, irq, vgic_cpu->vgic_lr[lr]); BUG_ON(!test_bit(lr, vgic_cpu->lr_used)); vgic_cpu->vgic_lr[lr] |= GICH_LR_PENDING_BIT; - - goto out; + return true; } /* Try to use another LR for this interrupt */ @@ -898,7 +897,6 @@ static bool vgic_queue_irq(struct kvm_vcpu *vcpu, u8 sgi_source_id, int irq) vgic_cpu->vgic_irq_lr_map[irq] = lr; set_bit(lr, vgic_cpu->lr_used); -out: if (!vgic_irq_is_edge(vcpu, irq)) vgic_cpu->vgic_lr[lr] |= GICH_LR_EOI; @@ -1018,21 +1016,6 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) kvm_debug("MISR = %08x\n", vgic_cpu->vgic_misr); - /* - * We do not need to take the distributor lock here, since the only - * action we perform is clearing the irq_active_bit for an EOIed - * level interrupt. There is a potential race with - * the queuing of an interrupt in __kvm_vgic_flush_hwstate(), where we - * check if the interrupt is already active. Two possibilities: - * - * - The queuing is occurring on the same vcpu: cannot happen, - * as we're already in the context of this vcpu, and - * executing the handler - * - The interrupt has been migrated to another vcpu, and we - * ignore this interrupt for this run. Big deal. It is still - * pending though, and will get considered when this vcpu - * exits. - */ if (vgic_cpu->vgic_misr & GICH_MISR_EOI) { /* * Some level interrupts have been EOIed. Clear their @@ -1054,6 +1037,13 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } else { vgic_cpu_irq_clear(vcpu, irq); } + + /* + * Despite being EOIed, the LR may not have + * been marked as empty. + */ + set_bit(lr, (unsigned long *)vgic_cpu->vgic_elrsr); + vgic_cpu->vgic_lr[lr] &= ~GICH_LR_ACTIVE_BIT; } } @@ -1064,9 +1054,8 @@ static bool vgic_process_maintenance(struct kvm_vcpu *vcpu) } /* - * Sync back the VGIC state after a guest run. We do not really touch - * the distributor here (the irq_pending_on_cpu bit is safe to set), - * so there is no need for taking its lock. + * Sync back the VGIC state after a guest run. The distributor lock is + * needed so we don't get preempted in the middle of the state processing. */ static void __kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { @@ -1112,10 +1101,14 @@ void kvm_vgic_flush_hwstate(struct kvm_vcpu *vcpu) void kvm_vgic_sync_hwstate(struct kvm_vcpu *vcpu) { + struct vgic_dist *dist = &vcpu->kvm->arch.vgic; + if (!irqchip_in_kernel(vcpu->kvm)) return; + spin_lock(&dist->lock); __kvm_vgic_sync_hwstate(vcpu); + spin_unlock(&dist->lock); } int kvm_vgic_vcpu_pending_irq(struct kvm_vcpu *vcpu) diff --git a/arch/arm/lib/delay.c b/arch/arm/lib/delay.c index 6b93f6a1a3c7..64dbfa57204a 100644 --- a/arch/arm/lib/delay.c +++ b/arch/arm/lib/delay.c @@ -58,7 +58,7 @@ static void __timer_delay(unsigned long cycles) static void __timer_const_udelay(unsigned long xloops) { unsigned long long loops = xloops; - loops *= loops_per_jiffy; + loops *= arm_delay_ops.ticks_per_jiffy; __timer_delay(loops >> UDELAY_SHIFT); } @@ -73,11 +73,13 @@ void __init register_current_timer_delay(const struct delay_timer *timer) pr_info("Switching to timer-based delay loop\n"); delay_timer = timer; lpj_fine = timer->freq / HZ; - loops_per_jiffy = lpj_fine; + + /* cpufreq may scale loops_per_jiffy, so keep a private copy */ + arm_delay_ops.ticks_per_jiffy = lpj_fine; arm_delay_ops.delay = __timer_delay; arm_delay_ops.const_udelay = __timer_const_udelay; arm_delay_ops.udelay = __timer_udelay; - arm_delay_ops.const_clock = true; + delay_calibrated = true; } else { pr_info("Ignoring duplicate/late registration of read_current_timer delay\n"); diff --git a/arch/arm/mm/cache-l2x0.c b/arch/arm/mm/cache-l2x0.c index c2f37390308a..c465faca51b0 100644 --- a/arch/arm/mm/cache-l2x0.c +++ b/arch/arm/mm/cache-l2x0.c @@ -299,7 +299,7 @@ static void l2x0_unlock(u32 cache_id) int lockregs; int i; - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: lockregs = 8; break; @@ -333,15 +333,14 @@ void __init l2x0_init(void __iomem *base, u32 aux_val, u32 aux_mask) if (cache_id_part_number_from_dt) cache_id = cache_id_part_number_from_dt; else - cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID) - & L2X0_CACHE_ID_PART_MASK; + cache_id = readl_relaxed(l2x0_base + L2X0_CACHE_ID); aux = readl_relaxed(l2x0_base + L2X0_AUX_CTRL); aux &= aux_mask; aux |= aux_val; /* Determine the number of ways */ - switch (cache_id) { + switch (cache_id & L2X0_CACHE_ID_PART_MASK) { case L2X0_CACHE_ID_PART_L310: if (aux & (1 << 16)) ways = 16; @@ -725,7 +724,6 @@ static const struct l2x0_of_data pl310_data = { .flush_all = l2x0_flush_all, .inv_all = l2x0_inv_all, .disable = l2x0_disable, - .set_debug = pl310_set_debug, }, }; @@ -814,9 +812,8 @@ int __init l2x0_of_init(u32 aux_val, u32 aux_mask) data->save(); of_init = true; - l2x0_init(l2x0_base, aux_val, aux_mask); - memcpy(&outer_cache, &data->outer_cache, sizeof(outer_cache)); + l2x0_init(l2x0_base, aux_val, aux_mask); return 0; } diff --git a/arch/arm/mm/context.c b/arch/arm/mm/context.c index a5a4b2bc42ba..2ac37372ef52 100644 --- a/arch/arm/mm/context.c +++ b/arch/arm/mm/context.c @@ -48,7 +48,7 @@ static DEFINE_RAW_SPINLOCK(cpu_asid_lock); static atomic64_t asid_generation = ATOMIC64_INIT(ASID_FIRST_VERSION); static DECLARE_BITMAP(asid_map, NUM_USER_ASIDS); -static DEFINE_PER_CPU(atomic64_t, active_asids); +DEFINE_PER_CPU(atomic64_t, active_asids); static DEFINE_PER_CPU(u64, reserved_asids); static cpumask_t tlb_flush_pending; @@ -215,6 +215,7 @@ void check_and_switch_context(struct mm_struct *mm, struct task_struct *tsk) if (cpumask_test_and_clear_cpu(cpu, &tlb_flush_pending)) { local_flush_bp_all(); local_flush_tlb_all(); + dummy_flush_tlb_a15_erratum(); } atomic64_set(&per_cpu(active_asids, cpu), asid); diff --git a/arch/arm/mm/mmu.c b/arch/arm/mm/mmu.c index e95a996ab78f..78978945492a 100644 --- a/arch/arm/mm/mmu.c +++ b/arch/arm/mm/mmu.c @@ -598,39 +598,60 @@ static void __init alloc_init_pte(pmd_t *pmd, unsigned long addr, } while (pte++, addr += PAGE_SIZE, addr != end); } -static void __init alloc_init_section(pud_t *pud, unsigned long addr, - unsigned long end, phys_addr_t phys, - const struct mem_type *type) +static void __init map_init_section(pmd_t *pmd, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) { - pmd_t *pmd = pmd_offset(pud, addr); - +#ifndef CONFIG_ARM_LPAE /* - * Try a section mapping - end, addr and phys must all be aligned - * to a section boundary. Note that PMDs refer to the individual - * L1 entries, whereas PGDs refer to a group of L1 entries making - * up one logical pointer to an L2 table. + * In classic MMU format, puds and pmds are folded in to + * the pgds. pmd_offset gives the PGD entry. PGDs refer to a + * group of L1 entries making up one logical pointer to + * an L2 table (2MB), where as PMDs refer to the individual + * L1 entries (1MB). Hence increment to get the correct + * offset for odd 1MB sections. + * (See arch/arm/include/asm/pgtable-2level.h) */ - if (type->prot_sect && ((addr | end | phys) & ~SECTION_MASK) == 0) { - pmd_t *p = pmd; - -#ifndef CONFIG_ARM_LPAE - if (addr & SECTION_SIZE) - pmd++; + if (addr & SECTION_SIZE) + pmd++; #endif + do { + *pmd = __pmd(phys | type->prot_sect); + phys += SECTION_SIZE; + } while (pmd++, addr += SECTION_SIZE, addr != end); - do { - *pmd = __pmd(phys | type->prot_sect); - phys += SECTION_SIZE; - } while (pmd++, addr += SECTION_SIZE, addr != end); + flush_pmd_entry(pmd); +} - flush_pmd_entry(p); - } else { +static void __init alloc_init_pmd(pud_t *pud, unsigned long addr, + unsigned long end, phys_addr_t phys, + const struct mem_type *type) +{ + pmd_t *pmd = pmd_offset(pud, addr); + unsigned long next; + + do { /* - * No need to loop; pte's aren't interested in the - * individual L1 entries. + * With LPAE, we must loop over to map + * all the pmds for the given range. */ - alloc_init_pte(pmd, addr, end, __phys_to_pfn(phys), type); - } + next = pmd_addr_end(addr, end); + + /* + * Try a section mapping - addr, next and phys must all be + * aligned to a section boundary. + */ + if (type->prot_sect && + ((addr | next | phys) & ~SECTION_MASK) == 0) { + map_init_section(pmd, addr, next, phys, type); + } else { + alloc_init_pte(pmd, addr, next, + __phys_to_pfn(phys), type); + } + + phys += next - addr; + + } while (pmd++, addr = next, addr != end); } static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, @@ -641,7 +662,7 @@ static void __init alloc_init_pud(pgd_t *pgd, unsigned long addr, do { next = pud_addr_end(addr, end); - alloc_init_section(pud, addr, next, phys, type); + alloc_init_pmd(pud, addr, next, phys, type); phys += next - addr; } while (pud++, addr = next, addr != end); } diff --git a/arch/arm/mm/proc-v7.S b/arch/arm/mm/proc-v7.S index 3a3c015f8d5c..f584d3f5b37c 100644 --- a/arch/arm/mm/proc-v7.S +++ b/arch/arm/mm/proc-v7.S @@ -420,7 +420,7 @@ __v7_pj4b_proc_info: __v7_ca7mp_proc_info: .long 0x410fc070 .long 0xff0ffff0 - __v7_proc __v7_ca7mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca7mp_setup .size __v7_ca7mp_proc_info, . - __v7_ca7mp_proc_info /* @@ -430,10 +430,25 @@ __v7_ca7mp_proc_info: __v7_ca15mp_proc_info: .long 0x410fc0f0 .long 0xff0ffff0 - __v7_proc __v7_ca15mp_setup, hwcaps = HWCAP_IDIV + __v7_proc __v7_ca15mp_setup .size __v7_ca15mp_proc_info, . - __v7_ca15mp_proc_info /* + * Qualcomm Inc. Krait processors. + */ + .type __krait_proc_info, #object +__krait_proc_info: + .long 0x510f0400 @ Required ID value + .long 0xff0ffc00 @ Mask for ID + /* + * Some Krait processors don't indicate support for SDIV and UDIV + * instructions in the ARM instruction set, even though they actually + * do support them. + */ + __v7_proc __v7_setup, hwcaps = HWCAP_IDIV + .size __krait_proc_info, . - __krait_proc_info + + /* * Match any ARMv7 processor core. */ .type __v7_proc_info, #object |