author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-21 18:55:10 -0700
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-03-21 18:55:10 -0700
commit | 5375871d432ae9fc581014ac117b96aaee3cd0c7 (patch)
tree | be98e8255b0f927fb920fb532a598b93fa140dbe /arch/powerpc/mm
parent | b57cb7231b2ce52d3dda14a7b417ae125fb2eb97 (diff)
parent | dfbc2d75c1bd47c3186fa91f1655ea2f3825b0ec (diff)
download | linux-5375871d432ae9fc581014ac117b96aaee3cd0c7.tar.bz2
Merge branch 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc
Pull powerpc merge from Benjamin Herrenschmidt:
"Here's the powerpc batch for this merge window. It is going to be a
bit more nasty than usual as in touching things outside of
arch/powerpc mostly due to the big iSeriesectomy :-) We finally got
rid of the bugger (legacy iSeries support) which was a PITA to
maintain and that nobody really used anymore.
Here are some of the highlights:
- Legacy iSeries is gone. Thanks Stephen ! There's still some bits
and pieces remaining if you do a grep -ir iseries arch/powerpc but
they are harmless and will be removed in the next few weeks
hopefully.
- The 'fadump' functionality (Firmware Assisted Dump) replaces the
previous (equivalent) "pHyp assisted dump"... it's a rewrite of a
mechanism to get the hypervisor to do crash dumps on pSeries, the
new implementation hopefully being much more reliable. Thanks
Mahesh Salgaonkar.
- The "EEH" code (pSeries PCI error handling & recovery) got a big
spring cleaning, motivated by the need to be able to implement a
new backend for it on top of some new different type of firmware.
The work isn't complete yet, but a good chunk of the cleanups is
there. Note that this adds a field to struct device_node which is
not very nice and which Grant objects to. I will have a patch soon
that moves that to a powerpc private data structure (hopefully
before rc1) and we'll improve things further later on (hopefully
getting rid of the need for that pointer completely). Thanks Gavin
Shan.
- I dug into our exception & interrupt handling code to improve the
way we do lazy interrupt handling (and make it work properly with
"edge" triggered interrupt sources), and while at it found & fixed
a wagon of issues in those areas, including adding support for page
fault retry & fatal signals on page faults (a sketch of the retry
pattern follows below).
- Your usual random batch of small fixes & updates, including a bunch
of new embedded boards, both Freescale and APM based ones, etc..."
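
The page-fault work called out above lands in the arch/powerpc/mm/fault.c hunk further down. As orientation, here is a runnable userspace model of the one-shot retry pattern the new do_page_fault() adopts; the flag values and fake_handle_mm_fault() are illustrative stand-ins, not the kernel's definitions.

```c
#include <stdio.h>

/* Illustrative stand-ins; the real flags live in the kernel's mm headers. */
#define FAULT_FLAG_ALLOW_RETRY  0x1
#define FAULT_FLAG_KILLABLE     0x2
#define VM_FAULT_RETRY          0x4
#define VM_FAULT_ERROR          0x8

/* Stand-in for handle_mm_fault(): requests one retry when the caller
 * allows it, then succeeds, mimicking __lock_page_or_retry() having
 * dropped the mmap semaphore on the first pass. */
static int fake_handle_mm_fault(unsigned int flags)
{
        static int calls;

        if ((flags & FAULT_FLAG_ALLOW_RETRY) && calls++ == 0)
                return VM_FAULT_RETRY;
        return 0;
}

int main(void)
{
        unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
        int fault;

retry:
        fault = fake_handle_mm_fault(flags);
        if (fault & VM_FAULT_ERROR)
                return 1;       /* the kernel funnels this through mm_fault_error() */
        if (fault & VM_FAULT_RETRY) {
                /* Retry exactly once: dropping ALLOW_RETRY avoids starvation. */
                flags &= ~FAULT_FLAG_ALLOW_RETRY;
                puts("retrying without FAULT_FLAG_ALLOW_RETRY");
                goto retry;
        }
        puts("fault handled");
        return 0;
}
```

As the new comment in the diff notes, major/minor fault accounting is only done on the first attempt, since a retried fault will almost always find the page already in page cache.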
I fixed up some conflicts with the generalized irq-domain changes from
Grant Likely, hopefully correctly.
* 'next' of git://git.kernel.org/pub/scm/linux/kernel/git/benh/powerpc: (141 commits)
powerpc/ps3: Do not adjust the wrapper load address
powerpc: Remove the rest of the legacy iSeries include files
powerpc: Remove the remaining CONFIG_PPC_ISERIES pieces
init: Remove CONFIG_PPC_ISERIES
powerpc: Remove FW_FEATURE ISERIES from arch code
tty/hvc_vio: FW_FEATURE_ISERIES is no longer selectable
powerpc/spufs: Fix double unlocks
powerpc/5200: convert mpc5200 to use of_platform_populate()
powerpc/mpc5200: add options to mpc5200_defconfig
powerpc/mpc52xx: add a4m072 board support
powerpc/mpc5200: update mpc5200_defconfig to fit for charon board
Documentation/powerpc/mpc52xx.txt: Checkpatch cleanup
powerpc/44x: Add additional device support for APM821xx SoC and Bluestone board
powerpc/44x: Add support PCI-E for APM821xx SoC and Bluestone board
MAINTAINERS: Update PowerPC 4xx tree
powerpc/44x: The bug fixed support for APM821xx SoC and Bluestone board
powerpc: document the FSL MPIC message register binding
powerpc: add support for MPIC message register API
powerpc/fsl: Added aliased MSIIR register address to MSI node in dts
powerpc/85xx: mpc8548cds - add 36-bit dts
...
Diffstat (limited to 'arch/powerpc/mm')
-rw-r--r-- | arch/powerpc/mm/fault.c | 181
-rw-r--r-- | arch/powerpc/mm/fsl_booke_mmu.c | 19
-rw-r--r-- | arch/powerpc/mm/hash_utils_64.c | 20
-rw-r--r-- | arch/powerpc/mm/icswx.c | 23
-rw-r--r-- | arch/powerpc/mm/icswx.h | 6
-rw-r--r-- | arch/powerpc/mm/pgtable_32.c | 2
-rw-r--r-- | arch/powerpc/mm/slb.c | 6
-rw-r--r-- | arch/powerpc/mm/slb_low.S | 16
-rw-r--r-- | arch/powerpc/mm/stab.c | 9
9 files changed, 186 insertions, 96 deletions
diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c
index 2f0d1b032a89..19f2f9498b27 100644
--- a/arch/powerpc/mm/fault.c
+++ b/arch/powerpc/mm/fault.c
@@ -105,6 +105,82 @@ static int store_updates_sp(struct pt_regs *regs)
 	}
 	return 0;
 }
+
+/*
+ * do_page_fault error handling helpers
+ */
+
+#define MM_FAULT_RETURN		0
+#define MM_FAULT_CONTINUE	-1
+#define MM_FAULT_ERR(sig)	(sig)
+
+static int out_of_memory(struct pt_regs *regs)
+{
+	/*
+	 * We ran out of memory, or some other thing happened to us that made
+	 * us unable to handle the page fault gracefully.
+	 */
+	up_read(&current->mm->mmap_sem);
+	if (!user_mode(regs))
+		return MM_FAULT_ERR(SIGKILL);
+	pagefault_out_of_memory();
+	return MM_FAULT_RETURN;
+}
+
+static int do_sigbus(struct pt_regs *regs, unsigned long address)
+{
+	siginfo_t info;
+
+	up_read(&current->mm->mmap_sem);
+
+	if (user_mode(regs)) {
+		info.si_signo = SIGBUS;
+		info.si_errno = 0;
+		info.si_code = BUS_ADRERR;
+		info.si_addr = (void __user *)address;
+		force_sig_info(SIGBUS, &info, current);
+		return MM_FAULT_RETURN;
+	}
+	return MM_FAULT_ERR(SIGBUS);
+}
+
+static int mm_fault_error(struct pt_regs *regs, unsigned long addr, int fault)
+{
+	/*
+	 * Pagefault was interrupted by SIGKILL. We have no reason to
+	 * continue the pagefault.
+	 */
+	if (fatal_signal_pending(current)) {
+		/*
+		 * If we have retry set, the mmap semaphore will have
+		 * alrady been released in __lock_page_or_retry(). Else
+		 * we release it now.
+		 */
+		if (!(fault & VM_FAULT_RETRY))
+			up_read(&current->mm->mmap_sem);
+		/* Coming from kernel, we need to deal with uaccess fixups */
+		if (user_mode(regs))
+			return MM_FAULT_RETURN;
+		return MM_FAULT_ERR(SIGKILL);
+	}
+
+	/* No fault: be happy */
+	if (!(fault & VM_FAULT_ERROR))
+		return MM_FAULT_CONTINUE;
+
+	/* Out of memory */
+	if (fault & VM_FAULT_OOM)
+		return out_of_memory(regs);
+
+	/* Bus error. x86 handles HWPOISON here, we'll add this if/when
+	 * we support the feature in HW
+	 */
+	if (fault & VM_FAULT_SIGBUS)
+		return do_sigbus(regs, addr);
+
+	/* We don't understand the fault code, this is fatal */
+	BUG();
+	return MM_FAULT_CONTINUE;
+}
 
 /*
  * For 600- and 800-family processors, the error_code parameter is DSISR
@@ -124,11 +200,12 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 {
 	struct vm_area_struct * vma;
 	struct mm_struct *mm = current->mm;
-	siginfo_t info;
+	unsigned int flags = FAULT_FLAG_ALLOW_RETRY | FAULT_FLAG_KILLABLE;
 	int code = SEGV_MAPERR;
-	int is_write = 0, ret;
+	int is_write = 0;
 	int trap = TRAP(regs);
 	int is_exec = trap == 0x400;
+	int fault;
 
 #if !(defined(CONFIG_4xx) || defined(CONFIG_BOOKE))
 	/*
@@ -145,6 +222,9 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	is_write = error_code & ESR_DST;
 #endif /* CONFIG_4xx || CONFIG_BOOKE */
 
+	if (is_write)
+		flags |= FAULT_FLAG_WRITE;
+
 #ifdef CONFIG_PPC_ICSWX
 	/*
 	 * we need to do this early because this "data storage
@@ -152,13 +232,11 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	 * look at it
 	 */
 	if (error_code & ICSWX_DSI_UCT) {
-		int ret;
-
-		ret = acop_handle_fault(regs, address, error_code);
-		if (ret)
-			return ret;
+		int rc = acop_handle_fault(regs, address, error_code);
+		if (rc)
+			return rc;
 	}
-#endif
+#endif /* CONFIG_PPC_ICSWX */
 
 	if (notify_page_fault(regs))
 		return 0;
@@ -179,6 +257,10 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 	}
 #endif
 
+	/* We restore the interrupt state now */
+	if (!arch_irq_disabled_regs(regs))
+		local_irq_enable();
+
 	if (in_atomic() || mm == NULL) {
 		if (!user_mode(regs))
 			return SIGSEGV;
@@ -212,7 +294,15 @@ int __kprobes do_page_fault(struct pt_regs *regs, unsigned long address,
 		if (!user_mode(regs) && !search_exception_tables(regs->nip))
 			goto bad_area_nosemaphore;
 
+retry:
 		down_read(&mm->mmap_sem);
+	} else {
+		/*
+		 * The above down_read_trylock() might have succeeded in
+		 * which case we'll have missed the might_sleep() from
+		 * down_read():
+		 */
+		might_sleep();
 	}
 
 	vma = find_vma(mm, address);
@@ -327,30 +417,43 @@ good_area:
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
-	ret = handle_mm_fault(mm, vma, address, is_write ? FAULT_FLAG_WRITE : 0);
-	if (unlikely(ret & VM_FAULT_ERROR)) {
-		if (ret & VM_FAULT_OOM)
-			goto out_of_memory;
-		else if (ret & VM_FAULT_SIGBUS)
-			goto do_sigbus;
-		BUG();
+	fault = handle_mm_fault(mm, vma, address, flags);
+	if (unlikely(fault & (VM_FAULT_RETRY|VM_FAULT_ERROR))) {
+		int rc = mm_fault_error(regs, address, fault);
+		if (rc >= MM_FAULT_RETURN)
+			return rc;
 	}
-	if (ret & VM_FAULT_MAJOR) {
-		current->maj_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
-			      regs, address);
+
+	/*
+	 * Major/minor page fault accounting is only done on the
+	 * initial attempt. If we go through a retry, it is extremely
+	 * likely that the page will be found in page cache at that point.
+	 */
+	if (flags & FAULT_FLAG_ALLOW_RETRY) {
+		if (fault & VM_FAULT_MAJOR) {
+			current->maj_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MAJ, 1,
+				      regs, address);
 #ifdef CONFIG_PPC_SMLPAR
-	if (firmware_has_feature(FW_FEATURE_CMO)) {
-		preempt_disable();
-		get_lppaca()->page_ins += (1 << PAGE_FACTOR);
-		preempt_enable();
+			if (firmware_has_feature(FW_FEATURE_CMO)) {
+				preempt_disable();
+				get_lppaca()->page_ins += (1 << PAGE_FACTOR);
+				preempt_enable();
+			}
+#endif /* CONFIG_PPC_SMLPAR */
+		} else {
+			current->min_flt++;
+			perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
+				      regs, address);
+		}
+		if (fault & VM_FAULT_RETRY) {
+			/* Clear FAULT_FLAG_ALLOW_RETRY to avoid any risk
+			 * of starvation. */
+			flags &= ~FAULT_FLAG_ALLOW_RETRY;
+			goto retry;
 		}
-#endif
-	} else {
-		current->min_flt++;
-		perf_sw_event(PERF_COUNT_SW_PAGE_FAULTS_MIN, 1,
-			      regs, address);
 	}
+
 	up_read(&mm->mmap_sem);
 	return 0;
@@ -371,28 +474,6 @@ bad_area_nosemaphore:
 
 	return SIGSEGV;
 
-/*
- * We ran out of memory, or some other thing happened to us that made
- * us unable to handle the page fault gracefully.
- */
-out_of_memory:
-	up_read(&mm->mmap_sem);
-	if (!user_mode(regs))
-		return SIGKILL;
-	pagefault_out_of_memory();
-	return 0;
-
-do_sigbus:
-	up_read(&mm->mmap_sem);
-	if (user_mode(regs)) {
-		info.si_signo = SIGBUS;
-		info.si_errno = 0;
-		info.si_code = BUS_ADRERR;
-		info.si_addr = (void __user *)address;
-		force_sig_info(SIGBUS, &info, current);
-		return 0;
-	}
-	return SIGBUS;
 }
 
 /*
diff --git a/arch/powerpc/mm/fsl_booke_mmu.c b/arch/powerpc/mm/fsl_booke_mmu.c
index 66a6fd38e9cd..07ba45b0f07c 100644
--- a/arch/powerpc/mm/fsl_booke_mmu.c
+++ b/arch/powerpc/mm/fsl_booke_mmu.c
@@ -149,12 +149,19 @@ static void settlbcam(int index, unsigned long virt, phys_addr_t phys,
 unsigned long calc_cam_sz(unsigned long ram, unsigned long virt,
 			  phys_addr_t phys)
 {
-	unsigned int camsize = __ilog2(ram) & ~1U;
-	unsigned int align = __ffs(virt | phys) & ~1U;
-	unsigned long max_cam = (mfspr(SPRN_TLB1CFG) >> 16) & 0xf;
-
-	/* Convert (4^max) kB to (2^max) bytes */
-	max_cam = max_cam * 2 + 10;
+	unsigned int camsize = __ilog2(ram);
+	unsigned int align = __ffs(virt | phys);
+	unsigned long max_cam;
+
+	if ((mfspr(SPRN_MMUCFG) & MMUCFG_MAVN) == MMUCFG_MAVN_V1) {
+		/* Convert (4^max) kB to (2^max) bytes */
+		max_cam = ((mfspr(SPRN_TLB1CFG) >> 16) & 0xf) * 2 + 10;
+		camsize &= ~1U;
+		align &= ~1U;
+	} else {
+		/* Convert (2^max) kB to (2^max) bytes */
+		max_cam = __ilog2(mfspr(SPRN_TLB1PS)) + 10;
+	}
 
 	if (camsize > align)
 		camsize = align;
diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c
index 2d282186cb45..3e8c37a4e395 100644
--- a/arch/powerpc/mm/hash_utils_64.c
+++ b/arch/powerpc/mm/hash_utils_64.c
@@ -55,6 +55,8 @@
 #include <asm/spu.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
+#include <asm/fadump.h>
+#include <asm/firmware.h>
 
 #ifdef DEBUG
 #define DBG(fmt...) udbg_printf(fmt)
@@ -625,6 +627,16 @@ static void __init htab_initialize(void)
 		/* Using a hypervisor which owns the htab */
 		htab_address = NULL;
 		_SDR1 = 0;
+#ifdef CONFIG_FA_DUMP
+		/*
+		 * If firmware assisted dump is active firmware preserves
+		 * the contents of htab along with entire partition memory.
+		 * Clear the htab if firmware assisted dump is active so
+		 * that we dont end up using old mappings.
+		 */
+		if (is_fadump_active() && ppc_md.hpte_clear_all)
+			ppc_md.hpte_clear_all();
+#endif
 	} else {
 		/* Find storage for the HPT.  Must be contiguous in
 		 * the absolute address space.  On cell we want it to be
@@ -745,12 +757,9 @@ void __init early_init_mmu(void)
 	 */
 	htab_initialize();
 
-	/* Initialize stab / SLB management except on iSeries
-	 */
+	/* Initialize stab / SLB management */
 	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
-	else if (!firmware_has_feature(FW_FEATURE_ISERIES))
-		stab_initialize(get_paca()->stab_real);
 }
 
 #ifdef CONFIG_SMP
@@ -761,8 +770,7 @@ void __cpuinit early_init_mmu_secondary(void)
 		mtspr(SPRN_SDR1, _SDR1);
 
 	/* Initialize STAB/SLB. We use a virtual address as it works
-	 * in real mode on pSeries and we want a virtual address on
-	 * iSeries anyway
+	 * in real mode on pSeries.
 	 */
 	if (mmu_has_feature(MMU_FTR_SLB))
 		slb_initialize();
diff --git a/arch/powerpc/mm/icswx.c b/arch/powerpc/mm/icswx.c
index 5d9a59eaad93..8cdbd8634a58 100644
--- a/arch/powerpc/mm/icswx.c
+++ b/arch/powerpc/mm/icswx.c
@@ -163,7 +163,7 @@ EXPORT_SYMBOL_GPL(drop_cop);
 
 static int acop_use_cop(int ct)
 {
-	/* todo */
+	/* There is no alternate policy, yet */
 	return -1;
 }
 
@@ -227,11 +227,30 @@ int acop_handle_fault(struct pt_regs *regs, unsigned long address,
 		ct = (ccw >> 16) & 0x3f;
 	}
 
+	/*
+	 * We could be here because another thread has enabled acop
+	 * but the ACOP register has yet to be updated.
+	 *
+	 * This should have been taken care of by the IPI to sync all
+	 * the threads (see smp_call_function(sync_cop, mm, 1)), but
+	 * that could take forever if there are a significant amount
+	 * of threads.
+	 *
+	 * Given the number of threads on some of these systems,
+	 * perhaps this is the best way to sync ACOP rather than whack
+	 * every thread with an IPI.
+	 */
+	if ((acop_copro_type_bit(ct) & current->active_mm->context.acop) != 0) {
+		sync_cop(current->active_mm);
+		return 0;
+	}
+
+	/* check for alternate policy */
 	if (!acop_use_cop(ct))
 		return 0;
 
 	/* at this point the CT is unknown to the system */
-	pr_warn("%s[%d]: Coprocessor %d is unavailable",
+	pr_warn("%s[%d]: Coprocessor %d is unavailable\n",
 		current->comm, current->pid, ct);
 
 	/* get inst if we don't already have it */
diff --git a/arch/powerpc/mm/icswx.h b/arch/powerpc/mm/icswx.h
index 42176bd0884c..6dedc08e62c8 100644
--- a/arch/powerpc/mm/icswx.h
+++ b/arch/powerpc/mm/icswx.h
@@ -59,4 +59,10 @@ extern void free_cop_pid(int free_pid);
 extern int acop_handle_fault(struct pt_regs *regs, unsigned long address,
 			     unsigned long error_code);
 
+static inline u64 acop_copro_type_bit(unsigned int type)
+{
+	return 1ULL << (63 - type);
+}
+
 #endif /* !_ARCH_POWERPC_MM_ICSWX_H_ */
diff --git a/arch/powerpc/mm/pgtable_32.c b/arch/powerpc/mm/pgtable_32.c
index 51f87956f8f8..0907f92ce309 100644
--- a/arch/powerpc/mm/pgtable_32.c
+++ b/arch/powerpc/mm/pgtable_32.c
@@ -207,7 +207,7 @@ __ioremap_caller(phys_addr_t addr, unsigned long size, unsigned long flags,
 	 */
 	if (mem_init_done && (p < virt_to_phys(high_memory)) &&
 	    !(__allow_ioremap_reserved && memblock_is_region_reserved(p, size))) {
-		printk("__ioremap(): phys addr 0x%llx is RAM lr %p\n",
+		printk("__ioremap(): phys addr 0x%llx is RAM lr %pf\n",
 		       (unsigned long long)p, __builtin_return_address(0));
 		return NULL;
 	}
diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c
index e22276cb67a4..a538c80db2df 100644
--- a/arch/powerpc/mm/slb.c
+++ b/arch/powerpc/mm/slb.c
@@ -21,7 +21,6 @@
 #include <asm/cputable.h>
 #include <asm/cacheflush.h>
 #include <asm/smp.h>
-#include <asm/firmware.h>
 #include <linux/compiler.h>
 #include <asm/udbg.h>
 #include <asm/code-patching.h>
@@ -307,11 +306,6 @@ void slb_initialize(void)
 
 	get_paca()->stab_rr = SLB_NUM_BOLTED;
 
-	/* On iSeries the bolted entries have already been set up by
-	 * the hypervisor from the lparMap data in head.S */
-	if (firmware_has_feature(FW_FEATURE_ISERIES))
-		return;
-
 	lflags = SLB_VSID_KERNEL | linear_llp;
 	vflags = SLB_VSID_KERNEL | vmalloc_llp;
diff --git a/arch/powerpc/mm/slb_low.S b/arch/powerpc/mm/slb_low.S
index ef653dc95b65..b9ee79ce2200 100644
--- a/arch/powerpc/mm/slb_low.S
+++ b/arch/powerpc/mm/slb_low.S
@@ -217,21 +217,6 @@ slb_finish_load:
 	 * free slot first but that took too long. Unfortunately we
 	 * dont have any LRU information to help us choose a slot.
 	 */
-#ifdef CONFIG_PPC_ISERIES
-BEGIN_FW_FTR_SECTION
-	/*
-	 * On iSeries, the "bolted" stack segment can be cast out on
-	 * shared processor switch so we need to check for a miss on
-	 * it and restore it to the right slot.
-	 */
-	ld	r9,PACAKSAVE(r13)
-	clrrdi	r9,r9,28
-	clrrdi	r3,r3,28
-	li	r10,SLB_NUM_BOLTED-1	/* Stack goes in last bolted slot */
-	cmpld	r9,r3
-	beq	3f
-END_FW_FTR_SECTION_IFSET(FW_FEATURE_ISERIES)
-#endif /* CONFIG_PPC_ISERIES */
 
 7:	ld	r10,PACASTABRR(r13)
 	addi	r10,r10,1
@@ -282,7 +267,6 @@ _GLOBAL(slb_compare_rr_to_size)
 
 /*
  * Finish loading of a 1T SLB entry (for the kernel linear mapping) and return.
- * We assume legacy iSeries will never have 1T segments.
 *
 * r3 = EA, r10 = proto-VSID, r11 = flags, clobbers r9
 */
diff --git a/arch/powerpc/mm/stab.c b/arch/powerpc/mm/stab.c
index 41e31642a86a..9106ebb118f5 100644
--- a/arch/powerpc/mm/stab.c
+++ b/arch/powerpc/mm/stab.c
@@ -21,8 +21,6 @@
 #include <asm/cputable.h>
 #include <asm/prom.h>
 #include <asm/abs_addr.h>
-#include <asm/firmware.h>
-#include <asm/iseries/hv_call.h>
 
 struct stab_entry {
 	unsigned long esid_data;
@@ -285,12 +283,5 @@ void stab_initialize(unsigned long stab)
 
 	/* Set ASR */
 	stabreal = get_paca()->stab_real | 0x1ul;
-
-#ifdef CONFIG_PPC_ISERIES
-	if (firmware_has_feature(FW_FEATURE_ISERIES)) {
-		HvCall1(HvCallBaseSetASR, stabreal);
-		return;
-	}
-#endif /* CONFIG_PPC_ISERIES */
-
 	mtspr(SPRN_ASR, stabreal);
 }
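
A note on the calc_cam_sz() hunk above: on MMU architecture version 1.0 parts, the MAXSIZE field of TLB1CFG encodes the largest CAM entry as a power of 4 KB, so `max_cam = field * 2 + 10` is simply the log2 of that size in bytes; on newer parts the supported power-of-2 KB sizes come from TLB1PS instead. A standalone check of the v1 arithmetic, using a made-up MAXSIZE value rather than a real register read:

```c
#include <assert.h>

int main(void)
{
	/* Hypothetical TLB1CFG[MAXSIZE] value, for illustration only. */
	unsigned long maxsize = 7;

	/* (4^maxsize) KB = 2^(2*maxsize) * 2^10 bytes, hence "* 2 + 10". */
	unsigned long max_cam = maxsize * 2 + 10;

	assert(max_cam == 24);                          /* log2 of 16 MB */
	assert((1UL << max_cam) == 16UL * 1024 * 1024); /* 4^7 KB == 16 MB */
	return 0;
}
```

The `&= ~1U` rounding of camsize and align survives only in the v1 branch because power-of-4 sizes need an even log2; the TLB1PS path can use odd powers of two directly.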