diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-20 11:48:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-09-20 11:48:06 -0700 |
commit | 45824fc0da6e46cc5d563105e1eaaf3098a686f9 (patch) | |
tree | 8e57c1f18104ed5f0d74d9eed9dc0365b3c137b8 /arch/powerpc/mm/book3s64/pgtable.c | |
parent | 8c2b418c3f95a488f5226870eee68574d323f0f8 (diff) | |
parent | d9101bfa6adc831bda8836c4d774820553c14942 (diff) | |
download | linux-45824fc0da6e46cc5d563105e1eaaf3098a686f9.tar.bz2 |
Merge tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:
"This is a bit late, partly due to me travelling, and partly due to a
power outage knocking out some of my test systems *while* I was
travelling.
- Initial support for running on a system with an Ultravisor, which
is software that runs below the hypervisor and protects guests
against some attacks by the hypervisor.
- Support for building the kernel to run as a "Secure Virtual
Machine", ie. as a guest capable of running on a system with an
Ultravisor.
- Some changes to our DMA code on bare metal, to allow devices with
medium sized DMA masks (> 32 && < 59 bits) to use more than 2GB of
DMA space.
- Support for firmware assisted crash dumps on bare metal (powernv).
- Two series fixing bugs in and refactoring our PCI EEH code.
- A large series refactoring our exception entry code to use gas
macros, both to make it more readable and also enable some future
optimisations.
As well as many cleanups and other minor features & fixups.
Thanks to: Adam Zerella, Alexey Kardashevskiy, Alistair Popple, Andrew
Donnellan, Aneesh Kumar K.V, Anju T Sudhakar, Anshuman Khandual,
Balbir Singh, Benjamin Herrenschmidt, Cédric Le Goater, Christophe
JAILLET, Christophe Leroy, Christopher M. Riedl, Christoph Hellwig,
Claudio Carvalho, Daniel Axtens, David Gibson, David Hildenbrand,
Desnes A. Nunes do Rosario, Ganesh Goudar, Gautham R. Shenoy, Greg
Kurz, Guerney Hunt, Gustavo Romero, Halil Pasic, Hari Bathini, Joakim
Tjernlund, Jonathan Neuschafer, Jordan Niethe, Leonardo Bras, Lianbo
Jiang, Madhavan Srinivasan, Mahesh Salgaonkar, Mahesh Salgaonkar,
Masahiro Yamada, Maxiwell S. Garcia, Michael Anderson, Nathan
Chancellor, Nathan Lynch, Naveen N. Rao, Nicholas Piggin, Oliver
O'Halloran, Qian Cai, Ram Pai, Ravi Bangoria, Reza Arbab, Ryan Grimm,
Sam Bobroff, Santosh Sivaraj, Segher Boessenkool, Sukadev Bhattiprolu,
Thiago Bauermann, Thiago Jung Bauermann, Thomas Gleixner, Tom
Lendacky, Vasant Hegde"
* tag 'powerpc-5.4-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (264 commits)
powerpc/mm/mce: Keep irqs disabled during lockless page table walk
powerpc: Use ftrace_graph_ret_addr() when unwinding
powerpc/ftrace: Enable HAVE_FUNCTION_GRAPH_RET_ADDR_PTR
ftrace: Look up the address of return_to_handler() using helpers
powerpc: dump kernel log before carrying out fadump or kdump
docs: powerpc: Add missing documentation reference
powerpc/xmon: Fix output of XIVE IPI
powerpc/xmon: Improve output of XIVE interrupts
powerpc/mm/radix: remove useless kernel messages
powerpc/fadump: support holes in kernel boot memory area
powerpc/fadump: remove RMA_START and RMA_END macros
powerpc/fadump: update documentation about option to release opalcore
powerpc/fadump: consider f/w load area
powerpc/opalcore: provide an option to invalidate /sys/firmware/opal/core file
powerpc/opalcore: export /sys/firmware/opal/core for analysing opal crashes
powerpc/fadump: update documentation about CONFIG_PRESERVE_FA_DUMP
powerpc/fadump: add support to preserve crash data on FADUMP disabled kernel
powerpc/fadump: improve how crashed kernel's memory is reserved
powerpc/fadump: consider reserved ranges while releasing memory
powerpc/fadump: make crash memory ranges array allocation generic
...
Diffstat (limited to 'arch/powerpc/mm/book3s64/pgtable.c')
-rw-r--r-- | arch/powerpc/mm/book3s64/pgtable.c | 121 |
1 files changed, 85 insertions, 36 deletions
diff --git a/arch/powerpc/mm/book3s64/pgtable.c b/arch/powerpc/mm/book3s64/pgtable.c index 7d0e0d0d22c4..75483b40fcb1 100644 --- a/arch/powerpc/mm/book3s64/pgtable.c +++ b/arch/powerpc/mm/book3s64/pgtable.c @@ -8,10 +8,13 @@ #include <linux/memblock.h> #include <misc/cxl-base.h> +#include <asm/debugfs.h> #include <asm/pgalloc.h> #include <asm/tlb.h> #include <asm/trace.h> #include <asm/powernv.h> +#include <asm/firmware.h> +#include <asm/ultravisor.h> #include <mm/mmu_decl.h> #include <trace/events/thp.h> @@ -21,9 +24,6 @@ EXPORT_SYMBOL(__pmd_frag_nr); unsigned long __pmd_frag_size_shift; EXPORT_SYMBOL(__pmd_frag_size_shift); -int (*register_process_table)(unsigned long base, unsigned long page_size, - unsigned long tbl_size); - #ifdef CONFIG_TRANSPARENT_HUGEPAGE /* * This is called when relaxing access to a hugepage. It's also called in the page @@ -205,37 +205,61 @@ void __init mmu_partition_table_init(void) * 64 K size. */ ptcr = __pa(partition_tb) | (PATB_SIZE_SHIFT - 12); - mtspr(SPRN_PTCR, ptcr); + set_ptcr_when_no_uv(ptcr); powernv_set_nmmu_ptcr(ptcr); } +static void flush_partition(unsigned int lpid, bool radix) +{ + if (radix) { + radix__flush_all_lpid(lpid); + radix__flush_all_lpid_guest(lpid); + } else { + asm volatile("ptesync" : : : "memory"); + asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : + "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); + /* do we need fixup here ?*/ + asm volatile("eieio; tlbsync; ptesync" : : : "memory"); + trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); + } +} + void mmu_partition_table_set_entry(unsigned int lpid, unsigned long dw0, - unsigned long dw1) + unsigned long dw1, bool flush) { unsigned long old = be64_to_cpu(partition_tb[lpid].patb0); + /* + * When ultravisor is enabled, the partition table is stored in secure + * memory and can only be accessed doing an ultravisor call. However, we + * maintain a copy of the partition table in normal memory to allow Nest + * MMU translations to occur (for normal VMs). + * + * Therefore, here we always update partition_tb, regardless of whether + * we are running under an ultravisor or not. + */ partition_tb[lpid].patb0 = cpu_to_be64(dw0); partition_tb[lpid].patb1 = cpu_to_be64(dw1); /* - * Global flush of TLBs and partition table caches for this lpid. - * The type of flush (hash or radix) depends on what the previous - * use of this partition ID was, not the new use. + * If ultravisor is enabled, we do an ultravisor call to register the + * partition table entry (PATE), which also do a global flush of TLBs + * and partition table caches for the lpid. Otherwise, just do the + * flush. The type of flush (hash or radix) depends on what the previous + * use of the partition ID was, not the new use. */ - asm volatile("ptesync" : : : "memory"); - if (old & PATB_HR) { - asm volatile(PPC_TLBIE_5(%0,%1,2,0,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - asm volatile(PPC_TLBIE_5(%0,%1,2,1,1) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 1); - } else { - asm volatile(PPC_TLBIE_5(%0,%1,2,0,0) : : - "r" (TLBIEL_INVAL_SET_LPID), "r" (lpid)); - trace_tlbie(lpid, 0, TLBIEL_INVAL_SET_LPID, lpid, 2, 0, 0); + if (firmware_has_feature(FW_FEATURE_ULTRAVISOR)) { + uv_register_pate(lpid, dw0, dw1); + pr_info("PATE registered by ultravisor: dw0 = 0x%lx, dw1 = 0x%lx\n", + dw0, dw1); + } else if (flush) { + /* + * Boot does not need to flush, because MMU is off and each + * CPU does a tlbiel_all() before switching them on, which + * flushes everything. + */ + flush_partition(lpid, (old & PATB_HR)); } - /* do we need fixup here ?*/ - asm volatile("eieio; tlbsync; ptesync" : : : "memory"); } EXPORT_SYMBOL_GPL(mmu_partition_table_set_entry); @@ -447,23 +471,48 @@ int pmd_move_must_withdraw(struct spinlock *new_pmd_ptl, return true; } -int ioremap_range(unsigned long ea, phys_addr_t pa, unsigned long size, pgprot_t prot, int nid) -{ - unsigned long i; +/* + * Does the CPU support tlbie? + */ +bool tlbie_capable __read_mostly = true; +EXPORT_SYMBOL(tlbie_capable); - if (radix_enabled()) - return radix__ioremap_range(ea, pa, size, prot, nid); - - for (i = 0; i < size; i += PAGE_SIZE) { - int err = map_kernel_page(ea + i, pa + i, prot); - if (err) { - if (slab_is_available()) - unmap_kernel_range(ea, size); - else - WARN_ON_ONCE(1); /* Should clean up */ - return err; - } +/* + * Should tlbie be used for management of CPU TLBs, for kernel and process + * address spaces? tlbie may still be used for nMMU accelerators, and for KVM + * guest address spaces. + */ +bool tlbie_enabled __read_mostly = true; + +static int __init setup_disable_tlbie(char *str) +{ + if (!radix_enabled()) { + pr_err("disable_tlbie: Unable to disable TLBIE with Hash MMU.\n"); + return 1; } + tlbie_capable = false; + tlbie_enabled = false; + + return 1; +} +__setup("disable_tlbie", setup_disable_tlbie); + +static int __init pgtable_debugfs_setup(void) +{ + if (!tlbie_capable) + return 0; + + /* + * There is no locking vs tlb flushing when changing this value. + * The tlb flushers will see one value or another, and use either + * tlbie or tlbiel with IPIs. In both cases the TLBs will be + * invalidated as expected. + */ + debugfs_create_bool("tlbie_enabled", 0600, + powerpc_debugfs_root, + &tlbie_enabled); + return 0; } +arch_initcall(pgtable_debugfs_setup); |