summary refs log tree commit diff stats
path: root/arch/powerpc/platforms
diff options
context:
space:
mode:
author Linus Torvalds <torvalds@linux-foundation.org> 2020-12-17 13:34:25 -0800
committer Linus Torvalds <torvalds@linux-foundation.org> 2020-12-17 13:34:25 -0800
commit 8a5be36b9303ae167468d4f5e1b3c090b9981396 (patch)
tree ddf1721677782484bab6369a87f13611eafb879a /arch/powerpc/platforms
parent 09c0796adf0c793462fda1d7c8c43324551405c7 (diff)
parent c1bea0a840ac75dca19bc6aa05575a33eb9fd058 (diff)
download linux-8a5be36b9303ae167468d4f5e1b3c090b9981396.tar.bz2
Merge tag 'powerpc-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux
Pull powerpc updates from Michael Ellerman:

 - Switch to the generic C VDSO, as well as some cleanups of our VDSO setup/handling code.

 - Support for KUAP (Kernel User Access Prevention) on systems using the hashed page table MMU, using memory protection keys.

 - Better handling of PowerVM SMT8 systems where all threads of a core do not share an L2, allowing the scheduler to make better scheduling decisions.

 - Further improvements to our machine check handling.

 - Show registers when unwinding interrupt frames during stack traces.

 - Improvements to our pseries (PowerVM) partition migration code.

 - Several series from Christophe refactoring and cleaning up various parts of the 32-bit code.

 - Other smaller features, fixes & cleanups.

Thanks to: Alan Modra, Alexey Kardashevskiy, Andrew Donnellan, Aneesh Kumar K.V, Ard Biesheuvel, Athira Rajeev, Balamuruhan S, Bill Wendling, Cédric Le Goater, Christophe Leroy, Christophe Lombard, Colin Ian King, Daniel Axtens, David Hildenbrand, Frederic Barrat, Ganesh Goudar, Gautham R. Shenoy, Geert Uytterhoeven, Giuseppe Sacco, Greg Kurz, Harish, Jan Kratochvil, Jordan Niethe, Kaixu Xia, Laurent Dufour, Leonardo Bras, Madhavan Srinivasan, Mahesh Salgaonkar, Mathieu Desnoyers, Nathan Lynch, Nicholas Piggin, Oleg Nesterov, Oliver O'Halloran, Oscar Salvador, Po-Hsu Lin, Qian Cai, Qinglang Miao, Randy Dunlap, Ravi Bangoria, Sachin Sant, Sandipan Das, Sebastian Andrzej Siewior, Segher Boessenkool, Srikar Dronamraju, Tyrel Datwyler, Uwe Kleine-König, Vincent Stehlé, Youling Tang, and Zhang Xiaoxu.
* tag 'powerpc-5.11-1' of git://git.kernel.org/pub/scm/linux/kernel/git/powerpc/linux: (304 commits)
  powerpc/32s: Fix cleanup_cpu_mmu_context() compile bug
  powerpc: Add config fragment for disabling -Werror
  powerpc/configs: Add ppc64le_allnoconfig target
  powerpc/powernv: Rate limit opal-elog read failure message
  powerpc/pseries/memhotplug: Quieten some DLPAR operations
  powerpc/ps3: use dma_mapping_error()
  powerpc: force inlining of csum_partial() to avoid multiple csum_partial() with GCC10
  powerpc/perf: Fix Threshold Event Counter Multiplier width for P10
  powerpc/mm: Fix hugetlb_free_pmd_range() and hugetlb_free_pud_range()
  KVM: PPC: Book3S HV: Fix mask size for emulated msgsndp
  KVM: PPC: fix comparison to bool warning
  KVM: PPC: Book3S: Assign boolean values to a bool variable
  powerpc: Inline setup_kup()
  powerpc/64s: Mark the kuap/kuep functions non __init
  KVM: PPC: Book3S HV: XIVE: Add a comment regarding VP numbering
  powerpc/xive: Improve error reporting of OPAL calls
  powerpc/xive: Simplify xive_do_source_eoi()
  powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_EOI_FW
  powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_MASK_FW
  powerpc/xive: Remove P9 DD1 flag XIVE_IRQ_FLAG_SHIFT_BUG
  ...
Diffstat (limited to 'arch/powerpc/platforms')
-rw-r--r-- arch/powerpc/platforms/85xx/corenet_generic.c 3
-rw-r--r-- arch/powerpc/platforms/8xx/Kconfig 7
-rw-r--r-- arch/powerpc/platforms/8xx/micropatch.c 11
-rw-r--r-- arch/powerpc/platforms/Kconfig.cputype 57
-rw-r--r-- arch/powerpc/platforms/powermac/sleep.S 132
-rw-r--r-- arch/powerpc/platforms/powernv/Kconfig 8
-rw-r--r-- arch/powerpc/platforms/powernv/idle.c 3
-rw-r--r-- arch/powerpc/platforms/powernv/memtrace.c 175
-rw-r--r-- arch/powerpc/platforms/powernv/npu-dma.c 16
-rw-r--r-- arch/powerpc/platforms/powernv/ocxl.c 114
-rw-r--r-- arch/powerpc/platforms/powernv/opal-elog.c 4
-rw-r--r-- arch/powerpc/platforms/powernv/opal-hmi.c 29
-rw-r--r-- arch/powerpc/platforms/powernv/opal.c 2
-rw-r--r-- arch/powerpc/platforms/powernv/pci-ioda.c 4
-rw-r--r-- arch/powerpc/platforms/powernv/pci-sriov.c 2
-rw-r--r-- arch/powerpc/platforms/ps3/system-bus.c 5
-rw-r--r-- arch/powerpc/platforms/pseries/hotplug-cpu.c 1
-rw-r--r-- arch/powerpc/platforms/pseries/hotplug-memory.c 12
-rw-r--r-- arch/powerpc/platforms/pseries/mobility.c 358
-rw-r--r-- arch/powerpc/platforms/pseries/pci.c 51
-rw-r--r-- arch/powerpc/platforms/pseries/ras.c 5
-rw-r--r-- arch/powerpc/platforms/pseries/smp.c 3
-rw-r--r-- arch/powerpc/platforms/pseries/suspend.c 79
23 files changed, 693 insertions, 388 deletions
diff --git a/arch/powerpc/platforms/85xx/corenet_generic.c b/arch/powerpc/platforms/85xx/corenet_generic.c
index 6aa8defb5857..8d6029099848 100644
--- a/arch/powerpc/platforms/85xx/corenet_generic.c
+++ b/arch/powerpc/platforms/85xx/corenet_generic.c
@@ -106,6 +106,7 @@ int __init corenet_gen_publish_devices(void)
{
return of_platform_bus_probe(NULL, of_device_ids, NULL);
}
+machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
static const char * const boards[] __initconst = {
"fsl,P2041RDB",
@@ -206,5 +207,3 @@ define_machine(corenet_generic) {
.power_save = e500_idle,
#endif
};
-
-machine_arch_initcall(corenet_generic, corenet_gen_publish_devices);
diff --git a/arch/powerpc/platforms/8xx/Kconfig b/arch/powerpc/platforms/8xx/Kconfig
index abb2b45b2789..60cc5b537a98 100644
--- a/arch/powerpc/platforms/8xx/Kconfig
+++ b/arch/powerpc/platforms/8xx/Kconfig
@@ -194,13 +194,6 @@ config PIN_TLB_IMMR
CONFIG_PIN_TLB_DATA is also selected, it will reduce
CONFIG_PIN_TLB_DATA to 24 Mbytes.
-config PIN_TLB_TEXT
- bool "Pinned TLB for TEXT"
- depends on PIN_TLB
- default y
- help
- This pins kernel text with 8M pages.
-
endmenu
endmenu
diff --git a/arch/powerpc/platforms/8xx/micropatch.c b/arch/powerpc/platforms/8xx/micropatch.c
index aed4bc75f352..aef179fcbd4f 100644
--- a/arch/powerpc/platforms/8xx/micropatch.c
+++ b/arch/powerpc/platforms/8xx/micropatch.c
@@ -360,6 +360,17 @@ void __init cpm_load_patch(cpm8xx_t *cp)
if (IS_ENABLED(CONFIG_SMC_UCODE_PATCH)) {
smc_uart_t *smp;
+ if (IS_ENABLED(CONFIG_PPC_EARLY_DEBUG_CPM)) {
+ int i;
+
+ for (i = 0; i < sizeof(*smp); i += 4) {
+ u32 __iomem *src = (u32 __iomem *)&cp->cp_dparam[PROFF_SMC1 + i];
+ u32 __iomem *dst = (u32 __iomem *)&cp->cp_dparam[PROFF_DSP1 + i];
+
+ out_be32(dst, in_be32(src));
+ }
+ }
+
smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC1];
out_be16(&smp->smc_rpbase, 0x1ec0);
smp = (smc_uart_t *)&cp->cp_dparam[PROFF_SMC2];
diff --git a/arch/powerpc/platforms/Kconfig.cputype b/arch/powerpc/platforms/Kconfig.cputype
index c194c4ae8bc7..3ce907523b1e 100644
--- a/arch/powerpc/platforms/Kconfig.cputype
+++ b/arch/powerpc/platforms/Kconfig.cputype
@@ -11,9 +11,6 @@ config PPC64
This option selects whether a 32-bit or a 64-bit kernel
will be built.
-config PPC_BOOK3S_32
- bool
-
menu "Processor support"
choice
prompt "Processor Type"
@@ -23,20 +20,19 @@ choice
The most common ones are the desktop and server CPUs (603,
604, 740, 750, 74xx) CPUs from Freescale and IBM, with their
embedded 512x/52xx/82xx/83xx/86xx counterparts.
- The other embedded parts, namely 4xx, 8xx, e200 (55xx) and e500
+ The other embedded parts, namely 4xx, 8xx and e500
(85xx) each form a family of their own that is not compatible
with the others.
If unsure, select 52xx/6xx/7xx/74xx/82xx/83xx/86xx.
-config PPC_BOOK3S_6xx
+config PPC_BOOK3S_32
bool "512x/52xx/6xx/7xx/74xx/82xx/83xx/86xx"
- select PPC_BOOK3S_32
- select PPC_FPU
+ imply PPC_FPU
select PPC_HAVE_PMU_SUPPORT
select PPC_HAVE_KUEP
select PPC_HAVE_KUAP
- select HAVE_ARCH_VMAP_STACK if !ADB_PMU
+ select HAVE_ARCH_VMAP_STACK
config PPC_85xx
bool "Freescale 85xx"
@@ -66,11 +62,24 @@ config 44x
select HAVE_PCI
select PHYS_64BIT
-config E200
- bool "Freescale e200"
-
endchoice
+config PPC_BOOK3S_603
+ bool "Support for 603 SW loaded TLB"
+ depends on PPC_BOOK3S_32
+ default y
+ help
+ Provide support for processors based on the 603 cores. Those
+ processors don't have a HASH MMU and provide SW TLB loading.
+
+config PPC_BOOK3S_604
+ bool "Support for 604+ HASH MMU" if PPC_BOOK3S_603
+ depends on PPC_BOOK3S_32
+ default y
+ help
+ Provide support for processors not based on the 603 cores.
+ Those processors have a HASH MMU.
+
choice
prompt "Processor Type"
depends on PPC64
@@ -218,9 +227,20 @@ config PPC_E500MC
such as e5500/e6500), and must be disabled for running on
e500v1 or e500v2.
-config PPC_FPU
+config PPC_FPU_REGS
bool
+
+config PPC_FPU
+ bool "Support for Floating Point Unit (FPU)" if PPC_MPC832x
default y if PPC64
+ select PPC_FPU_REGS
+ help
+ This must be enabled to support the Floating Point Unit
+ Most 6xx have an FPU but e300c2 core (mpc832x) don't have
+ an FPU, so when building an embedded kernel for that target
+ you can disable FPU support.
+
+ If unsure say Y.
config FSL_EMB_PERFMON
bool "Freescale Embedded Perfmon"
@@ -247,12 +267,12 @@ config 4xx
config BOOKE
bool
- depends on E200 || E500 || 44x || PPC_BOOK3E
+ depends on E500 || 44x || PPC_BOOK3E
default y
config FSL_BOOKE
bool
- depends on (E200 || E500) && PPC32
+ depends on E500 && PPC32
default y
# this is for common code between PPC32 & PPC64 FSL BOOKE
@@ -317,7 +337,7 @@ config VSX
config SPE_POSSIBLE
def_bool y
- depends on E200 || (E500 && !PPC_E500MC)
+ depends on E500 && !PPC_E500MC
config SPE
bool "SPE Support"
@@ -395,6 +415,11 @@ config PPC_KUAP_DEBUG
Add extra debugging for Kernel Userspace Access Protection (KUAP)
If you're unsure, say N.
+config PPC_PKEY
+ def_bool y
+ depends on PPC_BOOK3S_64
+ depends on PPC_MEM_KEYS || PPC_KUAP || PPC_KUEP
+
config ARCH_ENABLE_HUGEPAGE_MIGRATION
def_bool y
depends on PPC_BOOK3S_64 && HUGETLB_PAGE && MIGRATION
@@ -464,7 +489,7 @@ config NR_CPUS
config NOT_COHERENT_CACHE
bool
- depends on 4xx || PPC_8xx || E200 || PPC_MPC512x || \
+ depends on 4xx || PPC_8xx || PPC_MPC512x || \
GAMECUBE_COMMON || AMIGAONE
select ARCH_HAS_DMA_PREP_COHERENT
select ARCH_HAS_SYNC_DMA_FOR_DEVICE
diff --git a/arch/powerpc/platforms/powermac/sleep.S b/arch/powerpc/platforms/powermac/sleep.S
index 7e0f8ba6e54a..d497a60003d2 100644
--- a/arch/powerpc/platforms/powermac/sleep.S
+++ b/arch/powerpc/platforms/powermac/sleep.S
@@ -44,7 +44,8 @@
#define SL_TB 0xa0
#define SL_R2 0xa8
#define SL_CR 0xac
-#define SL_R12 0xb0 /* r12 to r31 */
+#define SL_LR 0xb0
+#define SL_R12 0xb4 /* r12 to r31 */
#define SL_SIZE (SL_R12 + 80)
.section .text
@@ -63,105 +64,107 @@ _GLOBAL(low_sleep_handler)
blr
#else
mflr r0
- stw r0,4(r1)
- stwu r1,-SL_SIZE(r1)
+ lis r11,sleep_storage@ha
+ addi r11,r11,sleep_storage@l
+ stw r0,SL_LR(r11)
mfcr r0
- stw r0,SL_CR(r1)
- stw r2,SL_R2(r1)
- stmw r12,SL_R12(r1)
+ stw r0,SL_CR(r11)
+ stw r1,SL_SP(r11)
+ stw r2,SL_R2(r11)
+ stmw r12,SL_R12(r11)
/* Save MSR & SDR1 */
mfmsr r4
- stw r4,SL_MSR(r1)
+ stw r4,SL_MSR(r11)
mfsdr1 r4
- stw r4,SL_SDR1(r1)
+ stw r4,SL_SDR1(r11)
/* Get a stable timebase and save it */
1: mftbu r4
- stw r4,SL_TB(r1)
+ stw r4,SL_TB(r11)
mftb r5
- stw r5,SL_TB+4(r1)
+ stw r5,SL_TB+4(r11)
mftbu r3
cmpw r3,r4
bne 1b
/* Save SPRGs */
mfsprg r4,0
- stw r4,SL_SPRG0(r1)
+ stw r4,SL_SPRG0(r11)
mfsprg r4,1
- stw r4,SL_SPRG0+4(r1)
+ stw r4,SL_SPRG0+4(r11)
mfsprg r4,2
- stw r4,SL_SPRG0+8(r1)
+ stw r4,SL_SPRG0+8(r11)
mfsprg r4,3
- stw r4,SL_SPRG0+12(r1)
+ stw r4,SL_SPRG0+12(r11)
/* Save BATs */
mfdbatu r4,0
- stw r4,SL_DBAT0(r1)
+ stw r4,SL_DBAT0(r11)
mfdbatl r4,0
- stw r4,SL_DBAT0+4(r1)
+ stw r4,SL_DBAT0+4(r11)
mfdbatu r4,1
- stw r4,SL_DBAT1(r1)
+ stw r4,SL_DBAT1(r11)
mfdbatl r4,1
- stw r4,SL_DBAT1+4(r1)
+ stw r4,SL_DBAT1+4(r11)
mfdbatu r4,2
- stw r4,SL_DBAT2(r1)
+ stw r4,SL_DBAT2(r11)
mfdbatl r4,2
- stw r4,SL_DBAT2+4(r1)
+ stw r4,SL_DBAT2+4(r11)
mfdbatu r4,3
- stw r4,SL_DBAT3(r1)
+ stw r4,SL_DBAT3(r11)
mfdbatl r4,3
- stw r4,SL_DBAT3+4(r1)
+ stw r4,SL_DBAT3+4(r11)
mfibatu r4,0
- stw r4,SL_IBAT0(r1)
+ stw r4,SL_IBAT0(r11)
mfibatl r4,0
- stw r4,SL_IBAT0+4(r1)
+ stw r4,SL_IBAT0+4(r11)
mfibatu r4,1
- stw r4,SL_IBAT1(r1)
+ stw r4,SL_IBAT1(r11)
mfibatl r4,1
- stw r4,SL_IBAT1+4(r1)
+ stw r4,SL_IBAT1+4(r11)
mfibatu r4,2
- stw r4,SL_IBAT2(r1)
+ stw r4,SL_IBAT2(r11)
mfibatl r4,2
- stw r4,SL_IBAT2+4(r1)
+ stw r4,SL_IBAT2+4(r11)
mfibatu r4,3
- stw r4,SL_IBAT3(r1)
+ stw r4,SL_IBAT3(r11)
mfibatl r4,3
- stw r4,SL_IBAT3+4(r1)
+ stw r4,SL_IBAT3+4(r11)
BEGIN_MMU_FTR_SECTION
mfspr r4,SPRN_DBAT4U
- stw r4,SL_DBAT4(r1)
+ stw r4,SL_DBAT4(r11)
mfspr r4,SPRN_DBAT4L
- stw r4,SL_DBAT4+4(r1)
+ stw r4,SL_DBAT4+4(r11)
mfspr r4,SPRN_DBAT5U
- stw r4,SL_DBAT5(r1)
+ stw r4,SL_DBAT5(r11)
mfspr r4,SPRN_DBAT5L
- stw r4,SL_DBAT5+4(r1)
+ stw r4,SL_DBAT5+4(r11)
mfspr r4,SPRN_DBAT6U
- stw r4,SL_DBAT6(r1)
+ stw r4,SL_DBAT6(r11)
mfspr r4,SPRN_DBAT6L
- stw r4,SL_DBAT6+4(r1)
+ stw r4,SL_DBAT6+4(r11)
mfspr r4,SPRN_DBAT7U
- stw r4,SL_DBAT7(r1)
+ stw r4,SL_DBAT7(r11)
mfspr r4,SPRN_DBAT7L
- stw r4,SL_DBAT7+4(r1)
+ stw r4,SL_DBAT7+4(r11)
mfspr r4,SPRN_IBAT4U
- stw r4,SL_IBAT4(r1)
+ stw r4,SL_IBAT4(r11)
mfspr r4,SPRN_IBAT4L
- stw r4,SL_IBAT4+4(r1)
+ stw r4,SL_IBAT4+4(r11)
mfspr r4,SPRN_IBAT5U
- stw r4,SL_IBAT5(r1)
+ stw r4,SL_IBAT5(r11)
mfspr r4,SPRN_IBAT5L
- stw r4,SL_IBAT5+4(r1)
+ stw r4,SL_IBAT5+4(r11)
mfspr r4,SPRN_IBAT6U
- stw r4,SL_IBAT6(r1)
+ stw r4,SL_IBAT6(r11)
mfspr r4,SPRN_IBAT6L
- stw r4,SL_IBAT6+4(r1)
+ stw r4,SL_IBAT6+4(r11)
mfspr r4,SPRN_IBAT7U
- stw r4,SL_IBAT7(r1)
+ stw r4,SL_IBAT7(r11)
mfspr r4,SPRN_IBAT7L
- stw r4,SL_IBAT7+4(r1)
+ stw r4,SL_IBAT7+4(r11)
END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
/* Backup various CPU config stuffs */
@@ -180,9 +183,9 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
lis r5,grackle_wake_up@ha
addi r5,r5,grackle_wake_up@l
tophys(r5,r5)
- stw r5,SL_PC(r1)
+ stw r5,SL_PC(r11)
lis r4,KERNELBASE@h
- tophys(r5,r1)
+ tophys(r5,r11)
addi r5,r5,SL_PC
lis r6,MAGIC@ha
addi r6,r6,MAGIC@l
@@ -194,12 +197,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
tophys(r3,r3)
stw r3,0x80(r4)
stw r5,0x84(r4)
- /* Store a pointer to our backup storage into
- * a kernel global
- */
- lis r3,sleep_storage@ha
- addi r3,r3,sleep_storage@l
- stw r5,0(r3)
.globl low_cpu_offline_self
low_cpu_offline_self:
@@ -279,7 +276,7 @@ _GLOBAL(core99_wake_up)
lis r3,sleep_storage@ha
addi r3,r3,sleep_storage@l
tophys(r3,r3)
- lwz r1,0(r3)
+ addi r1,r3,SL_PC
/* Pass thru to older resume code ... */
_ASM_NOKPROBE_SYMBOL(core99_wake_up)
@@ -399,13 +396,6 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
blt 1b
sync
- /* restore the MSR and turn on the MMU */
- lwz r3,SL_MSR(r1)
- bl turn_on_mmu
-
- /* get back the stack pointer */
- tovirt(r1,r1)
-
/* Restore TB */
li r3,0
mttbl r3
@@ -419,28 +409,24 @@ END_MMU_FTR_SECTION_IFSET(MMU_FTR_USE_HIGH_BATS)
mtcr r0
lwz r2,SL_R2(r1)
lmw r12,SL_R12(r1)
- addi r1,r1,SL_SIZE
- lwz r0,4(r1)
- mtlr r0
- blr
-_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
-turn_on_mmu:
- mflr r4
- tovirt(r4,r4)
+ /* restore the MSR and SP and turn on the MMU and return */
+ lwz r3,SL_MSR(r1)
+ lwz r4,SL_LR(r1)
+ lwz r1,SL_SP(r1)
mtsrr0 r4
mtsrr1 r3
sync
isync
rfi
-_ASM_NOKPROBE_SYMBOL(turn_on_mmu)
+_ASM_NOKPROBE_SYMBOL(grackle_wake_up)
#endif /* defined(CONFIG_PM) || defined(CONFIG_CPU_FREQ) */
- .section .data
+ .section .bss
.balign L1_CACHE_BYTES
sleep_storage:
- .long 0
+ .space SL_SIZE
.balign L1_CACHE_BYTES, 0
#endif /* CONFIG_PPC_BOOK3S_32 */
diff --git a/arch/powerpc/platforms/powernv/Kconfig b/arch/powerpc/platforms/powernv/Kconfig
index 938803eab0ad..619b093a0657 100644
--- a/arch/powerpc/platforms/powernv/Kconfig
+++ b/arch/powerpc/platforms/powernv/Kconfig
@@ -27,11 +27,11 @@ config OPAL_PRD
recovery diagnostics on OpenPower machines
config PPC_MEMTRACE
- bool "Enable removal of RAM from kernel mappings for tracing"
- depends on PPC_POWERNV && MEMORY_HOTREMOVE
+ bool "Enable runtime allocation of RAM for tracing"
+ depends on PPC_POWERNV && MEMORY_HOTPLUG && CONTIG_ALLOC
help
- Enabling this option allows for the removal of memory (RAM)
- from the kernel mappings to be used for hardware tracing.
+ Enabling this option allows for runtime allocation of memory (RAM)
+ for hardware tracing.
config PPC_VAS
bool "IBM Virtual Accelerator Switchboard (VAS)"
diff --git a/arch/powerpc/platforms/powernv/idle.c b/arch/powerpc/platforms/powernv/idle.c
index 1ed7c5286487..e6f461812856 100644
--- a/arch/powerpc/platforms/powernv/idle.c
+++ b/arch/powerpc/platforms/powernv/idle.c
@@ -589,6 +589,7 @@ struct p9_sprs {
u64 spurr;
u64 dscr;
u64 wort;
+ u64 ciabr;
u64 mmcra;
u32 mmcr0;
@@ -668,6 +669,7 @@ static unsigned long power9_idle_stop(unsigned long psscr, bool mmu_on)
sprs.spurr = mfspr(SPRN_SPURR);
sprs.dscr = mfspr(SPRN_DSCR);
sprs.wort = mfspr(SPRN_WORT);
+ sprs.ciabr = mfspr(SPRN_CIABR);
sprs.mmcra = mfspr(SPRN_MMCRA);
sprs.mmcr0 = mfspr(SPRN_MMCR0);
@@ -785,6 +787,7 @@ core_woken:
mtspr(SPRN_SPURR, sprs.spurr);
mtspr(SPRN_DSCR, sprs.dscr);
mtspr(SPRN_WORT, sprs.wort);
+ mtspr(SPRN_CIABR, sprs.ciabr);
mtspr(SPRN_MMCRA, sprs.mmcra);
mtspr(SPRN_MMCR0, sprs.mmcr0);
diff --git a/arch/powerpc/platforms/powernv/memtrace.c b/arch/powerpc/platforms/powernv/memtrace.c
index 6828108486f8..5fc9408bb0b3 100644
--- a/arch/powerpc/platforms/powernv/memtrace.c
+++ b/arch/powerpc/platforms/powernv/memtrace.c
@@ -30,6 +30,7 @@ struct memtrace_entry {
char name[16];
};
+static DEFINE_MUTEX(memtrace_mutex);
static u64 memtrace_size;
static struct memtrace_entry *memtrace_array;
@@ -50,84 +51,52 @@ static const struct file_operations memtrace_fops = {
.open = simple_open,
};
-static int check_memblock_online(struct memory_block *mem, void *arg)
+static void memtrace_clear_range(unsigned long start_pfn,
+ unsigned long nr_pages)
{
- if (mem->state != MEM_ONLINE)
- return -1;
+ unsigned long pfn;
- return 0;
-}
-
-static int change_memblock_state(struct memory_block *mem, void *arg)
-{
- unsigned long state = (unsigned long)arg;
-
- mem->state = state;
-
- return 0;
-}
-
-/* called with device_hotplug_lock held */
-static bool memtrace_offline_pages(u32 nid, u64 start_pfn, u64 nr_pages)
-{
- const unsigned long start = PFN_PHYS(start_pfn);
- const unsigned long size = PFN_PHYS(nr_pages);
-
- if (walk_memory_blocks(start, size, NULL, check_memblock_online))
- return false;
-
- walk_memory_blocks(start, size, (void *)MEM_GOING_OFFLINE,
- change_memblock_state);
-
- if (offline_pages(start_pfn, nr_pages)) {
- walk_memory_blocks(start, size, (void *)MEM_ONLINE,
- change_memblock_state);
- return false;
+ /* As HIGHMEM does not apply, use clear_page() directly. */
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++) {
+ if (IS_ALIGNED(pfn, PAGES_PER_SECTION))
+ cond_resched();
+ clear_page(__va(PFN_PHYS(pfn)));
}
-
- walk_memory_blocks(start, size, (void *)MEM_OFFLINE,
- change_memblock_state);
-
-
- return true;
}
static u64 memtrace_alloc_node(u32 nid, u64 size)
{
- u64 start_pfn, end_pfn, nr_pages, pfn;
- u64 base_pfn;
- u64 bytes = memory_block_size_bytes();
+ const unsigned long nr_pages = PHYS_PFN(size);
+ unsigned long pfn, start_pfn;
+ struct page *page;
- if (!node_spanned_pages(nid))
+ /*
+ * Trace memory needs to be aligned to the size, which is guaranteed
+ * by alloc_contig_pages().
+ */
+ page = alloc_contig_pages(nr_pages, GFP_KERNEL | __GFP_THISNODE |
+ __GFP_NOWARN, nid, NULL);
+ if (!page)
return 0;
+ start_pfn = page_to_pfn(page);
- start_pfn = node_start_pfn(nid);
- end_pfn = node_end_pfn(nid);
- nr_pages = size >> PAGE_SHIFT;
-
- /* Trace memory needs to be aligned to the size */
- end_pfn = round_down(end_pfn - nr_pages, nr_pages);
-
- lock_device_hotplug();
- for (base_pfn = end_pfn; base_pfn > start_pfn; base_pfn -= nr_pages) {
- if (memtrace_offline_pages(nid, base_pfn, nr_pages) == true) {
- /*
- * Remove memory in memory block size chunks so that
- * iomem resources are always split to the same size and
- * we never try to remove memory that spans two iomem
- * resources.
- */
- end_pfn = base_pfn + nr_pages;
- for (pfn = base_pfn; pfn < end_pfn; pfn += bytes>> PAGE_SHIFT) {
- __remove_memory(nid, pfn << PAGE_SHIFT, bytes);
- }
- unlock_device_hotplug();
- return base_pfn << PAGE_SHIFT;
- }
- }
- unlock_device_hotplug();
+ /*
+ * Clear the range while we still have a linear mapping.
+ *
+ * TODO: use __GFP_ZERO with alloc_contig_pages() once supported.
+ */
+ memtrace_clear_range(start_pfn, nr_pages);
- return 0;
+ /*
+ * Set pages PageOffline(), to indicate that nobody (e.g., hibernation,
+ * dumping, ...) should be touching these pages.
+ */
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
+ __SetPageOffline(pfn_to_page(pfn));
+
+ arch_remove_linear_mapping(PFN_PHYS(start_pfn), size);
+
+ return PFN_PHYS(start_pfn);
}
static int memtrace_init_regions_runtime(u64 size)
@@ -197,16 +166,30 @@ static int memtrace_init_debugfs(void)
return ret;
}
-static int online_mem_block(struct memory_block *mem, void *arg)
+static int memtrace_free(int nid, u64 start, u64 size)
{
- return device_online(&mem->dev);
+ struct mhp_params params = { .pgprot = PAGE_KERNEL };
+ const unsigned long nr_pages = PHYS_PFN(size);
+ const unsigned long start_pfn = PHYS_PFN(start);
+ unsigned long pfn;
+ int ret;
+
+ ret = arch_create_linear_mapping(nid, start, size, &params);
+ if (ret)
+ return ret;
+
+ for (pfn = start_pfn; pfn < start_pfn + nr_pages; pfn++)
+ __ClearPageOffline(pfn_to_page(pfn));
+
+ free_contig_range(start_pfn, nr_pages);
+ return 0;
}
/*
- * Iterate through the chunks of memory we have removed from the kernel
- * and attempt to add them back to the kernel.
+ * Iterate through the chunks of memory we allocated and attempt to expose
+ * them back to the kernel.
*/
-static int memtrace_online(void)
+static int memtrace_free_regions(void)
{
int i, ret = 0;
struct memtrace_entry *ent;
@@ -214,7 +197,7 @@ static int memtrace_online(void)
for (i = memtrace_array_nr - 1; i >= 0; i--) {
ent = &memtrace_array[i];
- /* We have onlined this chunk previously */
+ /* We have freed this chunk previously */
if (ent->nid == NUMA_NO_NODE)
continue;
@@ -224,30 +207,25 @@ static int memtrace_online(void)
ent->mem = 0;
}
- if (add_memory(ent->nid, ent->start, ent->size, MHP_NONE)) {
- pr_err("Failed to add trace memory to node %d\n",
+ if (memtrace_free(ent->nid, ent->start, ent->size)) {
+ pr_err("Failed to free trace memory on node %d\n",
ent->nid);
ret += 1;
continue;
}
- lock_device_hotplug();
- walk_memory_blocks(ent->start, ent->size, NULL,
- online_mem_block);
- unlock_device_hotplug();
-
/*
- * Memory was added successfully so clean up references to it
- * so on reentry we can tell that this chunk was added.
+ * Memory was freed successfully so clean up references to it
+ * so on reentry we can tell that this chunk was freed.
*/
debugfs_remove_recursive(ent->dir);
- pr_info("Added trace memory back to node %d\n", ent->nid);
+ pr_info("Freed trace memory back on node %d\n", ent->nid);
ent->size = ent->start = ent->nid = NUMA_NO_NODE;
}
if (ret)
return ret;
- /* If all chunks of memory were added successfully, reset globals */
+ /* If all chunks of memory were freed successfully, reset globals */
kfree(memtrace_array);
memtrace_array = NULL;
memtrace_size = 0;
@@ -257,6 +235,7 @@ static int memtrace_online(void)
static int memtrace_enable_set(void *data, u64 val)
{
+ int rc = -EAGAIN;
u64 bytes;
/*
@@ -269,25 +248,29 @@ static int memtrace_enable_set(void *data, u64 val)
return -EINVAL;
}
- /* Re-add/online previously removed/offlined memory */
- if (memtrace_size) {
- if (memtrace_online())
- return -EAGAIN;
- }
+ mutex_lock(&memtrace_mutex);
- if (!val)
- return 0;
+ /* Free all previously allocated memory. */
+ if (memtrace_size && memtrace_free_regions())
+ goto out_unlock;
+
+ if (!val) {
+ rc = 0;
+ goto out_unlock;
+ }
- /* Offline and remove memory */
+ /* Allocate memory. */
if (memtrace_init_regions_runtime(val))
- return -EINVAL;
+ goto out_unlock;
if (memtrace_init_debugfs())
- return -EINVAL;
+ goto out_unlock;
memtrace_size = val;
-
- return 0;
+ rc = 0;
+out_unlock:
+ mutex_unlock(&memtrace_mutex);
+ return rc;
}
static int memtrace_enable_get(void *data, u64 *val)
diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c
index abeaa533b976..b711dc3262a3 100644
--- a/arch/powerpc/platforms/powernv/npu-dma.c
+++ b/arch/powerpc/platforms/powernv/npu-dma.c
@@ -385,7 +385,8 @@ static void pnv_npu_peers_take_ownership(struct iommu_table_group *table_group)
for (i = 0; i < npucomp->pe_num; ++i) {
struct pnv_ioda_pe *pe = npucomp->pe[i];
- if (!pe->table_group.ops->take_ownership)
+ if (!pe->table_group.ops ||
+ !pe->table_group.ops->take_ownership)
continue;
pe->table_group.ops->take_ownership(&pe->table_group);
}
@@ -401,7 +402,8 @@ static void pnv_npu_peers_release_ownership(
for (i = 0; i < npucomp->pe_num; ++i) {
struct pnv_ioda_pe *pe = npucomp->pe[i];
- if (!pe->table_group.ops->release_ownership)
+ if (!pe->table_group.ops ||
+ !pe->table_group.ops->release_ownership)
continue;
pe->table_group.ops->release_ownership(&pe->table_group);
}
@@ -623,6 +625,11 @@ int pnv_npu2_map_lpar_dev(struct pci_dev *gpdev, unsigned int lparid,
return -ENODEV;
hose = pci_bus_to_host(npdev->bus);
+ if (hose->npu == NULL) {
+ dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
+ return 0;
+ }
+
nphb = hose->private_data;
dev_dbg(&gpdev->dev, "Map LPAR opalid=%llu lparid=%u\n",
@@ -670,6 +677,11 @@ int pnv_npu2_unmap_lpar_dev(struct pci_dev *gpdev)
return -ENODEV;
hose = pci_bus_to_host(npdev->bus);
+ if (hose->npu == NULL) {
+ dev_info_once(&npdev->dev, "Nvlink1 does not support contexts");
+ return 0;
+ }
+
nphb = hose->private_data;
dev_dbg(&gpdev->dev, "destroy context opalid=%llu\n",
diff --git a/arch/powerpc/platforms/powernv/ocxl.c b/arch/powerpc/platforms/powernv/ocxl.c
index ecdad219d704..9105efcf242a 100644
--- a/arch/powerpc/platforms/powernv/ocxl.c
+++ b/arch/powerpc/platforms/powernv/ocxl.c
@@ -483,3 +483,117 @@ int pnv_ocxl_spa_remove_pe_from_cache(void *platform_data, int pe_handle)
return rc;
}
EXPORT_SYMBOL_GPL(pnv_ocxl_spa_remove_pe_from_cache);
+
+int pnv_ocxl_map_lpar(struct pci_dev *dev, uint64_t lparid,
+ uint64_t lpcr, void __iomem **arva)
+{
+ struct pci_controller *hose = pci_bus_to_host(dev->bus);
+ struct pnv_phb *phb = hose->private_data;
+ u64 mmio_atsd;
+ int rc;
+
+ /* ATSD physical address.
+ * ATSD LAUNCH register: write access initiates a shoot down to
+ * initiate the TLB Invalidate command.
+ */
+ rc = of_property_read_u64_index(hose->dn, "ibm,mmio-atsd",
+ 0, &mmio_atsd);
+ if (rc) {
+ dev_info(&dev->dev, "No available ATSD found\n");
+ return rc;
+ }
+
+ /* Assign a register set to a Logical Partition and MMIO ATSD
+ * LPARID register to the required value.
+ */
+ rc = opal_npu_map_lpar(phb->opal_id, pci_dev_id(dev),
+ lparid, lpcr);
+ if (rc) {
+ dev_err(&dev->dev, "Error mapping device to LPAR: %d\n", rc);
+ return rc;
+ }
+
+ *arva = ioremap(mmio_atsd, 24);
+ if (!(*arva)) {
+ dev_warn(&dev->dev, "ioremap failed - mmio_atsd: %#llx\n", mmio_atsd);
+ rc = -ENOMEM;
+ }
+
+ return rc;
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_map_lpar);
+
+void pnv_ocxl_unmap_lpar(void __iomem *arva)
+{
+ iounmap(arva);
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_unmap_lpar);
+
+void pnv_ocxl_tlb_invalidate(void __iomem *arva,
+ unsigned long pid,
+ unsigned long addr,
+ unsigned long page_size)
+{
+ unsigned long timeout = jiffies + (HZ * PNV_OCXL_ATSD_TIMEOUT);
+ u64 val = 0ull;
+ int pend;
+ u8 size;
+
+ if (!(arva))
+ return;
+
+ if (addr) {
+ /* load Abbreviated Virtual Address register with
+ * the necessary value
+ */
+ val |= FIELD_PREP(PNV_OCXL_ATSD_AVA_AVA, addr >> (63-51));
+ out_be64(arva + PNV_OCXL_ATSD_AVA, val);
+ }
+
+ /* Write access initiates a shoot down to initiate the
+ * TLB Invalidate command
+ */
+ val = PNV_OCXL_ATSD_LNCH_R;
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_RIC, 0b10);
+ if (addr)
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b00);
+ else {
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_IS, 0b01);
+ val |= PNV_OCXL_ATSD_LNCH_OCAPI_SINGLETON;
+ }
+ val |= PNV_OCXL_ATSD_LNCH_PRS;
+ /* Actual Page Size to be invalidated
+ * 000 4KB
+ * 101 64KB
+ * 001 2MB
+ * 010 1GB
+ */
+ size = 0b101;
+ if (page_size == 0x1000)
+ size = 0b000;
+ if (page_size == 0x200000)
+ size = 0b001;
+ if (page_size == 0x40000000)
+ size = 0b010;
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_AP, size);
+ val |= FIELD_PREP(PNV_OCXL_ATSD_LNCH_PID, pid);
+ out_be64(arva + PNV_OCXL_ATSD_LNCH, val);
+
+ /* Poll the ATSD status register to determine when the
+ * TLB Invalidate has been completed.
+ */
+ val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+ pend = val >> 63;
+
+ while (pend) {
+ if (time_after_eq(jiffies, timeout)) {
+ pr_err("%s - Timeout while reading XTS MMIO ATSD status register (val=%#llx, pidr=0x%lx)\n",
+ __func__, val, pid);
+ return;
+ }
+ cpu_relax();
+ val = in_be64(arva + PNV_OCXL_ATSD_STAT);
+ pend = val >> 63;
+ }
+}
+EXPORT_SYMBOL_GPL(pnv_ocxl_tlb_invalidate);
diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c
index 37b380eef41a..5821b0fa8614 100644
--- a/arch/powerpc/platforms/powernv/opal-elog.c
+++ b/arch/powerpc/platforms/powernv/opal-elog.c
@@ -171,8 +171,8 @@ static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj,
opal_rc = opal_read_elog(__pa(elog->buffer),
elog->size, elog->id);
if (opal_rc != OPAL_SUCCESS) {
- pr_err("ELOG: log read failed for log-id=%llx\n",
- elog->id);
+ pr_err_ratelimited("ELOG: log read failed for log-id=%llx\n",
+ elog->id);
kfree(elog->buffer);
elog->buffer = NULL;
return -EIO;
diff --git a/arch/powerpc/platforms/powernv/opal-hmi.c b/arch/powerpc/platforms/powernv/opal-hmi.c
index 3e1f064a18db..f0c1830deb51 100644
--- a/arch/powerpc/platforms/powernv/opal-hmi.c
+++ b/arch/powerpc/platforms/powernv/opal-hmi.c
@@ -213,6 +213,8 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
"A hypervisor resource error occurred",
"CAPP recovery process is in progress",
};
+ static DEFINE_RATELIMIT_STATE(rs, DEFAULT_RATELIMIT_INTERVAL,
+ DEFAULT_RATELIMIT_BURST);
/* Print things out */
if (hmi_evt->version < OpalHMIEvt_V1) {
@@ -240,19 +242,22 @@ static void print_hmi_event_info(struct OpalHMIEvent *hmi_evt)
break;
}
- printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
- level, sevstr,
- hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
- "Recovered" : "Not recovered");
- error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
- hmi_error_types[hmi_evt->type]
- : "Unknown";
- printk("%s Error detail: %s\n", level, error_info);
- printk("%s HMER: %016llx\n", level, be64_to_cpu(hmi_evt->hmer));
- if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
- (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
- printk("%s TFMR: %016llx\n", level,
+ if (hmi_evt->severity != OpalHMI_SEV_NO_ERROR || __ratelimit(&rs)) {
+ printk("%s%s Hypervisor Maintenance interrupt [%s]\n",
+ level, sevstr,
+ hmi_evt->disposition == OpalHMI_DISPOSITION_RECOVERED ?
+ "Recovered" : "Not recovered");
+ error_info = hmi_evt->type < ARRAY_SIZE(hmi_error_types) ?
+ hmi_error_types[hmi_evt->type]
+ : "Unknown";
+ printk("%s Error detail: %s\n", level, error_info);
+ printk("%s HMER: %016llx\n", level,
+ be64_to_cpu(hmi_evt->hmer));
+ if ((hmi_evt->type == OpalHMI_ERROR_TFAC) ||
+ (hmi_evt->type == OpalHMI_ERROR_TFMR_PARITY))
+ printk("%s TFMR: %016llx\n", level,
be64_to_cpu(hmi_evt->tfmr));
+ }
if (hmi_evt->version < OpalHMIEvt_V2)
return;
diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c
index d95954ad4c0a..c61c3b62c8c6 100644
--- a/arch/powerpc/platforms/powernv/opal.c
+++ b/arch/powerpc/platforms/powernv/opal.c
@@ -731,7 +731,7 @@ int opal_hmi_exception_early2(struct pt_regs *regs)
return 1;
}
-/* HMI exception handler called in virtual mode during check_irq_replay. */
+/* HMI exception handler called in virtual mode when irqs are next enabled. */
int opal_handle_hmi_exception(struct pt_regs *regs)
{
/*
diff --git a/arch/powerpc/platforms/powernv/pci-ioda.c b/arch/powerpc/platforms/powernv/pci-ioda.c
index 2b4ceb5e6ce4..c4f72cdc9b51 100644
--- a/arch/powerpc/platforms/powernv/pci-ioda.c
+++ b/arch/powerpc/platforms/powernv/pci-ioda.c
@@ -2613,8 +2613,10 @@ static bool pnv_pci_enable_device_hook(struct pci_dev *dev)
return true;
pdn = pci_get_pdn(dev);
- if (!pdn || pdn->pe_number == IODA_INVALID_PE)
+ if (!pdn || pdn->pe_number == IODA_INVALID_PE) {
+ pci_err(dev, "pci_enable_device() blocked, no PE assigned.\n");
return false;
+ }
return true;
}
diff --git a/arch/powerpc/platforms/powernv/pci-sriov.c b/arch/powerpc/platforms/powernv/pci-sriov.c
index c4434f20f42f..28aac933a439 100644
--- a/arch/powerpc/platforms/powernv/pci-sriov.c
+++ b/arch/powerpc/platforms/powernv/pci-sriov.c
@@ -422,7 +422,7 @@ static int pnv_pci_vf_assign_m64(struct pci_dev *pdev, u16 num_vfs)
{
struct pnv_iov_data *iov;
struct pnv_phb *phb;
- unsigned int win;
+ int win;
struct resource *res;
int i, j;
int64_t rc;
diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c
index c62aaa29a9d5..b431f41c6cb5 100644
--- a/arch/powerpc/platforms/ps3/system-bus.c
+++ b/arch/powerpc/platforms/ps3/system-bus.c
@@ -382,7 +382,6 @@ static int ps3_system_bus_probe(struct device *_dev)
static int ps3_system_bus_remove(struct device *_dev)
{
- int result = 0;
struct ps3_system_bus_device *dev = ps3_dev_to_system_bus_dev(_dev);
struct ps3_system_bus_driver *drv;
@@ -393,13 +392,13 @@ static int ps3_system_bus_remove(struct device *_dev)
BUG_ON(!drv);
if (drv->remove)
- result = drv->remove(dev);
+ drv->remove(dev);
else
dev_dbg(&dev->core, "%s:%d %s: no remove method\n",
__func__, __LINE__, drv->core.name);
pr_debug(" <- %s:%d: %s\n", __func__, __LINE__, dev_name(&dev->core));
- return result;
+ return 0;
}
static void ps3_system_bus_shutdown(struct device *_dev)
diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c
index a02012f1b04a..12cbffd3c2e3 100644
--- a/arch/powerpc/platforms/pseries/hotplug-cpu.c
+++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c
@@ -746,6 +746,7 @@ static int dlpar_cpu_add_by_count(u32 cpus_to_add)
parent = of_find_node_by_path("/cpus");
if (!parent) {
pr_warn("Could not find CPU root node in device tree\n");
+ kfree(cpu_drcs);
return -1;
}
diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c
index 7efe6ec5d14a..8377f1f7c78e 100644
--- a/arch/powerpc/platforms/pseries/hotplug-memory.c
+++ b/arch/powerpc/platforms/pseries/hotplug-memory.c
@@ -479,7 +479,7 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
int lmb_found;
int rc;
- pr_info("Attempting to hot-remove LMB, drc index %x\n", drc_index);
+ pr_debug("Attempting to hot-remove LMB, drc index %x\n", drc_index);
lmb_found = 0;
for_each_drmem_lmb(lmb) {
@@ -497,10 +497,10 @@ static int dlpar_memory_remove_by_index(u32 drc_index)
rc = -EINVAL;
if (rc)
- pr_info("Failed to hot-remove memory at %llx\n",
- lmb->base_addr);
+ pr_debug("Failed to hot-remove memory at %llx\n",
+ lmb->base_addr);
else
- pr_info("Memory at %llx was hot-removed\n", lmb->base_addr);
+ pr_debug("Memory at %llx was hot-removed\n", lmb->base_addr);
return rc;
}
@@ -717,8 +717,8 @@ static int dlpar_memory_add_by_count(u32 lmbs_to_add)
if (!drmem_lmb_reserved(lmb))
continue;
- pr_info("Memory at %llx (drc index %x) was hot-added\n",
- lmb->base_addr, lmb->drc_index);
+ pr_debug("Memory at %llx (drc index %x) was hot-added\n",
+ lmb->base_addr, lmb->drc_index);
drmem_remove_lmb_reservation(lmb);
}
rc = 0;
diff --git a/arch/powerpc/platforms/pseries/mobility.c b/arch/powerpc/platforms/pseries/mobility.c
index 2f73cb5bf12d..ea4d6a660e0d 100644
--- a/arch/powerpc/platforms/pseries/mobility.c
+++ b/arch/powerpc/platforms/pseries/mobility.c
@@ -12,9 +12,11 @@
#include <linux/cpu.h>
#include <linux/kernel.h>
#include <linux/kobject.h>
+#include <linux/nmi.h>
#include <linux/sched.h>
#include <linux/smp.h>
#include <linux/stat.h>
+#include <linux/stop_machine.h>
#include <linux/completion.h>
#include <linux/device.h>
#include <linux/delay.h>
@@ -59,18 +61,10 @@ static int mobility_rtas_call(int token, char *buf, s32 scope)
return rc;
}
-static int delete_dt_node(__be32 phandle)
+static int delete_dt_node(struct device_node *dn)
{
- struct device_node *dn;
-
- dn = of_find_node_by_phandle(be32_to_cpu(phandle));
- if (!dn)
- return -ENOENT;
-
pr_debug("removing node %pOFfp\n", dn);
-
dlpar_detach_node(dn);
- of_node_put(dn);
return 0;
}
@@ -135,10 +129,9 @@ static int update_dt_property(struct device_node *dn, struct property **prop,
return 0;
}
-static int update_dt_node(__be32 phandle, s32 scope)
+static int update_dt_node(struct device_node *dn, s32 scope)
{
struct update_props_workarea *upwa;
- struct device_node *dn;
struct property *prop = NULL;
int i, rc, rtas_rc;
char *prop_data;
@@ -155,14 +148,8 @@ static int update_dt_node(__be32 phandle, s32 scope)
if (!rtas_buf)
return -ENOMEM;
- dn = of_find_node_by_phandle(be32_to_cpu(phandle));
- if (!dn) {
- kfree(rtas_buf);
- return -ENOENT;
- }
-
upwa = (struct update_props_workarea *)&rtas_buf[0];
- upwa->phandle = phandle;
+ upwa->phandle = cpu_to_be32(dn->phandle);
do {
rtas_rc = mobility_rtas_call(update_properties_token, rtas_buf,
@@ -208,11 +195,12 @@ static int update_dt_node(__be32 phandle, s32 scope)
rc = update_dt_property(dn, &prop, prop_name,
vd, prop_data);
if (rc) {
- printk(KERN_ERR "Could not update %s"
- " property\n", prop_name);
+ pr_err("updating %s property failed: %d\n",
+ prop_name, rc);
}
prop_data += vd;
+ break;
}
cond_resched();
@@ -221,26 +209,18 @@ static int update_dt_node(__be32 phandle, s32 scope)
cond_resched();
} while (rtas_rc == 1);
- of_node_put(dn);
kfree(rtas_buf);
return 0;
}
-static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
+static int add_dt_node(struct device_node *parent_dn, __be32 drc_index)
{
struct device_node *dn;
- struct device_node *parent_dn;
int rc;
- parent_dn = of_find_node_by_phandle(be32_to_cpu(parent_phandle));
- if (!parent_dn)
- return -ENOENT;
-
dn = dlpar_configure_connector(drc_index, parent_dn);
- if (!dn) {
- of_node_put(parent_dn);
+ if (!dn)
return -ENOENT;
- }
rc = dlpar_attach_node(dn, parent_dn);
if (rc)
@@ -248,7 +228,6 @@ static int add_dt_node(__be32 parent_phandle, __be32 drc_index)
pr_debug("added node %pOFfp\n", dn);
- of_node_put(parent_dn);
return rc;
}
@@ -261,7 +240,7 @@ int pseries_devicetree_update(s32 scope)
update_nodes_token = rtas_token("ibm,update-nodes");
if (update_nodes_token == RTAS_UNKNOWN_SERVICE)
- return -EINVAL;
+ return 0;
rtas_buf = kzalloc(RTAS_DATA_BUF_SIZE, GFP_KERNEL);
if (!rtas_buf)
@@ -281,22 +260,31 @@ int pseries_devicetree_update(s32 scope)
data++;
for (i = 0; i < node_count; i++) {
+ struct device_node *np;
__be32 phandle = *data++;
__be32 drc_index;
+ np = of_find_node_by_phandle(be32_to_cpu(phandle));
+ if (!np) {
+ pr_warn("Failed lookup: phandle 0x%x for action 0x%x\n",
+ be32_to_cpu(phandle), action);
+ continue;
+ }
+
switch (action) {
case DELETE_DT_NODE:
- delete_dt_node(phandle);
+ delete_dt_node(np);
break;
case UPDATE_DT_NODE:
- update_dt_node(phandle, scope);
+ update_dt_node(np, scope);
break;
case ADD_DT_NODE:
drc_index = *data++;
- add_dt_node(phandle, drc_index);
+ add_dt_node(np, drc_index);
break;
}
+ of_node_put(np);
cond_resched();
}
}
@@ -311,21 +299,8 @@ int pseries_devicetree_update(s32 scope)
void post_mobility_fixup(void)
{
int rc;
- int activate_fw_token;
-
- activate_fw_token = rtas_token("ibm,activate-firmware");
- if (activate_fw_token == RTAS_UNKNOWN_SERVICE) {
- printk(KERN_ERR "Could not make post-mobility "
- "activate-fw call.\n");
- return;
- }
-
- do {
- rc = rtas_call(activate_fw_token, 0, 1, NULL);
- } while (rtas_busy_delay(rc));
- if (rc)
- printk(KERN_ERR "Post-mobility activate-fw failed: %d\n", rc);
+ rtas_activate_firmware();
/*
* We don't want CPUs to go online/offline while the device
@@ -342,8 +317,7 @@ void post_mobility_fixup(void)
rc = pseries_devicetree_update(MIGRATION_SCOPE);
if (rc)
- printk(KERN_ERR "Post-mobility device tree update "
- "failed: %d\n", rc);
+ pr_err("device tree update failed: %d\n", rc);
cacheinfo_rebuild();
@@ -358,6 +332,279 @@ void post_mobility_fixup(void)
return;
}
+/*
+ * Query the state of the VASI stream identified by @handle through the
+ * H_VASI_STATE hcall.
+ *
+ * On H_SUCCESS the first return word is stored in *@res and 0 is
+ * returned; *@res is not written otherwise.  H_PARAMETER maps to
+ * -EINVAL and H_FUNCTION to -EOPNOTSUPP.  Any other status is logged
+ * and reported as -EIO.
+ */
+static int poll_vasi_state(u64 handle, unsigned long *res)
+{
+	unsigned long hcall_out[PLPAR_HCALL_BUFSIZE];
+	long status = plpar_hcall(H_VASI_STATE, hcall_out, handle);
+
+	if (status == H_SUCCESS) {
+		*res = hcall_out[0];
+		return 0;
+	}
+	if (status == H_PARAMETER)
+		return -EINVAL;
+	if (status == H_FUNCTION)
+		return -EOPNOTSUPP;
+
+	/* H_HARDWARE and every unrecognised status end up here. */
+	pr_err("unexpected H_VASI_STATE result %ld\n", status);
+	return -EIO;
+}
+
+/*
+ * Poll the VASI stream @handle once a second for as long as it reports
+ * H_VASI_ENABLED, stopping when it reaches H_VASI_SUSPENDING.
+ *
+ * Returns 0 once the stream is suspending, or when the poll reports
+ * -EOPNOTSUPP (H_VASI_STATE unavailable).  Any other poll failure is
+ * passed through, and any other stream state yields -EIO.
+ */
+static int wait_for_vasi_session_suspending(u64 handle)
+{
+	unsigned long vasi_state;
+	int rc;
+
+	for (;;) {
+		rc = poll_vasi_state(handle, &vasi_state);
+		if (rc)
+			break;
+		if (vasi_state == H_VASI_SUSPENDING)
+			break;
+
+		if (vasi_state != H_VASI_ENABLED) {
+			/* Only Enabled -> Suspending is acceptable. */
+			pr_err("unexpected H_VASI_STATE result %lu\n", vasi_state);
+			rc = -EIO;
+			break;
+		}
+
+		ssleep(1);
+	}
+
+	/*
+	 * A missing H_VASI_STATE is not fatal here: should H_JOIN or
+	 * ibm,suspend-me be unimplemented as well, the failure is
+	 * dealt with at that point instead.
+	 */
+	return rc == -EOPNOTSUPP ? 0 : rc;
+}
+
+/* Issue H_PROD to @target_cpu; a failure is only logged (rate-limited). */
+static void prod_single(unsigned int target_cpu)
+{
+	int hw_id = get_hard_smp_processor_id(target_cpu);
+	long status = plpar_hcall_norets(H_PROD, hw_id);
+
+	if (status != H_SUCCESS)
+		pr_err_ratelimited("H_PROD of CPU %u (hwid %d) error: %ld\n",
+				   target_cpu, hw_id, status);
+}
+
+/* Prod every online CPU other than the one executing this function. */
+static void prod_others(void)
+{
+	unsigned int target;
+
+	for_each_online_cpu(target) {
+		if (target == smp_processor_id())
+			continue;
+		prod_single(target);
+	}
+}
+
+/*
+ * Set the SLB size to SLB_MIN_SIZE and hand back the value that
+ * mmu_slb_size held beforehand, so the caller is able to restore it.
+ */
+static u16 clamp_slb_size(void)
+{
+	const u16 old_size = mmu_slb_size;
+
+	slb_set_size(SLB_MIN_SIZE);
+	return old_size;
+}
+
+/*
+ * Invoke ibm,suspend-me on the calling CPU.
+ *
+ * Returns whatever rtas_ibm_suspend_me() returns; 0 means success.
+ */
+static int do_suspend(void)
+{
+	u16 orig_slb_size;
+	int fw_status;
+	int rc;
+
+	pr_info("calling ibm,suspend-me on CPU %i\n", smp_processor_id());
+
+	/*
+	 * The processor model we resume on may implement fewer SLB
+	 * entries than this one.  Drop mmu_slb_size to a safe minimum
+	 * ahead of the suspend so that we are unlikely to program
+	 * entries that do not exist on the destination.  After a
+	 * successful resume the OF reconfig path picks up the size from
+	 * the new device tree; after a failed suspend the previous size
+	 * is put back below.
+	 */
+	orig_slb_size = clamp_slb_size();
+
+	rc = rtas_ibm_suspend_me(&fw_status);
+	if (rc == 0)
+		return 0;
+
+	pr_err("ibm,suspend-me error: %d\n", fw_status);
+	slb_set_size(orig_slb_size);
+	return rc;
+}
+
+/*
+ * Callback handed to stop_machine(): put the calling CPU into H_JOIN.
+ *
+ * @arg points to an atomic_t shared by all callers; the first CPU to
+ * increment it after coming out of H_JOIN prods all the others.
+ *
+ * Returns the do_suspend() result on the CPU that got H_CONTINUE, 0 on
+ * a CPU whose H_JOIN returned H_SUCCESS, and -EIO for any other H_JOIN
+ * status.
+ */
+static int do_join(void *arg)
+{
+	atomic_t *wake_count = arg;
+	long status;
+	int rc;
+
+	/* H_JOIN requires MSR.EE to be off. */
+	hard_irq_disable();
+	status = plpar_hcall_norets(H_JOIN);
+
+	if (status == H_CONTINUE) {
+		/*
+		 * Every other CPU is either offline or already in
+		 * H_JOIN, so the suspend is attempted from here.
+		 */
+		rc = do_suspend();
+	} else if (status == H_SUCCESS) {
+		/* The suspend has finished and this CPU was prodded. */
+		rc = 0;
+	} else {
+		/* H_BAD_MODE, H_HARDWARE or anything unexpected. */
+		pr_err_ratelimited("H_JOIN error %ld on CPU %i\n",
+				   status, smp_processor_id());
+		rc = -EIO;
+	}
+
+	if (atomic_inc_return(wake_count) == 1) {
+		pr_info("CPU %u waking all threads\n", smp_processor_id());
+		prod_others();
+	}
+
+	/*
+	 * Several seconds may have passed while execution was
+	 * suspended, so reset the watchdog.
+	 */
+	touch_nmi_watchdog();
+	return rc;
+}
+
+/*
+ * Abort reason code byte 0. We use only the 'Migrating partition' value.
+ *
+ * pseries_cancel_migration() shifts the entity into bits 31..24 of the
+ * reason code it passes to H_VASI_SIGNAL; the low 24 bits carry the
+ * detail.
+ */
+enum vasi_aborting_entity {
+ ORCHESTRATOR = 1,
+ VSP_SOURCE = 2,
+ PARTITION_FIRMWARE = 3,
+ PLATFORM_FIRMWARE = 4,
+ VSP_TARGET = 5,
+ MIGRATING_PARTITION = 6,
+};
+
+/*
+ * Signal H_VASI_SIGNAL_CANCEL for the VASI stream @handle.
+ *
+ * The reason code carries MIGRATING_PARTITION in its top byte and the
+ * low 24 bits of abs(@err) as detail.  A non-zero H_VASI_SIGNAL status
+ * is logged and otherwise ignored.
+ */
+static void pseries_cancel_migration(u64 handle, int err)
+{
+	const u8 who = MIGRATING_PARTITION;
+	const u32 why = abs(err) & 0xffffff;
+	const u32 reason_code = (who << 24) | why;
+	long status;
+
+	status = plpar_hcall_norets(H_VASI_SIGNAL, handle,
+				    H_VASI_SIGNAL_CANCEL, reason_code);
+	if (!status)
+		return;
+
+	pr_err("H_VASI_SIGNAL error: %ld\n", status);
+}
+
+/*
+ * Suspend the partition, retrying a bounded number of times.
+ *
+ * Each attempt runs do_join() on all online CPUs through
+ * stop_machine().  After a failed attempt the VASI stream @handle is
+ * polled; another attempt is made only while the stream is still in
+ * H_VASI_SUSPENDING state, or when the poll reports -EOPNOTSUPP.  The
+ * delay before a retry starts at 1 ms and grows tenfold each time, and
+ * at most max_attempts (5) attempts are made.
+ *
+ * Returns 0 on success, otherwise the result of the last
+ * stop_machine() call.
+ */
+static int pseries_suspend(u64 handle)
+{
+	const unsigned int max_attempts = 5;
+	unsigned int retry_interval_ms = 1;
+	unsigned int attempt = 1;
+	int ret;
+
+	while (true) {
+		/* Fresh per attempt: do_join() uses it to elect the waker. */
+		atomic_t counter = ATOMIC_INIT(0);
+		unsigned long vasi_state;
+		int vasi_err;
+
+		ret = stop_machine(do_join, &counter, cpu_online_mask);
+		if (ret == 0)
+			break;
+		/*
+		 * Encountered an error. If the VASI stream is still
+		 * in Suspending state, it's likely a transient
+		 * condition related to some device in the partition
+		 * and we can retry in the hope that the cause has
+		 * cleared after some delay.
+		 *
+		 * A better design would allow drivers etc to prepare
+		 * for the suspend and avoid conditions which prevent
+		 * the suspend from succeeding. For now, we have this
+		 * mitigation.
+		 */
+		pr_notice("Partition suspend attempt %u of %u error: %d\n",
+			  attempt, max_attempts, ret);
+
+		if (attempt == max_attempts)
+			break;
+
+		vasi_err = poll_vasi_state(handle, &vasi_state);
+		if (vasi_err == 0) {
+			if (vasi_state != H_VASI_SUSPENDING) {
+				pr_notice("VASI state %lu after failed suspend\n",
+					  vasi_state);
+				break;
+			}
+		} else if (vasi_err != -EOPNOTSUPP) {
+			/*
+			 * Terminate the message with a newline like every
+			 * other message here, so the record is complete
+			 * and cannot be continued by a later printk.
+			 */
+			pr_err("VASI state poll error: %d\n", vasi_err);
+			break;
+		}
+
+		pr_notice("Will retry partition suspend after %u ms\n",
+			  retry_interval_ms);
+
+		msleep(retry_interval_ms);
+		retry_interval_ms *= 10;
+		attempt++;
+	}
+
+	return ret;
+}
+
+/*
+ * Drive a partition migration for the VASI stream @handle: wait for the
+ * stream to start suspending, attempt the suspend, then either run
+ * post_mobility_fixup() or cancel the migration, depending on the
+ * outcome.
+ *
+ * Returns 0 on success and a non-zero error otherwise.
+ */
+static int pseries_migrate_partition(u64 handle)
+{
+	int rc = wait_for_vasi_session_suspending(handle);
+
+	if (rc)
+		return rc;
+
+	rc = pseries_suspend(handle);
+	if (rc != 0) {
+		pseries_cancel_migration(handle, rc);
+		return rc;
+	}
+
+	post_mobility_fixup();
+	return rc;
+}
+
+/*
+ * Non-static wrapper around pseries_migrate_partition() for the VASI
+ * stream @handle; the result is passed back unchanged.
+ */
+int rtas_syscall_dispatch_ibm_suspend_me(u64 handle)
+{
+	int rc = pseries_migrate_partition(handle);
+
+	return rc;
+}
+
static ssize_t migration_store(struct class *class,
struct class_attribute *attr, const char *buf,
size_t count)
@@ -369,17 +616,10 @@ static ssize_t migration_store(struct class *class,
if (rc)
return rc;
- do {
- rc = rtas_ibm_suspend_me(streamid);
- if (rc == -EAGAIN)
- ssleep(1);
- } while (rc == -EAGAIN);
-
+ rc = pseries_migrate_partition(streamid);
if (rc)
return rc;
- post_mobility_fixup();
-
return count;
}
diff --git a/arch/powerpc/platforms/pseries/pci.c b/arch/powerpc/platforms/pseries/pci.c
index 911534b89c85..72a4d4167849 100644
--- a/arch/powerpc/platforms/pseries/pci.c
+++ b/arch/powerpc/platforms/pseries/pci.c
@@ -290,6 +290,25 @@ static void fixup_winbond_82c105(struct pci_dev* dev)
DECLARE_PCI_FIXUP_HEADER(PCI_VENDOR_ID_WINBOND, PCI_DEVICE_ID_WINBOND_82C105,
fixup_winbond_82c105);
+/*
+ * Translate a link-speed property value into an enum pci_bus_speed.
+ *
+ * 0x01, 0x02, 0x04, 0x08 and 0x10 map to the 2.5, 5.0, 8.0, 16.0 and
+ * 32.0 GT/s PCIe constants respectively.  Any other value is noted via
+ * pr_debug() and maps to PCI_SPEED_UNKNOWN.
+ */
+static enum pci_bus_speed prop_to_pci_speed(u32 prop)
+{
+	enum pci_bus_speed speed = PCI_SPEED_UNKNOWN;
+
+	if (prop == 0x01)
+		speed = PCIE_SPEED_2_5GT;
+	else if (prop == 0x02)
+		speed = PCIE_SPEED_5_0GT;
+	else if (prop == 0x04)
+		speed = PCIE_SPEED_8_0GT;
+	else if (prop == 0x08)
+		speed = PCIE_SPEED_16_0GT;
+	else if (prop == 0x10)
+		speed = PCIE_SPEED_32_0GT;
+	else
+		pr_debug("Unexpected PCI link speed property value\n");
+
+	return speed;
+}
+
int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
{
struct device_node *dn, *pdn;
@@ -322,35 +341,7 @@ int pseries_root_bridge_prepare(struct pci_host_bridge *bridge)
return 0;
}
- switch (pcie_link_speed_stats[0]) {
- case 0x01:
- bus->max_bus_speed = PCIE_SPEED_2_5GT;
- break;
- case 0x02:
- bus->max_bus_speed = PCIE_SPEED_5_0GT;
- break;
- case 0x04:
- bus->max_bus_speed = PCIE_SPEED_8_0GT;
- break;
- default:
- bus->max_bus_speed = PCI_SPEED_UNKNOWN;
- break;
- }
-
- switch (pcie_link_speed_stats[1]) {
- case 0x01:
- bus->cur_bus_speed = PCIE_SPEED_2_5GT;
- break;
- case 0x02:
- bus->cur_bus_speed = PCIE_SPEED_5_0GT;
- break;
- case 0x04:
- bus->cur_bus_speed = PCIE_SPEED_8_0GT;
- break;
- default:
- bus->cur_bus_speed = PCI_SPEED_UNKNOWN;
- break;
- }
-
+ bus->max_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[0]);
+ bus->cur_bus_speed = prop_to_pci_speed(pcie_link_speed_stats[1]);
return 0;
}
diff --git a/arch/powerpc/platforms/pseries/ras.c b/arch/powerpc/platforms/pseries/ras.c
index b2b245b25edb..149cec2212e6 100644
--- a/arch/powerpc/platforms/pseries/ras.c
+++ b/arch/powerpc/platforms/pseries/ras.c
@@ -526,8 +526,11 @@ static int mce_handle_err_realmode(int disposition, u8 error_type)
#ifdef CONFIG_PPC_BOOK3S_64
if (disposition == RTAS_DISP_NOT_RECOVERED) {
switch (error_type) {
- case MC_ERROR_TYPE_SLB:
case MC_ERROR_TYPE_ERAT:
+ flush_erat();
+ disposition = RTAS_DISP_FULLY_RECOVERED;
+ break;
+ case MC_ERROR_TYPE_SLB:
/*
* Store the old slb content in paca before flushing.
* Print this when we go to virtual mode.
diff --git a/arch/powerpc/platforms/pseries/smp.c b/arch/powerpc/platforms/pseries/smp.c
index 92922491a81c..c70b4be9f0a5 100644
--- a/arch/powerpc/platforms/pseries/smp.c
+++ b/arch/powerpc/platforms/pseries/smp.c
@@ -42,6 +42,7 @@
#include <asm/plpar_wrappers.h>
#include <asm/code-patching.h>
#include <asm/svm.h>
+#include <asm/kvm_guest.h>
#include "pseries.h"
@@ -210,7 +211,7 @@ static __init void pSeries_smp_probe(void)
if (!cpu_has_feature(CPU_FTR_SMT))
return;
- if (is_kvm_guest()) {
+ if (check_kvm_guest()) {
/*
* KVM emulates doorbells by disabling FSCR[MSGP] so msgsndp
* faults to the hypervisor which then reads the instruction
diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c
index 81e0ac58d620..1b902cbf85c5 100644
--- a/arch/powerpc/platforms/pseries/suspend.c
+++ b/arch/powerpc/platforms/pseries/suspend.c
@@ -13,13 +13,8 @@
#include <asm/mmu.h>
#include <asm/rtas.h>
#include <asm/topology.h>
-#include "../../kernel/cacheinfo.h"
-static u64 stream_id;
static struct device suspend_dev;
-static DECLARE_COMPLETION(suspend_work);
-static struct rtas_suspend_me_data suspend_data;
-static atomic_t suspending;
/**
* pseries_suspend_begin - First phase of hibernation
@@ -29,7 +24,7 @@ static atomic_t suspending;
* Return value:
* 0 on success / other on failure
**/
-static int pseries_suspend_begin(suspend_state_t state)
+static int pseries_suspend_begin(u64 stream_id)
{
long vasi_state, rc;
unsigned long retbuf[PLPAR_HCALL_BUFSIZE];
@@ -49,41 +44,10 @@ static int pseries_suspend_begin(suspend_state_t state)
vasi_state);
return -EIO;
}
-
- return 0;
-}
-
-/**
- * pseries_suspend_cpu - Suspend a single CPU
- *
- * Makes the H_JOIN call to suspend the CPU
- *
- **/
-static int pseries_suspend_cpu(void)
-{
- if (atomic_read(&suspending))
- return rtas_suspend_cpu(&suspend_data);
return 0;
}
/**
- * pseries_suspend_enable_irqs
- *
- * Post suspend configuration updates
- *
- **/
-static void pseries_suspend_enable_irqs(void)
-{
- /*
- * Update configuration which can be modified based on device tree
- * changes during resume.
- */
- cacheinfo_cpu_offline(smp_processor_id());
- post_mobility_fixup();
- cacheinfo_cpu_online(smp_processor_id());
-}
-
-/**
* pseries_suspend_enter - Final phase of hibernation
*
* Return value:
@@ -91,28 +55,7 @@ static void pseries_suspend_enable_irqs(void)
**/
static int pseries_suspend_enter(suspend_state_t state)
{
- int rc = rtas_suspend_last_cpu(&suspend_data);
-
- atomic_set(&suspending, 0);
- atomic_set(&suspend_data.done, 1);
- return rc;
-}
-
-/**
- * pseries_prepare_late - Prepare to suspend all other CPUs
- *
- * Return value:
- * 0 on success / other on failure
- **/
-static int pseries_prepare_late(void)
-{
- atomic_set(&suspending, 1);
- atomic_set(&suspend_data.working, 0);
- atomic_set(&suspend_data.done, 0);
- atomic_set(&suspend_data.error, 0);
- suspend_data.complete = &suspend_work;
- reinit_completion(&suspend_work);
- return 0;
+ return rtas_ibm_suspend_me(NULL);
}
/**
@@ -132,6 +75,7 @@ static ssize_t store_hibernate(struct device *dev,
struct device_attribute *attr,
const char *buf, size_t count)
{
+ u64 stream_id;
int rc;
if (!capable(CAP_SYS_ADMIN))
@@ -140,7 +84,7 @@ static ssize_t store_hibernate(struct device *dev,
stream_id = simple_strtoul(buf, NULL, 16);
do {
- rc = pseries_suspend_begin(PM_SUSPEND_MEM);
+ rc = pseries_suspend_begin(stream_id);
if (rc == -EAGAIN)
ssleep(1);
} while (rc == -EAGAIN);
@@ -148,10 +92,11 @@ static ssize_t store_hibernate(struct device *dev,
if (!rc)
rc = pm_suspend(PM_SUSPEND_MEM);
- stream_id = 0;
-
- if (!rc)
+ if (!rc) {
rc = count;
+ post_mobility_fixup();
+ }
+
return rc;
}
@@ -187,8 +132,6 @@ static struct bus_type suspend_subsys = {
static const struct platform_suspend_ops pseries_suspend_ops = {
.valid = suspend_valid_only_mem,
- .begin = pseries_suspend_begin,
- .prepare_late = pseries_prepare_late,
.enter = pseries_suspend_enter,
};
@@ -231,15 +174,9 @@ static int __init pseries_suspend_init(void)
if (!firmware_has_feature(FW_FEATURE_LPAR))
return 0;
- suspend_data.token = rtas_token("ibm,suspend-me");
- if (suspend_data.token == RTAS_UNKNOWN_SERVICE)
- return 0;
-
if ((rc = pseries_suspend_sysfs_register(&suspend_dev)))
return rc;
- ppc_md.suspend_disable_cpu = pseries_suspend_cpu;
- ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs;
suspend_set_ops(&pseries_suspend_ops);
return 0;
}