Diffstat (limited to 'arch/powerpc/kernel')
-rw-r--r--  arch/powerpc/kernel/Makefile                 |   11
-rw-r--r--  arch/powerpc/kernel/asm-offsets.c            |    3
-rw-r--r--  arch/powerpc/kernel/cpu_setup_power.S        |    2
-rw-r--r--  arch/powerpc/kernel/eeh.c                    |   84
-rw-r--r--  arch/powerpc/kernel/eeh_driver.c             |   12
-rw-r--r--  arch/powerpc/kernel/eeh_event.c              |    4
-rw-r--r--  arch/powerpc/kernel/eeh_pe.c                 |    4
-rw-r--r--  arch/powerpc/kernel/entry_32.S               |    6
-rw-r--r--  arch/powerpc/kernel/exceptions-64e.S         |    6
-rw-r--r--  arch/powerpc/kernel/exceptions-64s.S         |    2
-rw-r--r--  arch/powerpc/kernel/ftrace.c                 |   12
-rw-r--r--  arch/powerpc/kernel/head_64.S                |   19
-rw-r--r--  arch/powerpc/kernel/head_8xx.S               |  119
-rw-r--r--  arch/powerpc/kernel/ibmebus.c                |  759
-rw-r--r--  arch/powerpc/kernel/kexec_elf_64.c           |  663
-rw-r--r--  arch/powerpc/kernel/kprobes.c                |   31
-rw-r--r--  arch/powerpc/kernel/machine_kexec_64.c       |    2
-rw-r--r--  arch/powerpc/kernel/machine_kexec_file_64.c  |  338
-rw-r--r--  arch/powerpc/kernel/mce.c                    |    3
-rw-r--r--  arch/powerpc/kernel/misc_32.S                |    2
-rw-r--r--  arch/powerpc/kernel/misc_64.S                |    6
-rw-r--r--  arch/powerpc/kernel/module_64.c              |    5
-rw-r--r--  arch/powerpc/kernel/of_platform.c            |    7
-rw-r--r--  arch/powerpc/kernel/process.c                |   24
-rw-r--r--  arch/powerpc/kernel/prom.c                   |   23
-rw-r--r--  arch/powerpc/kernel/prom_init.c              |  295
-rw-r--r--  arch/powerpc/kernel/setup-common.c           |    2
-rw-r--r--  arch/powerpc/kernel/setup_64.c               |    4
-rw-r--r--  arch/powerpc/kernel/smp.c                    |    6
-rw-r--r--  arch/powerpc/kernel/traps.c                  |   48
-rw-r--r--  arch/powerpc/kernel/vio.c                    | 1702
31 files changed, 1462 insertions(+), 2742 deletions(-)
diff --git a/arch/powerpc/kernel/Makefile b/arch/powerpc/kernel/Makefile
index 1925341dbb9c..a3a6047fd395 100644
--- a/arch/powerpc/kernel/Makefile
+++ b/arch/powerpc/kernel/Makefile
@@ -19,6 +19,10 @@ CFLAGS_init.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_btext.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
CFLAGS_prom.o += $(DISABLE_LATENT_ENTROPY_PLUGIN)
+# -fstack-protector triggers protection checks in this code,
+# but it is being used too early to link to meaningful stack_chk logic.
+CFLAGS_prom_init.o += $(call cc-option, -fno-stack-protector)
+
ifdef CONFIG_FUNCTION_TRACER
# Do not trace early boot code
CFLAGS_REMOVE_cputable.o = -mno-sched-epilog $(CC_FLAGS_FTRACE)
@@ -58,8 +62,6 @@ obj-$(CONFIG_PPC_RTAS) += rtas.o rtas-rtc.o $(rtaspci-y-y)
obj-$(CONFIG_PPC_RTAS_DAEMON) += rtasd.o
obj-$(CONFIG_RTAS_FLASH) += rtas_flash.o
obj-$(CONFIG_RTAS_PROC) += rtas-proc.o
-obj-$(CONFIG_IBMVIO) += vio.o
-obj-$(CONFIG_IBMEBUS) += ibmebus.o
obj-$(CONFIG_EEH) += eeh.o eeh_pe.o eeh_dev.o eeh_cache.o \
eeh_driver.o eeh_event.o eeh_sysfs.o
obj-$(CONFIG_GENERIC_TBSYNC) += smp-tbsync.o
@@ -107,8 +109,9 @@ pci64-$(CONFIG_PPC64) += pci_dn.o pci-hotplug.o isa-bridge.o
obj-$(CONFIG_PCI) += pci_$(BITS).o $(pci64-y) \
pci-common.o pci_of_scan.o
obj-$(CONFIG_PCI_MSI) += msi.o
-obj-$(CONFIG_KEXEC) += machine_kexec.o crash.o \
+obj-$(CONFIG_KEXEC_CORE) += machine_kexec.o crash.o \
machine_kexec_$(BITS).o
+obj-$(CONFIG_KEXEC_FILE) += machine_kexec_file_$(BITS).o kexec_elf_$(BITS).o
obj-$(CONFIG_AUDIT) += audit.o
obj64-$(CONFIG_AUDIT) += compat_audit.o
@@ -128,7 +131,7 @@ obj64-$(CONFIG_PPC_TRANSACTIONAL_MEM) += tm.o
obj-$(CONFIG_PPC64) += $(obj64-y)
obj-$(CONFIG_PPC32) += $(obj32-y)
-ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC),)
+ifneq ($(CONFIG_XMON)$(CONFIG_KEXEC_CORE),)
obj-y += ppc_save_regs.o
endif
diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c
index 195a9fc8f81c..0601e6a7297c 100644
--- a/arch/powerpc/kernel/asm-offsets.c
+++ b/arch/powerpc/kernel/asm-offsets.c
@@ -91,6 +91,9 @@ int main(void)
DEFINE(TI_livepatch_sp, offsetof(struct thread_info, livepatch_sp));
#endif
+#ifdef CONFIG_CC_STACKPROTECTOR
+ DEFINE(TSK_STACK_CANARY, offsetof(struct task_struct, stack_canary));
+#endif
DEFINE(KSP, offsetof(struct thread_struct, ksp));
DEFINE(PT_REGS, offsetof(struct thread_struct, regs));
#ifdef CONFIG_BOOKE
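
[Note: the new TSK_STACK_CANARY constant is produced by the standard kbuild
asm-offsets mechanism. As a sketch, the DEFINE() macro (from
include/linux/kbuild.h) makes the compiler emit each offsetof() value as an
immediate, which Kbuild scrapes into the generated asm-offsets.h consumed by
assembly files such as entry_32.S:]

/* Standard kbuild pattern (sketch): the "->" markers in the compiler's
 * assembly output are collected into include/generated/asm-offsets.h. */
#define DEFINE(sym, val) \
	asm volatile("\n->" #sym " %0 " #val : : "i" (val))
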
diff --git a/arch/powerpc/kernel/cpu_setup_power.S b/arch/powerpc/kernel/cpu_setup_power.S
index f3e1f5d29dce..917188615bf5 100644
--- a/arch/powerpc/kernel/cpu_setup_power.S
+++ b/arch/powerpc/kernel/cpu_setup_power.S
@@ -96,6 +96,7 @@ _GLOBAL(__setup_cpu_power9)
mtlr r11
beqlr
li r0,0
+ mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
@@ -116,6 +117,7 @@ _GLOBAL(__restore_cpu_power9)
mtlr r11
beqlr
li r0,0
+ mtspr SPRN_PSSCR,r0
mtspr SPRN_LPID,r0
mfspr r3,SPRN_LPCR
LOAD_REG_IMMEDIATE(r4, LPCR_PECEDH | LPCR_PECE_HVEE | LPCR_HVICE)
diff --git a/arch/powerpc/kernel/eeh.c b/arch/powerpc/kernel/eeh.c
index f25731627d7f..8180bfd7ab93 100644
--- a/arch/powerpc/kernel/eeh.c
+++ b/arch/powerpc/kernel/eeh.c
@@ -372,7 +372,7 @@ static int eeh_phb_check_failure(struct eeh_pe *pe)
/* Find the PHB PE */
phb_pe = eeh_phb_pe_get(pe->phb);
if (!phb_pe) {
- pr_warn("%s Can't find PE for PHB#%d\n",
+ pr_warn("%s Can't find PE for PHB#%x\n",
__func__, pe->phb->global_number);
return -EEXIST;
}
@@ -664,7 +664,7 @@ int eeh_pci_enable(struct eeh_pe *pe, int function)
rc = eeh_ops->set_option(pe, function);
if (rc)
pr_warn("%s: Unexpected state change %d on "
- "PHB#%d-PE#%x, err=%d\n",
+ "PHB#%x-PE#%x, err=%d\n",
__func__, function, pe->phb->global_number,
pe->addr, rc);
@@ -808,76 +808,67 @@ static void *eeh_set_dev_freset(void *data, void *flag)
}
/**
- * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
+ * eeh_pe_reset_full - Complete a full reset process on the indicated PE
* @pe: EEH PE
*
- * Assert the PCI #RST line for 1/4 second.
+ * This function executes a full reset procedure on a PE, including setting
+ * the appropriate flags, performing a fundamental or hot reset, and then
+ * deactivating the reset status. It is designed to be used within the EEH
+ * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
+ * only performs a single operation at a time.
+ *
+ * This function will attempt to reset a PE three times before failing.
*/
-static void eeh_reset_pe_once(struct eeh_pe *pe)
+int eeh_pe_reset_full(struct eeh_pe *pe)
{
+ int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
+ int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+ int type = EEH_RESET_HOT;
unsigned int freset = 0;
+ int i, state, ret;
- /* Determine type of EEH reset required for
- * Partitionable Endpoint, a hot-reset (1)
- * or a fundamental reset (3).
- * A fundamental reset required by any device under
- * Partitionable Endpoint trumps hot-reset.
+ /*
+ * Determine the type of reset to perform - hot or fundamental.
+ * Hot reset is the default operation, unless any device under the
+ * PE requires a fundamental reset.
*/
eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);
if (freset)
- eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
- else
- eeh_ops->reset(pe, EEH_RESET_HOT);
-
- eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
-}
-
-/**
- * eeh_reset_pe - Reset the indicated PE
- * @pe: EEH PE
- *
- * This routine should be called to reset indicated device, including
- * PE. A PE might include multiple PCI devices and sometimes PCI bridges
- * might be involved as well.
- */
-int eeh_reset_pe(struct eeh_pe *pe)
-{
- int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
- int i, state, ret;
+ type = EEH_RESET_FUNDAMENTAL;
- /* Mark as reset and block config space */
- eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+ /* Mark the PE as in reset state and block config space accesses */
+ eeh_pe_state_mark(pe, reset_state);
- /* Take three shots at resetting the bus */
+ /* Make three attempts at resetting the bus */
for (i = 0; i < 3; i++) {
- eeh_reset_pe_once(pe);
+ ret = eeh_pe_reset(pe, type);
+ if (ret)
+ break;
- /*
- * EEH_PE_ISOLATED is expected to be removed after
- * BAR restore.
- */
+ ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
+ if (ret)
+ break;
+
+ /* Wait until the PE is in a functioning state */
state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
- if ((state & flags) == flags) {
- ret = 0;
- goto out;
- }
+ if ((state & active_flags) == active_flags)
+ break;
if (state < 0) {
- pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x",
+ pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
__func__, pe->phb->global_number, pe->addr);
ret = -ENOTRECOVERABLE;
- goto out;
+ break;
}
- /* We might run out of credits */
+ /* Set error in case this is our last attempt */
ret = -EIO;
pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n",
__func__, state, pe->phb->global_number, pe->addr, (i + 1));
}
-out:
- eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
+ eeh_pe_state_clear(pe, reset_state);
return ret;
}
@@ -1601,6 +1592,7 @@ static int eeh_pe_reenable_devices(struct eeh_pe *pe)
return eeh_unfreeze_pe(pe, true);
}
+
/**
* eeh_pe_reset - Issue PE reset according to specified type
* @pe: EEH PE
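
[Note: a minimal usage sketch, mirroring the eeh_driver.c callers updated
below. A single eeh_pe_reset_full() call now covers flag setup,
hot-vs-fundamental reset selection, deactivation, and up to three retries,
where callers previously drove eeh_reset_pe() themselves:]

/* Hedged sketch of a recovery path; error handling abbreviated. */
static int recover_pe(struct eeh_pe *pe)
{
	int ret;

	ret = eeh_pe_reset_full(pe);	/* full procedure, up to 3 attempts */
	if (ret) {
		eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
		return ret;
	}

	return 0;
}
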
diff --git a/arch/powerpc/kernel/eeh_driver.c b/arch/powerpc/kernel/eeh_driver.c
index 5c31369435f2..d88573bdd090 100644
--- a/arch/powerpc/kernel/eeh_driver.c
+++ b/arch/powerpc/kernel/eeh_driver.c
@@ -588,7 +588,7 @@ int eeh_pe_reset_and_recover(struct eeh_pe *pe)
eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
/* Issue reset */
- ret = eeh_reset_pe(pe);
+ ret = eeh_pe_reset_full(pe);
if (ret) {
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
return ret;
@@ -659,7 +659,7 @@ static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
* config accesses. So we prefer to block them. However, controlled
* PCI config accesses initiated from EEH itself are allowed.
*/
- rc = eeh_reset_pe(pe);
+ rc = eeh_pe_reset_full(pe);
if (rc)
return rc;
@@ -734,7 +734,7 @@ static void eeh_handle_normal_event(struct eeh_pe *pe)
frozen_bus = eeh_pe_bus_get(pe);
if (!frozen_bus) {
- pr_err("%s: Cannot find PCI bus for PHB#%d-PE#%x\n",
+ pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
__func__, pe->phb->global_number, pe->addr);
return;
}
@@ -878,7 +878,7 @@ excess_failures:
* are due to poorly seated PCI cards. Only 10% or so are
* due to actual, failed cards.
*/
- pr_err("EEH: PHB#%d-PE#%x has failed %d times in the\n"
+ pr_err("EEH: PHB#%x-PE#%x has failed %d times in the\n"
"last hour and has been permanently disabled.\n"
"Please try reseating or replacing it.\n",
pe->phb->global_number, pe->addr,
@@ -886,7 +886,7 @@ excess_failures:
goto perm_error;
hard_fail:
- pr_err("EEH: Unable to recover from failure from PHB#%d-PE#%x.\n"
+ pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
"Please try reseating or replacing it\n",
pe->phb->global_number, pe->addr);
@@ -1000,7 +1000,7 @@ static void eeh_handle_special_event(void)
bus = eeh_pe_bus_get(phb_pe);
if (!bus) {
pr_err("%s: Cannot find PCI bus for "
- "PHB#%d-PE#%x\n",
+ "PHB#%x-PE#%x\n",
__func__,
pe->phb->global_number,
pe->addr);
diff --git a/arch/powerpc/kernel/eeh_event.c b/arch/powerpc/kernel/eeh_event.c
index 82e7327e3cd0..accbf8b5fd46 100644
--- a/arch/powerpc/kernel/eeh_event.c
+++ b/arch/powerpc/kernel/eeh_event.c
@@ -75,11 +75,11 @@ static int eeh_event_handler(void * dummy)
if (pe) {
eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
if (pe->type & EEH_PE_PHB)
- pr_info("EEH: Detected error on PHB#%d\n",
+ pr_info("EEH: Detected error on PHB#%x\n",
pe->phb->global_number);
else
pr_info("EEH: Detected PCI bus error on "
- "PHB#%d-PE#%x\n",
+ "PHB#%x-PE#%x\n",
pe->phb->global_number, pe->addr);
eeh_handle_event(pe);
eeh_pe_state_clear(pe, EEH_PE_RECOVERING);
diff --git a/arch/powerpc/kernel/eeh_pe.c b/arch/powerpc/kernel/eeh_pe.c
index de7d091c4c31..cc4b206f77e4 100644
--- a/arch/powerpc/kernel/eeh_pe.c
+++ b/arch/powerpc/kernel/eeh_pe.c
@@ -104,7 +104,7 @@ int eeh_phb_pe_create(struct pci_controller *phb)
/* Put it into the list */
list_add_tail(&pe->child, &eeh_phb_pe);
- pr_debug("EEH: Add PE for PHB#%d\n", phb->global_number);
+ pr_debug("EEH: Add PE for PHB#%x\n", phb->global_number);
return 0;
}
@@ -333,7 +333,7 @@ int eeh_add_to_parent_pe(struct eeh_dev *edev)
/* Check if the PE number is valid */
if (!eeh_has_flag(EEH_VALID_PE_ZERO) && !edev->pe_config_addr) {
- pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%d\n",
+ pr_err("%s: Invalid PE#0 for edev 0x%x on PHB#%x\n",
__func__, edev->config_addr, edev->phb->global_number);
return -EINVAL;
}
diff --git a/arch/powerpc/kernel/entry_32.S b/arch/powerpc/kernel/entry_32.S
index 3841d749a430..5742dbdbee46 100644
--- a/arch/powerpc/kernel/entry_32.S
+++ b/arch/powerpc/kernel/entry_32.S
@@ -674,7 +674,11 @@ BEGIN_FTR_SECTION
mtspr SPRN_SPEFSCR,r0 /* restore SPEFSCR reg */
END_FTR_SECTION_IFSET(CPU_FTR_SPE)
#endif /* CONFIG_SPE */
-
+#if defined(CONFIG_CC_STACKPROTECTOR) && !defined(CONFIG_SMP)
+ lwz r0,TSK_STACK_CANARY(r2)
+ lis r4,__stack_chk_guard@ha
+ stw r0,__stack_chk_guard@l(r4)
+#endif
lwz r0,_CCR(r1)
mtcrf 0xFF,r0
/* r3-r12 are destroyed -- Cort */
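
[Note: in C terms, the sequence added above implements the non-SMP per-task
canary scheme: gcc's -fstack-protector on 32-bit powerpc checks a single
global __stack_chk_guard, so the context-switch path copies the incoming
task's canary into that global. A hedged sketch of the equivalent C, assuming
__stack_chk_guard is the guard symbol the compiler references:]

/* Conceptual equivalent of the lwz/lis/stw sequence above (UP only). */
unsigned long __stack_chk_guard __read_mostly;

static inline void update_stack_chk_guard(struct task_struct *next)
{
	/* TSK_STACK_CANARY is the asm-offset of next->stack_canary */
	__stack_chk_guard = next->stack_canary;
}
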
diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S
index 38a1f96430e1..45b453e4d0c8 100644
--- a/arch/powerpc/kernel/exceptions-64e.S
+++ b/arch/powerpc/kernel/exceptions-64e.S
@@ -923,10 +923,10 @@ kernel_dbg_exc:
PROLOG_ADDITION_NONE)
EXCEPTION_COMMON(0x340)
addi r3,r1,STACK_FRAME_OVERHEAD
- bl .save_nvgprs
+ bl save_nvgprs
INTS_RESTORE_HARD
- bl .unknown_exception
- b .ret_from_except
+ bl unknown_exception
+ b ret_from_except
/*
* An interrupt came in while soft-disabled; We mark paca->irq_happened
diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S
index 1ba82ea90230..d39d6118c6e9 100644
--- a/arch/powerpc/kernel/exceptions-64s.S
+++ b/arch/powerpc/kernel/exceptions-64s.S
@@ -1408,7 +1408,7 @@ USE_TEXT_SECTION()
/*
* Hash table stuff
*/
- .align 7
+ .balign IFETCH_ALIGN_BYTES
do_hash_page:
#ifdef CONFIG_PPC_STD_MMU_64
andis. r0,r4,0xa410 /* weird error? */
diff --git a/arch/powerpc/kernel/ftrace.c b/arch/powerpc/kernel/ftrace.c
index a95639b8d4ac..5c9f50c1aa99 100644
--- a/arch/powerpc/kernel/ftrace.c
+++ b/arch/powerpc/kernel/ftrace.c
@@ -47,13 +47,11 @@ ftrace_modify_code(unsigned long ip, unsigned int old, unsigned int new)
unsigned int replaced;
/*
- * Note: Due to modules and __init, code can
- * disappear and change, we need to protect against faulting
- * as well as code changing. We do this by using the
- * probe_kernel_* functions.
- *
- * No real locking needed, this code is run through
- * kstop_machine, or before SMP starts.
+ * Note:
+ * We are paranoid about modifying text, as if a bug was to happen, it
+ * could cause us to read or write to someplace that could cause harm.
+ * Carefully read and modify the code with probe_kernel_*(), and make
+ * sure what we read is what we expected it to be before modifying it.
*/
/* read the text we want to modify */
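
[Note: the read-verify-patch pattern the rewritten comment describes
continues roughly as below. This is a sketch using the
probe_kernel_read()/patch_instruction() helpers of this era, not the full
function body:]

	unsigned int replaced;

	/* read the current instruction, tolerating a fault */
	if (probe_kernel_read(&replaced, (void *)ip, MCOUNT_INSN_SIZE))
		return -EFAULT;

	/* make sure it is what we expect before rewriting it */
	if (replaced != old)
		return -EINVAL;

	/* replace the instruction, again tolerating a fault */
	if (patch_instruction((unsigned int *)ip, new))
		return -EPERM;

	return 0;
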
diff --git a/arch/powerpc/kernel/head_64.S b/arch/powerpc/kernel/head_64.S
index 04c546e20cc0..1dc5eae2ced3 100644
--- a/arch/powerpc/kernel/head_64.S
+++ b/arch/powerpc/kernel/head_64.S
@@ -107,12 +107,19 @@ __secondary_hold_acknowledge:
* crash_kernel region. The loader is responsible for
* observing the alignment requirement.
*/
+
+#ifdef CONFIG_RELOCATABLE_TEST
+#define RUN_AT_LOAD_DEFAULT 1 /* Test relocation, do not copy to 0 */
+#else
+#define RUN_AT_LOAD_DEFAULT 0x72756e30 /* "run0" -- relocate to 0 by default */
+#endif
+
/* Do not move this variable as kexec-tools knows about it. */
. = 0x5c
.globl __run_at_load
__run_at_load:
DEFINE_FIXED_SYMBOL(__run_at_load)
- .long 0x72756e30 /* "run0" -- relocate to 0 by default */
+ .long RUN_AT_LOAD_DEFAULT
#endif
. = 0x60
@@ -153,7 +160,7 @@ __secondary_hold:
cmpdi 0,r12,0
beq 100b
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
#ifdef CONFIG_PPC_BOOK3E
tovirt(r12,r12)
#endif
@@ -214,9 +221,9 @@ booting_thread_hwid:
*/
_GLOBAL(book3e_start_thread)
LOAD_REG_IMMEDIATE(r5, MSR_KERNEL)
- cmpi 0, r3, 0
+ cmpwi r3, 0
beq 10f
- cmpi 0, r3, 1
+ cmpwi r3, 1
beq 11f
/* If the thread id is invalid, just exit. */
b 13f
@@ -241,9 +248,9 @@ _GLOBAL(book3e_start_thread)
* r3 = the thread physical id
*/
_GLOBAL(book3e_stop_thread)
- cmpi 0, r3, 0
+ cmpwi r3, 0
beq 10f
- cmpi 0, r3, 1
+ cmpwi r3, 1
beq 10f
/* If the thread id is invalid, just exit. */
b 13f
diff --git a/arch/powerpc/kernel/head_8xx.S b/arch/powerpc/kernel/head_8xx.S
index fb133a163263..1a9c99d3e5d8 100644
--- a/arch/powerpc/kernel/head_8xx.S
+++ b/arch/powerpc/kernel/head_8xx.S
@@ -73,6 +73,9 @@
#define RPN_PATTERN 0x00f0
#endif
+#define PAGE_SHIFT_512K 19
+#define PAGE_SHIFT_8M 23
+
__HEAD
_ENTRY(_stext);
_ENTRY(_start);
@@ -322,7 +325,7 @@ SystemCall:
#endif
InstructionTLBMiss:
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
+#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
mtspr SPRN_SPRG_SCRATCH2, r3
#endif
EXCEPTION_PROLOG_0
@@ -332,10 +335,12 @@ InstructionTLBMiss:
*/
mfspr r10, SPRN_SRR0 /* Get effective address of fault */
INVALIDATE_ADJACENT_PAGES_CPU15(r11, r10)
-#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
/* Only modules will cause ITLB Misses as we always
* pin the first 8MB of kernel memory */
+#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
mfcr r3
+#endif
+#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
IS_KERNEL(r11, r10)
#endif
mfspr r11, SPRN_M_TW /* Get level 1 table */
@@ -343,7 +348,6 @@ InstructionTLBMiss:
BRANCH_UNLESS_KERNEL(3f)
lis r11, (swapper_pg_dir-PAGE_OFFSET)@ha
3:
- mtcr r3
#endif
/* Insert level 1 index */
rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
@@ -351,14 +355,25 @@ InstructionTLBMiss:
/* Extract level 2 index */
rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
+#ifdef CONFIG_HUGETLB_PAGE
+ mtcr r11
+ bt- 28, 10f /* bit 28 = Large page (8M) */
+ bt- 29, 20f /* bit 29 = Large page (8M or 512k) */
+#endif
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
-
+4:
+#if defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
+ mtcr r3
+#endif
/* Insert the APG into the TWC from the Linux PTE. */
rlwimi r11, r10, 0, 25, 26
/* Load the MI_TWC with the attributes for this "segment." */
MTSPR_CPU6(SPRN_MI_TWC, r11, r3) /* Set segment attributes */
+#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
+ rlwimi r10, r11, 1, MI_SPS16K
+#endif
#ifdef CONFIG_SWAP
rlwinm r11, r10, 32-5, _PAGE_PRESENT
and r11, r11, r10
@@ -371,16 +386,45 @@ InstructionTLBMiss:
* set. All other Linux PTE bits control the behavior
* of the MMU.
*/
+#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
+ rlwimi r10, r11, 0, 0x0ff0 /* Set 24-27, clear 20-23 */
+#else
rlwimi r10, r11, 0, 0x0ff8 /* Set 24-27, clear 20-23,28 */
+#endif
MTSPR_CPU6(SPRN_MI_RPN, r10, r3) /* Update TLB entry */
/* Restore registers */
-#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC)
+#if defined(CONFIG_8xx_CPU6) || defined(CONFIG_MODULES) || defined (CONFIG_DEBUG_PAGEALLOC) || defined (CONFIG_HUGETLB_PAGE)
mfspr r3, SPRN_SPRG_SCRATCH2
#endif
EXCEPTION_EPILOG_0
rfi
+#ifdef CONFIG_HUGETLB_PAGE
+10: /* 8M pages */
+#ifdef CONFIG_PPC_16K_PAGES
+ /* Extract level 2 index */
+ rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29
+ /* Add level 2 base */
+ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1
+#else
+ /* Level 2 base */
+ rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
+#endif
+ lwz r10, 0(r10) /* Get the pte */
+ rlwinm r11, r11, 0, 0xf
+ b 4b
+
+20: /* 512k pages */
+ /* Extract level 2 index */
+ rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29
+ /* Add level 2 base */
+ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
+ lwz r10, 0(r10) /* Get the pte */
+ rlwinm r11, r11, 0, 0xf
+ b 4b
+#endif
+
. = 0x1200
DataStoreTLBMiss:
mtspr SPRN_SPRG_SCRATCH2, r3
@@ -407,7 +451,6 @@ _ENTRY(DTLBMiss_jmp)
#endif
blt cr7, DTLBMissLinear
3:
- mtcr r3
mfspr r10, SPRN_MD_EPN
/* Insert level 1 index */
@@ -418,8 +461,15 @@ _ENTRY(DTLBMiss_jmp)
*/
/* Extract level 2 index */
rlwinm r10, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
+#ifdef CONFIG_HUGETLB_PAGE
+ mtcr r11
+ bt- 28, 10f /* bit 28 = Large page (8M) */
+ bt- 29, 20f /* bit 29 = Large page (8M or 512k) */
+#endif
rlwimi r10, r11, 0, 0, 32 - PAGE_SHIFT - 1 /* Add level 2 base */
lwz r10, 0(r10) /* Get the pte */
+4:
+ mtcr r3
/* Insert the Guarded flag and APG into the TWC from the Linux PTE.
* It is bit 26-27 of both the Linux PTE and the TWC (at least
@@ -434,6 +484,11 @@ _ENTRY(DTLBMiss_jmp)
rlwimi r11, r10, 32-5, 30, 30
MTSPR_CPU6(SPRN_MD_TWC, r11, r3)
+ /* In 4k pages mode, SPS (bit 28) in RPN must match PS[1] (bit 29)
+ * In 16k pages mode, SPS is always 1 */
+#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
+ rlwimi r10, r11, 1, MD_SPS16K
+#endif
/* Both _PAGE_ACCESSED and _PAGE_PRESENT has to be set.
* We also need to know if the insn is a load/store, so:
* Clear _PAGE_PRESENT and load that which will
@@ -455,7 +510,11 @@ _ENTRY(DTLBMiss_jmp)
* of the MMU.
*/
li r11, RPN_PATTERN
+#if defined (CONFIG_HUGETLB_PAGE) && defined (CONFIG_PPC_4K_PAGES)
+ rlwimi r10, r11, 0, 24, 27 /* Set 24-27 */
+#else
rlwimi r10, r11, 0, 24, 28 /* Set 24-27, clear 28 */
+#endif
rlwimi r10, r11, 0, 20, 20 /* clear 20 */
MTSPR_CPU6(SPRN_MD_RPN, r10, r3) /* Update TLB entry */
@@ -465,6 +524,30 @@ _ENTRY(DTLBMiss_jmp)
EXCEPTION_EPILOG_0
rfi
+#ifdef CONFIG_HUGETLB_PAGE
+10: /* 8M pages */
+ /* Extract level 2 index */
+#ifdef CONFIG_PPC_16K_PAGES
+ rlwinm r10, r10, 32 - (PAGE_SHIFT_8M - PAGE_SHIFT), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29
+ /* Add level 2 base */
+ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1
+#else
+ /* Level 2 base */
+ rlwinm r10, r11, 0, ~HUGEPD_SHIFT_MASK
+#endif
+ lwz r10, 0(r10) /* Get the pte */
+ rlwinm r11, r11, 0, 0xf
+ b 4b
+
+20: /* 512k pages */
+ /* Extract level 2 index */
+ rlwinm r10, r10, 32 - (PAGE_SHIFT_512K - PAGE_SHIFT), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29
+ /* Add level 2 base */
+ rlwimi r10, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
+ lwz r10, 0(r10) /* Get the pte */
+ rlwinm r11, r11, 0, 0xf
+ b 4b
+#endif
/* This is an instruction TLB error on the MPC8xx. This could be due
* to many reasons, such as executing guarded memory or illegal instruction
@@ -586,6 +669,9 @@ _ENTRY(FixupDAR_cmp)
/* Insert level 1 index */
3: rlwimi r11, r10, 32 - ((PAGE_SHIFT - 2) << 1), (PAGE_SHIFT - 2) << 1, 29
lwz r11, (swapper_pg_dir-PAGE_OFFSET)@l(r11) /* Get the level 1 entry */
+ mtcr r11
+ bt 28,200f /* bit 28 = Large page (8M) */
+ bt 29,202f /* bit 29 = Large page (8M or 512K) */
rlwinm r11, r11,0,0,19 /* Extract page descriptor page address */
/* Insert level 2 index */
rlwimi r11, r10, 32 - (PAGE_SHIFT - 2), 32 - PAGE_SHIFT, 29
@@ -611,6 +697,27 @@ _ENTRY(FixupDAR_cmp)
141: mfspr r10,SPRN_SPRG_SCRATCH2
b DARFixed /* Nope, go back to normal TLB processing */
+ /* concat physical page address(r11) and page offset(r10) */
+200:
+#ifdef CONFIG_PPC_16K_PAGES
+ rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1) - 1
+ rlwimi r11, r10, 32 - (PAGE_SHIFT_8M - 2), 32 + PAGE_SHIFT_8M - (PAGE_SHIFT << 1), 29
+#else
+ rlwinm r11, r10, 0, ~HUGEPD_SHIFT_MASK
+#endif
+ lwz r11, 0(r11) /* Get the pte */
+ /* concat physical page address(r11) and page offset(r10) */
+ rlwimi r11, r10, 0, 32 - PAGE_SHIFT_8M, 31
+ b 201b
+
+202:
+ rlwinm r11, r11, 0, 0, 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1) - 1
+ rlwimi r11, r10, 32 - (PAGE_SHIFT_512K - 2), 32 + PAGE_SHIFT_512K - (PAGE_SHIFT << 1), 29
+ lwz r11, 0(r11) /* Get the pte */
+ /* concat physical page address(r11) and page offset(r10) */
+ rlwimi r11, r10, 0, 32 - PAGE_SHIFT_512K, 31
+ b 201b
+
144: mfspr r10, SPRN_DSISR
rlwinm r10, r10,0,7,5 /* Clear store bit for buggy dcbst insn */
mtspr SPRN_DSISR, r10
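
[Note: to help decode the rlwinm/rlwimi masks above, here is the 512k-page
level-2 index computation redone as a standalone, runnable C illustration.
It assumes CONFIG_PPC_4K_PAGES; the constants match the assembly, and the 8M
case follows the same pattern with PAGE_SHIFT_8M:]

#include <stdint.h>
#include <stdio.h>

#define PAGE_SHIFT      12	/* 4k base pages (assumption for this demo) */
#define PAGE_SHIFT_512K 19

int main(void)
{
	uint32_t ea = 0x00580000;	/* example effective address */
	/* One level-1 entry spans 2^(2*PAGE_SHIFT - 2) bytes (4 MB here:
	 * 1024 word-sized entries, each mapping a 4k page), so it holds
	 * 4 MB / 512k = 8 huge-page slots. */
	uint32_t l1_span_shift = 2 * PAGE_SHIFT - 2;
	uint32_t slots = 1u << (l1_span_shift - PAGE_SHIFT_512K);
	uint32_t idx = (ea >> PAGE_SHIFT_512K) & (slots - 1);

	/* the rlwinm mask ends at bit 29, i.e. the index is scaled by 4
	 * to address word-sized level-2 entries */
	printf("EA 0x%08x -> 512k slot %u, byte offset %u\n",
	       ea, idx, idx << 2);
	return 0;
}
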
diff --git a/arch/powerpc/kernel/ibmebus.c b/arch/powerpc/kernel/ibmebus.c
deleted file mode 100644
index 6ca9a2ffaac7..000000000000
--- a/arch/powerpc/kernel/ibmebus.c
+++ /dev/null
@@ -1,759 +0,0 @@
-/*
- * IBM PowerPC IBM eBus Infrastructure Support.
- *
- * Copyright (c) 2005 IBM Corporation
- * Joachim Fenkes <fenkes@de.ibm.com>
- * Heiko J Schick <schickhj@de.ibm.com>
- *
- * All rights reserved.
- *
- * This source code is distributed under a dual license of GPL v2.0 and OpenIB
- * BSD.
- *
- * OpenIB BSD License
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions are met:
- *
- * Redistributions of source code must retain the above copyright notice, this
- * list of conditions and the following disclaimer.
- *
- * Redistributions in binary form must reproduce the above copyright notice,
- * this list of conditions and the following disclaimer in the documentation
- * and/or other materials
- * provided with the distribution.
- *
- * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
- * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
- * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
- * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
- * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR
- * BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
- * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
- * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
- * POSSIBILITY OF SUCH DAMAGE.
- */
-
-#include <linux/init.h>
-#include <linux/export.h>
-#include <linux/console.h>
-#include <linux/kobject.h>
-#include <linux/dma-mapping.h>
-#include <linux/interrupt.h>
-#include <linux/of.h>
-#include <linux/slab.h>
-#include <linux/stat.h>
-#include <linux/of_platform.h>
-#include <asm/ibmebus.h>
-
-static struct device ibmebus_bus_device = { /* fake "parent" device */
- .init_name = "ibmebus",
-};
-
-struct bus_type ibmebus_bus_type;
-
-/* These devices will automatically be added to the bus during init */
-static const struct of_device_id ibmebus_matches[] __initconst = {
- { .compatible = "IBM,lhca" },
- { .compatible = "IBM,lhea" },
- {},
-};
-
-static void *ibmebus_alloc_coherent(struct device *dev,
- size_t size,
- dma_addr_t *dma_handle,
- gfp_t flag,
- unsigned long attrs)
-{
- void *mem;
-
- mem = kmalloc(size, flag);
- *dma_handle = (dma_addr_t)mem;
-
- return mem;
-}
-
-static void ibmebus_free_coherent(struct device *dev,
- size_t size, void *vaddr,
- dma_addr_t dma_handle,
- unsigned long attrs)
-{
- kfree(vaddr);
-}
-
-static dma_addr_t ibmebus_map_page(struct device *dev,
- struct page *page,
- unsigned long offset,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- return (dma_addr_t)(page_address(page) + offset);
-}
-
-static void ibmebus_unmap_page(struct device *dev,
- dma_addr_t dma_addr,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- return;
-}
-
-static int ibmebus_map_sg(struct device *dev,
- struct scatterlist *sgl,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct scatterlist *sg;
- int i;
-
- for_each_sg(sgl, sg, nents, i) {
- sg->dma_address = (dma_addr_t) sg_virt(sg);
- sg->dma_length = sg->length;
- }
-
- return nents;
-}
-
-static void ibmebus_unmap_sg(struct device *dev,
- struct scatterlist *sg,
- int nents, enum dma_data_direction direction,
- unsigned long attrs)
-{
- return;
-}
-
-static int ibmebus_dma_supported(struct device *dev, u64 mask)
-{
- return mask == DMA_BIT_MASK(64);
-}
-
-static u64 ibmebus_dma_get_required_mask(struct device *dev)
-{
- return DMA_BIT_MASK(64);
-}
-
-static struct dma_map_ops ibmebus_dma_ops = {
- .alloc = ibmebus_alloc_coherent,
- .free = ibmebus_free_coherent,
- .map_sg = ibmebus_map_sg,
- .unmap_sg = ibmebus_unmap_sg,
- .dma_supported = ibmebus_dma_supported,
- .get_required_mask = ibmebus_dma_get_required_mask,
- .map_page = ibmebus_map_page,
- .unmap_page = ibmebus_unmap_page,
-};
-
-static int ibmebus_match_path(struct device *dev, void *data)
-{
- struct device_node *dn = to_platform_device(dev)->dev.of_node;
- return (dn->full_name &&
- (strcasecmp((char *)data, dn->full_name) == 0));
-}
-
-static int ibmebus_match_node(struct device *dev, void *data)
-{
- return to_platform_device(dev)->dev.of_node == data;
-}
-
-static int ibmebus_create_device(struct device_node *dn)
-{
- struct platform_device *dev;
- int ret;
-
- dev = of_device_alloc(dn, NULL, &ibmebus_bus_device);
- if (!dev)
- return -ENOMEM;
-
- dev->dev.bus = &ibmebus_bus_type;
- dev->dev.archdata.dma_ops = &ibmebus_dma_ops;
-
- ret = of_device_add(dev);
- if (ret)
- platform_device_put(dev);
- return ret;
-}
-
-static int ibmebus_create_devices(const struct of_device_id *matches)
-{
- struct device_node *root, *child;
- int ret = 0;
-
- root = of_find_node_by_path("/");
-
- for_each_child_of_node(root, child) {
- if (!of_match_node(matches, child))
- continue;
-
- if (bus_find_device(&ibmebus_bus_type, NULL, child,
- ibmebus_match_node))
- continue;
-
- ret = ibmebus_create_device(child);
- if (ret) {
- printk(KERN_ERR "%s: failed to create device (%i)",
- __func__, ret);
- of_node_put(child);
- break;
- }
- }
-
- of_node_put(root);
- return ret;
-}
-
-int ibmebus_register_driver(struct platform_driver *drv)
-{
- /* If the driver uses devices that ibmebus doesn't know, add them */
- ibmebus_create_devices(drv->driver.of_match_table);
-
- drv->driver.bus = &ibmebus_bus_type;
- return driver_register(&drv->driver);
-}
-EXPORT_SYMBOL(ibmebus_register_driver);
-
-void ibmebus_unregister_driver(struct platform_driver *drv)
-{
- driver_unregister(&drv->driver);
-}
-EXPORT_SYMBOL(ibmebus_unregister_driver);
-
-int ibmebus_request_irq(u32 ist, irq_handler_t handler,
- unsigned long irq_flags, const char *devname,
- void *dev_id)
-{
- unsigned int irq = irq_create_mapping(NULL, ist);
-
- if (!irq)
- return -EINVAL;
-
- return request_irq(irq, handler, irq_flags, devname, dev_id);
-}
-EXPORT_SYMBOL(ibmebus_request_irq);
-
-void ibmebus_free_irq(u32 ist, void *dev_id)
-{
- unsigned int irq = irq_find_mapping(NULL, ist);
-
- free_irq(irq, dev_id);
- irq_dispose_mapping(irq);
-}
-EXPORT_SYMBOL(ibmebus_free_irq);
-
-static char *ibmebus_chomp(const char *in, size_t count)
-{
- char *out = kmalloc(count + 1, GFP_KERNEL);
-
- if (!out)
- return NULL;
-
- memcpy(out, in, count);
- out[count] = '\0';
- if (out[count - 1] == '\n')
- out[count - 1] = '\0';
-
- return out;
-}
-
-static ssize_t ibmebus_store_probe(struct bus_type *bus,
- const char *buf, size_t count)
-{
- struct device_node *dn = NULL;
- char *path;
- ssize_t rc = 0;
-
- path = ibmebus_chomp(buf, count);
- if (!path)
- return -ENOMEM;
-
- if (bus_find_device(&ibmebus_bus_type, NULL, path,
- ibmebus_match_path)) {
- printk(KERN_WARNING "%s: %s has already been probed\n",
- __func__, path);
- rc = -EEXIST;
- goto out;
- }
-
- if ((dn = of_find_node_by_path(path))) {
- rc = ibmebus_create_device(dn);
- of_node_put(dn);
- } else {
- printk(KERN_WARNING "%s: no such device node: %s\n",
- __func__, path);
- rc = -ENODEV;
- }
-
-out:
- kfree(path);
- if (rc)
- return rc;
- return count;
-}
-static BUS_ATTR(probe, S_IWUSR, NULL, ibmebus_store_probe);
-
-static ssize_t ibmebus_store_remove(struct bus_type *bus,
- const char *buf, size_t count)
-{
- struct device *dev;
- char *path;
-
- path = ibmebus_chomp(buf, count);
- if (!path)
- return -ENOMEM;
-
- if ((dev = bus_find_device(&ibmebus_bus_type, NULL, path,
- ibmebus_match_path))) {
- of_device_unregister(to_platform_device(dev));
-
- kfree(path);
- return count;
- } else {
- printk(KERN_WARNING "%s: %s not on the bus\n",
- __func__, path);
-
- kfree(path);
- return -ENODEV;
- }
-}
-static BUS_ATTR(remove, S_IWUSR, NULL, ibmebus_store_remove);
-
-static struct attribute *ibmbus_bus_attrs[] = {
- &bus_attr_probe.attr,
- &bus_attr_remove.attr,
- NULL,
-};
-ATTRIBUTE_GROUPS(ibmbus_bus);
-
-static int ibmebus_bus_bus_match(struct device *dev, struct device_driver *drv)
-{
- const struct of_device_id *matches = drv->of_match_table;
-
- if (!matches)
- return 0;
-
- return of_match_device(matches, dev) != NULL;
-}
-
-static int ibmebus_bus_device_probe(struct device *dev)
-{
- int error = -ENODEV;
- struct platform_driver *drv;
- struct platform_device *of_dev;
-
- drv = to_platform_driver(dev->driver);
- of_dev = to_platform_device(dev);
-
- if (!drv->probe)
- return error;
-
- of_dev_get(of_dev);
-
- if (of_driver_match_device(dev, dev->driver))
- error = drv->probe(of_dev);
- if (error)
- of_dev_put(of_dev);
-
- return error;
-}
-
-static int ibmebus_bus_device_remove(struct device *dev)
-{
- struct platform_device *of_dev = to_platform_device(dev);
- struct platform_driver *drv = to_platform_driver(dev->driver);
-
- if (dev->driver && drv->remove)
- drv->remove(of_dev);
- return 0;
-}
-
-static void ibmebus_bus_device_shutdown(struct device *dev)
-{
- struct platform_device *of_dev = to_platform_device(dev);
- struct platform_driver *drv = to_platform_driver(dev->driver);
-
- if (dev->driver && drv->shutdown)
- drv->shutdown(of_dev);
-}
-
-/*
- * ibmebus_bus_device_attrs
- */
-static ssize_t devspec_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct platform_device *ofdev;
-
- ofdev = to_platform_device(dev);
- return sprintf(buf, "%s\n", ofdev->dev.of_node->full_name);
-}
-
-static ssize_t name_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct platform_device *ofdev;
-
- ofdev = to_platform_device(dev);
- return sprintf(buf, "%s\n", ofdev->dev.of_node->name);
-}
-
-static ssize_t modalias_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- ssize_t len = of_device_get_modalias(dev, buf, PAGE_SIZE - 2);
- buf[len] = '\n';
- buf[len+1] = 0;
- return len+1;
-}
-
-static struct device_attribute ibmebus_bus_device_attrs[] = {
- __ATTR_RO(devspec),
- __ATTR_RO(name),
- __ATTR_RO(modalias),
- __ATTR_NULL
-};
-
-#ifdef CONFIG_PM_SLEEP
-static int ibmebus_bus_legacy_suspend(struct device *dev, pm_message_t mesg)
-{
- struct platform_device *of_dev = to_platform_device(dev);
- struct platform_driver *drv = to_platform_driver(dev->driver);
- int ret = 0;
-
- if (dev->driver && drv->suspend)
- ret = drv->suspend(of_dev, mesg);
- return ret;
-}
-
-static int ibmebus_bus_legacy_resume(struct device *dev)
-{
- struct platform_device *of_dev = to_platform_device(dev);
- struct platform_driver *drv = to_platform_driver(dev->driver);
- int ret = 0;
-
- if (dev->driver && drv->resume)
- ret = drv->resume(of_dev);
- return ret;
-}
-
-static int ibmebus_bus_pm_prepare(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (drv && drv->pm && drv->pm->prepare)
- ret = drv->pm->prepare(dev);
-
- return ret;
-}
-
-static void ibmebus_bus_pm_complete(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
-
- if (drv && drv->pm && drv->pm->complete)
- drv->pm->complete(dev);
-}
-
-#ifdef CONFIG_SUSPEND
-
-static int ibmebus_bus_pm_suspend(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->suspend)
- ret = drv->pm->suspend(dev);
- } else {
- ret = ibmebus_bus_legacy_suspend(dev, PMSG_SUSPEND);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_suspend_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->suspend_noirq)
- ret = drv->pm->suspend_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_resume(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->resume)
- ret = drv->pm->resume(dev);
- } else {
- ret = ibmebus_bus_legacy_resume(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_resume_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->resume_noirq)
- ret = drv->pm->resume_noirq(dev);
- }
-
- return ret;
-}
-
-#else /* !CONFIG_SUSPEND */
-
-#define ibmebus_bus_pm_suspend NULL
-#define ibmebus_bus_pm_resume NULL
-#define ibmebus_bus_pm_suspend_noirq NULL
-#define ibmebus_bus_pm_resume_noirq NULL
-
-#endif /* !CONFIG_SUSPEND */
-
-#ifdef CONFIG_HIBERNATE_CALLBACKS
-
-static int ibmebus_bus_pm_freeze(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->freeze)
- ret = drv->pm->freeze(dev);
- } else {
- ret = ibmebus_bus_legacy_suspend(dev, PMSG_FREEZE);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_freeze_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->freeze_noirq)
- ret = drv->pm->freeze_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_thaw(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->thaw)
- ret = drv->pm->thaw(dev);
- } else {
- ret = ibmebus_bus_legacy_resume(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_thaw_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->thaw_noirq)
- ret = drv->pm->thaw_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_poweroff(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->poweroff)
- ret = drv->pm->poweroff(dev);
- } else {
- ret = ibmebus_bus_legacy_suspend(dev, PMSG_HIBERNATE);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_poweroff_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->poweroff_noirq)
- ret = drv->pm->poweroff_noirq(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_restore(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->restore)
- ret = drv->pm->restore(dev);
- } else {
- ret = ibmebus_bus_legacy_resume(dev);
- }
-
- return ret;
-}
-
-static int ibmebus_bus_pm_restore_noirq(struct device *dev)
-{
- struct device_driver *drv = dev->driver;
- int ret = 0;
-
- if (!drv)
- return 0;
-
- if (drv->pm) {
- if (drv->pm->restore_noirq)
- ret = drv->pm->restore_noirq(dev);
- }
-
- return ret;
-}
-
-#else /* !CONFIG_HIBERNATE_CALLBACKS */
-
-#define ibmebus_bus_pm_freeze NULL
-#define ibmebus_bus_pm_thaw NULL
-#define ibmebus_bus_pm_poweroff NULL
-#define ibmebus_bus_pm_restore NULL
-#define ibmebus_bus_pm_freeze_noirq NULL
-#define ibmebus_bus_pm_thaw_noirq NULL
-#define ibmebus_bus_pm_poweroff_noirq NULL
-#define ibmebus_bus_pm_restore_noirq NULL
-
-#endif /* !CONFIG_HIBERNATE_CALLBACKS */
-
-static struct dev_pm_ops ibmebus_bus_dev_pm_ops = {
- .prepare = ibmebus_bus_pm_prepare,
- .complete = ibmebus_bus_pm_complete,
- .suspend = ibmebus_bus_pm_suspend,
- .resume = ibmebus_bus_pm_resume,
- .freeze = ibmebus_bus_pm_freeze,
- .thaw = ibmebus_bus_pm_thaw,
- .poweroff = ibmebus_bus_pm_poweroff,
- .restore = ibmebus_bus_pm_restore,
- .suspend_noirq = ibmebus_bus_pm_suspend_noirq,
- .resume_noirq = ibmebus_bus_pm_resume_noirq,
- .freeze_noirq = ibmebus_bus_pm_freeze_noirq,
- .thaw_noirq = ibmebus_bus_pm_thaw_noirq,
- .poweroff_noirq = ibmebus_bus_pm_poweroff_noirq,
- .restore_noirq = ibmebus_bus_pm_restore_noirq,
-};
-
-#define IBMEBUS_BUS_PM_OPS_PTR (&ibmebus_bus_dev_pm_ops)
-
-#else /* !CONFIG_PM_SLEEP */
-
-#define IBMEBUS_BUS_PM_OPS_PTR NULL
-
-#endif /* !CONFIG_PM_SLEEP */
-
-struct bus_type ibmebus_bus_type = {
- .name = "ibmebus",
- .uevent = of_device_uevent_modalias,
- .bus_groups = ibmbus_bus_groups,
- .match = ibmebus_bus_bus_match,
- .probe = ibmebus_bus_device_probe,
- .remove = ibmebus_bus_device_remove,
- .shutdown = ibmebus_bus_device_shutdown,
- .dev_attrs = ibmebus_bus_device_attrs,
- .pm = IBMEBUS_BUS_PM_OPS_PTR,
-};
-EXPORT_SYMBOL(ibmebus_bus_type);
-
-static int __init ibmebus_bus_init(void)
-{
- int err;
-
- printk(KERN_INFO "IBM eBus Device Driver\n");
-
- err = bus_register(&ibmebus_bus_type);
- if (err) {
- printk(KERN_ERR "%s: failed to register IBM eBus.\n",
- __func__);
- return err;
- }
-
- err = device_register(&ibmebus_bus_device);
- if (err) {
- printk(KERN_WARNING "%s: device_register returned %i\n",
- __func__, err);
- bus_unregister(&ibmebus_bus_type);
-
- return err;
- }
-
- err = ibmebus_create_devices(ibmebus_matches);
- if (err) {
- device_unregister(&ibmebus_bus_device);
- bus_unregister(&ibmebus_bus_type);
- return err;
- }
-
- return 0;
-}
-postcore_initcall(ibmebus_bus_init);
diff --git a/arch/powerpc/kernel/kexec_elf_64.c b/arch/powerpc/kernel/kexec_elf_64.c
new file mode 100644
index 000000000000..6acffd34a70f
--- /dev/null
+++ b/arch/powerpc/kernel/kexec_elf_64.c
@@ -0,0 +1,663 @@
+/*
+ * Load ELF vmlinux file for the kexec_file_load syscall.
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-exec.c and kexec-elf-ppc64.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#define pr_fmt(fmt) "kexec_elf: " fmt
+
+#include <linux/elf.h>
+#include <linux/kexec.h>
+#include <linux/libfdt.h>
+#include <linux/module.h>
+#include <linux/of_fdt.h>
+#include <linux/slab.h>
+#include <linux/types.h>
+
+#define PURGATORY_STACK_SIZE (16 * 1024)
+
+#define elf_addr_to_cpu elf64_to_cpu
+
+#ifndef Elf_Rel
+#define Elf_Rel Elf64_Rel
+#endif /* Elf_Rel */
+
+struct elf_info {
+ /*
+ * Where the ELF binary contents are kept.
+ * Memory managed by the user of the struct.
+ */
+ const char *buffer;
+
+ const struct elfhdr *ehdr;
+ const struct elf_phdr *proghdrs;
+ struct elf_shdr *sechdrs;
+};
+
+static inline bool elf_is_elf_file(const struct elfhdr *ehdr)
+{
+ return memcmp(ehdr->e_ident, ELFMAG, SELFMAG) == 0;
+}
+
+static uint64_t elf64_to_cpu(const struct elfhdr *ehdr, uint64_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le64_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be64_to_cpu(value);
+
+ return value;
+}
+
+static uint16_t elf16_to_cpu(const struct elfhdr *ehdr, uint16_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le16_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be16_to_cpu(value);
+
+ return value;
+}
+
+static uint32_t elf32_to_cpu(const struct elfhdr *ehdr, uint32_t value)
+{
+ if (ehdr->e_ident[EI_DATA] == ELFDATA2LSB)
+ value = le32_to_cpu(value);
+ else if (ehdr->e_ident[EI_DATA] == ELFDATA2MSB)
+ value = be32_to_cpu(value);
+
+ return value;
+}
+
+/**
+ * elf_is_ehdr_sane - check that it is safe to use the ELF header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_ehdr_sane(const struct elfhdr *ehdr, size_t buf_len)
+{
+ if (ehdr->e_phnum > 0 && ehdr->e_phentsize != sizeof(struct elf_phdr)) {
+ pr_debug("Bad program header size.\n");
+ return false;
+ } else if (ehdr->e_shnum > 0 &&
+ ehdr->e_shentsize != sizeof(struct elf_shdr)) {
+ pr_debug("Bad section header size.\n");
+ return false;
+ } else if (ehdr->e_ident[EI_VERSION] != EV_CURRENT ||
+ ehdr->e_version != EV_CURRENT) {
+ pr_debug("Unknown ELF version.\n");
+ return false;
+ }
+
+ if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+ size_t phdr_size;
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+ /* Sanity check the program header table location. */
+ if (ehdr->e_phoff + phdr_size < ehdr->e_phoff) {
+ pr_debug("Program headers at invalid location.\n");
+ return false;
+ } else if (ehdr->e_phoff + phdr_size > buf_len) {
+ pr_debug("Program headers truncated.\n");
+ return false;
+ }
+ }
+
+ if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+ size_t shdr_size;
+
+ /*
+		 * e_shnum is at most 65535 so calculating
+ * the size of the section header cannot overflow.
+ */
+ shdr_size = sizeof(struct elf_shdr) * ehdr->e_shnum;
+
+ /* Sanity check the section header table location. */
+ if (ehdr->e_shoff + shdr_size < ehdr->e_shoff) {
+ pr_debug("Section headers at invalid location.\n");
+ return false;
+ } else if (ehdr->e_shoff + shdr_size > buf_len) {
+ pr_debug("Section headers truncated.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int elf_read_ehdr(const char *buf, size_t len, struct elfhdr *ehdr)
+{
+ struct elfhdr *buf_ehdr;
+
+ if (len < sizeof(*buf_ehdr)) {
+ pr_debug("Buffer is too small to hold ELF header.\n");
+ return -ENOEXEC;
+ }
+
+ memset(ehdr, 0, sizeof(*ehdr));
+ memcpy(ehdr->e_ident, buf, sizeof(ehdr->e_ident));
+ if (!elf_is_elf_file(ehdr)) {
+ pr_debug("No ELF header magic.\n");
+ return -ENOEXEC;
+ }
+
+ if (ehdr->e_ident[EI_CLASS] != ELF_CLASS) {
+ pr_debug("Not a supported ELF class.\n");
+ return -ENOEXEC;
+ } else if (ehdr->e_ident[EI_DATA] != ELFDATA2LSB &&
+ ehdr->e_ident[EI_DATA] != ELFDATA2MSB) {
+ pr_debug("Not a supported ELF data format.\n");
+ return -ENOEXEC;
+ }
+
+ buf_ehdr = (struct elfhdr *) buf;
+ if (elf16_to_cpu(ehdr, buf_ehdr->e_ehsize) != sizeof(*buf_ehdr)) {
+ pr_debug("Bad ELF header size.\n");
+ return -ENOEXEC;
+ }
+
+ ehdr->e_type = elf16_to_cpu(ehdr, buf_ehdr->e_type);
+ ehdr->e_machine = elf16_to_cpu(ehdr, buf_ehdr->e_machine);
+ ehdr->e_version = elf32_to_cpu(ehdr, buf_ehdr->e_version);
+ ehdr->e_entry = elf_addr_to_cpu(ehdr, buf_ehdr->e_entry);
+ ehdr->e_phoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_phoff);
+ ehdr->e_shoff = elf_addr_to_cpu(ehdr, buf_ehdr->e_shoff);
+ ehdr->e_flags = elf32_to_cpu(ehdr, buf_ehdr->e_flags);
+ ehdr->e_phentsize = elf16_to_cpu(ehdr, buf_ehdr->e_phentsize);
+ ehdr->e_phnum = elf16_to_cpu(ehdr, buf_ehdr->e_phnum);
+ ehdr->e_shentsize = elf16_to_cpu(ehdr, buf_ehdr->e_shentsize);
+ ehdr->e_shnum = elf16_to_cpu(ehdr, buf_ehdr->e_shnum);
+ ehdr->e_shstrndx = elf16_to_cpu(ehdr, buf_ehdr->e_shstrndx);
+
+ return elf_is_ehdr_sane(ehdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_is_phdr_sane - check that it is safe to use the program header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_phdr_sane(const struct elf_phdr *phdr, size_t buf_len)
+{
+
+ if (phdr->p_offset + phdr->p_filesz < phdr->p_offset) {
+ pr_debug("ELF segment location wraps around.\n");
+ return false;
+ } else if (phdr->p_offset + phdr->p_filesz > buf_len) {
+ pr_debug("ELF segment not in file.\n");
+ return false;
+ } else if (phdr->p_paddr + phdr->p_memsz < phdr->p_paddr) {
+ pr_debug("ELF segment address wraps around.\n");
+ return false;
+ }
+
+ return true;
+}
+
+static int elf_read_phdr(const char *buf, size_t len, struct elf_info *elf_info,
+ int idx)
+{
+ /* Override the const in proghdrs, we are the ones doing the loading. */
+ struct elf_phdr *phdr = (struct elf_phdr *) &elf_info->proghdrs[idx];
+ const char *pbuf;
+ struct elf_phdr *buf_phdr;
+
+ pbuf = buf + elf_info->ehdr->e_phoff + (idx * sizeof(*buf_phdr));
+ buf_phdr = (struct elf_phdr *) pbuf;
+
+ phdr->p_type = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_type);
+ phdr->p_offset = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_offset);
+ phdr->p_paddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_paddr);
+ phdr->p_vaddr = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_vaddr);
+ phdr->p_flags = elf32_to_cpu(elf_info->ehdr, buf_phdr->p_flags);
+
+ /*
+ * The following fields have a type equivalent to Elf_Addr
+ * both in 32 bit and 64 bit ELF.
+ */
+ phdr->p_filesz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_filesz);
+ phdr->p_memsz = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_memsz);
+ phdr->p_align = elf_addr_to_cpu(elf_info->ehdr, buf_phdr->p_align);
+
+ return elf_is_phdr_sane(phdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_phdrs - read the program headers from the buffer
+ *
+ * This function assumes that the program header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_phdrs(const char *buf, size_t len,
+ struct elf_info *elf_info)
+{
+ size_t phdr_size, i;
+ const struct elfhdr *ehdr = elf_info->ehdr;
+
+ /*
+ * e_phnum is at most 65535 so calculating the size of the
+ * program header cannot overflow.
+ */
+ phdr_size = sizeof(struct elf_phdr) * ehdr->e_phnum;
+
+ elf_info->proghdrs = kzalloc(phdr_size, GFP_KERNEL);
+ if (!elf_info->proghdrs)
+ return -ENOMEM;
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ int ret;
+
+ ret = elf_read_phdr(buf, len, elf_info, i);
+ if (ret) {
+ kfree(elf_info->proghdrs);
+ elf_info->proghdrs = NULL;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_is_shdr_sane - check that it is safe to use the section header
+ * @buf_len: size of the buffer in which the ELF file is loaded.
+ */
+static bool elf_is_shdr_sane(const struct elf_shdr *shdr, size_t buf_len)
+{
+ bool size_ok;
+
+ /* SHT_NULL headers have undefined values, so we can't check them. */
+ if (shdr->sh_type == SHT_NULL)
+ return true;
+
+ /* Now verify sh_entsize */
+ switch (shdr->sh_type) {
+ case SHT_SYMTAB:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Sym);
+ break;
+ case SHT_RELA:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Rela);
+ break;
+ case SHT_DYNAMIC:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Dyn);
+ break;
+ case SHT_REL:
+ size_ok = shdr->sh_entsize == sizeof(Elf_Rel);
+ break;
+ case SHT_NOTE:
+ case SHT_PROGBITS:
+ case SHT_HASH:
+ case SHT_NOBITS:
+ default:
+ /*
+ * This is a section whose entsize requirements
+ * I don't care about. If I don't know about
+		 * the section I can't care about its entsize
+ * requirements.
+ */
+ size_ok = true;
+ break;
+ }
+
+ if (!size_ok) {
+ pr_debug("ELF section with wrong entry size.\n");
+ return false;
+ } else if (shdr->sh_addr + shdr->sh_size < shdr->sh_addr) {
+ pr_debug("ELF section address wraps around.\n");
+ return false;
+ }
+
+ if (shdr->sh_type != SHT_NOBITS) {
+ if (shdr->sh_offset + shdr->sh_size < shdr->sh_offset) {
+ pr_debug("ELF section location wraps around.\n");
+ return false;
+ } else if (shdr->sh_offset + shdr->sh_size > buf_len) {
+ pr_debug("ELF section not in file.\n");
+ return false;
+ }
+ }
+
+ return true;
+}
+
+static int elf_read_shdr(const char *buf, size_t len, struct elf_info *elf_info,
+ int idx)
+{
+ struct elf_shdr *shdr = &elf_info->sechdrs[idx];
+ const struct elfhdr *ehdr = elf_info->ehdr;
+ const char *sbuf;
+ struct elf_shdr *buf_shdr;
+
+ sbuf = buf + ehdr->e_shoff + idx * sizeof(*buf_shdr);
+ buf_shdr = (struct elf_shdr *) sbuf;
+
+ shdr->sh_name = elf32_to_cpu(ehdr, buf_shdr->sh_name);
+ shdr->sh_type = elf32_to_cpu(ehdr, buf_shdr->sh_type);
+ shdr->sh_addr = elf_addr_to_cpu(ehdr, buf_shdr->sh_addr);
+ shdr->sh_offset = elf_addr_to_cpu(ehdr, buf_shdr->sh_offset);
+ shdr->sh_link = elf32_to_cpu(ehdr, buf_shdr->sh_link);
+ shdr->sh_info = elf32_to_cpu(ehdr, buf_shdr->sh_info);
+
+ /*
+ * The following fields have a type equivalent to Elf_Addr
+ * both in 32 bit and 64 bit ELF.
+ */
+ shdr->sh_flags = elf_addr_to_cpu(ehdr, buf_shdr->sh_flags);
+ shdr->sh_size = elf_addr_to_cpu(ehdr, buf_shdr->sh_size);
+ shdr->sh_addralign = elf_addr_to_cpu(ehdr, buf_shdr->sh_addralign);
+ shdr->sh_entsize = elf_addr_to_cpu(ehdr, buf_shdr->sh_entsize);
+
+ return elf_is_shdr_sane(shdr, len) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_read_shdrs - read the section headers from the buffer
+ *
+ * This function assumes that the section header table was checked for sanity.
+ * Use elf_is_ehdr_sane() if it wasn't.
+ */
+static int elf_read_shdrs(const char *buf, size_t len,
+ struct elf_info *elf_info)
+{
+ size_t shdr_size, i;
+
+ /*
+	 * e_shnum is at most 65535 so calculating
+ * the size of the section header cannot overflow.
+ */
+ shdr_size = sizeof(struct elf_shdr) * elf_info->ehdr->e_shnum;
+
+ elf_info->sechdrs = kzalloc(shdr_size, GFP_KERNEL);
+ if (!elf_info->sechdrs)
+ return -ENOMEM;
+
+ for (i = 0; i < elf_info->ehdr->e_shnum; i++) {
+ int ret;
+
+ ret = elf_read_shdr(buf, len, elf_info, i);
+ if (ret) {
+ kfree(elf_info->sechdrs);
+ elf_info->sechdrs = NULL;
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_read_from_buffer - read ELF file and set up ELF header and ELF info
+ * @buf: Buffer to read ELF file from.
+ * @len: Size of @buf.
+ * @ehdr: Pointer to existing struct which will be populated.
+ * @elf_info: Pointer to existing struct which will be populated.
+ *
+ * This function allows reading ELF files with different byte order than
+ * the kernel, byte-swapping the fields as needed.
+ *
+ * Return:
+ * On success returns 0, and the caller should call elf_free_info(elf_info) to
+ * free the memory allocated for the section and program headers.
+ */
+int elf_read_from_buffer(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info)
+{
+ int ret;
+
+ ret = elf_read_ehdr(buf, len, ehdr);
+ if (ret)
+ return ret;
+
+ elf_info->buffer = buf;
+ elf_info->ehdr = ehdr;
+ if (ehdr->e_phoff > 0 && ehdr->e_phnum > 0) {
+ ret = elf_read_phdrs(buf, len, elf_info);
+ if (ret)
+ return ret;
+ }
+ if (ehdr->e_shoff > 0 && ehdr->e_shnum > 0) {
+ ret = elf_read_shdrs(buf, len, elf_info);
+ if (ret) {
+ kfree(elf_info->proghdrs);
+ return ret;
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * elf_free_info - free memory allocated by elf_read_from_buffer
+ */
+void elf_free_info(struct elf_info *elf_info)
+{
+ kfree(elf_info->proghdrs);
+ kfree(elf_info->sechdrs);
+ memset(elf_info, 0, sizeof(*elf_info));
+}
+/**
+ * build_elf_exec_info - read ELF executable and check that we can use it
+ */
+static int build_elf_exec_info(const char *buf, size_t len, struct elfhdr *ehdr,
+ struct elf_info *elf_info)
+{
+ int i;
+ int ret;
+
+ ret = elf_read_from_buffer(buf, len, ehdr, elf_info);
+ if (ret)
+ return ret;
+
+ /* Big endian vmlinux has type ET_DYN. */
+ if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN) {
+ pr_err("Not an ELF executable.\n");
+ goto error;
+ } else if (!elf_info->proghdrs) {
+ pr_err("No ELF program header.\n");
+ goto error;
+ }
+
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ /*
+ * Kexec does not support loading interpreters.
+		 * In addition, this check keeps us from attempting
+		 * to kexec ordinary executables.
+ */
+ if (elf_info->proghdrs[i].p_type == PT_INTERP) {
+ pr_err("Requires an ELF interpreter.\n");
+ goto error;
+ }
+ }
+
+ return 0;
+error:
+ elf_free_info(elf_info);
+ return -ENOEXEC;
+}
+
+static int elf64_probe(const char *buf, unsigned long len)
+{
+ struct elfhdr ehdr;
+ struct elf_info elf_info;
+ int ret;
+
+ ret = build_elf_exec_info(buf, len, &ehdr, &elf_info);
+ if (ret)
+ return ret;
+
+ elf_free_info(&elf_info);
+
+ return elf_check_arch(&ehdr) ? 0 : -ENOEXEC;
+}
+
+/**
+ * elf_exec_load - load ELF executable image
+ * @lowest_load_addr: On return, will be the address where the first PT_LOAD
+ * section will be loaded in memory.
+ *
+ * Return:
+ * 0 on success, negative value on failure.
+ */
+static int elf_exec_load(struct kimage *image, struct elfhdr *ehdr,
+ struct elf_info *elf_info,
+ unsigned long *lowest_load_addr)
+{
+ unsigned long base = 0, lowest_addr = UINT_MAX;
+ int ret;
+ size_t i;
+ struct kexec_buf kbuf = { .image = image, .buf_max = ppc64_rma_size,
+ .top_down = false };
+
+ /* Read in the PT_LOAD segments. */
+ for (i = 0; i < ehdr->e_phnum; i++) {
+ unsigned long load_addr;
+ size_t size;
+ const struct elf_phdr *phdr;
+
+ phdr = &elf_info->proghdrs[i];
+ if (phdr->p_type != PT_LOAD)
+ continue;
+
+ size = phdr->p_filesz;
+ if (size > phdr->p_memsz)
+ size = phdr->p_memsz;
+
+ kbuf.buffer = (void *) elf_info->buffer + phdr->p_offset;
+ kbuf.bufsz = size;
+ kbuf.memsz = phdr->p_memsz;
+ kbuf.buf_align = phdr->p_align;
+ kbuf.buf_min = phdr->p_paddr + base;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ load_addr = kbuf.mem;
+
+ if (load_addr < lowest_addr)
+ lowest_addr = load_addr;
+ }
+
+ /* Update entry point to reflect new load address. */
+ ehdr->e_entry += base;
+
+ *lowest_load_addr = lowest_addr;
+ ret = 0;
+ out:
+ return ret;
+}
+
+static void *elf64_load(struct kimage *image, char *kernel_buf,
+ unsigned long kernel_len, char *initrd,
+ unsigned long initrd_len, char *cmdline,
+ unsigned long cmdline_len)
+{
+ int ret;
+ unsigned int fdt_size;
+ unsigned long kernel_load_addr, purgatory_load_addr;
+ unsigned long initrd_load_addr = 0, fdt_load_addr;
+ void *fdt;
+ const void *slave_code;
+ struct elfhdr ehdr;
+ struct elf_info elf_info;
+ struct kexec_buf kbuf = { .image = image, .buf_min = 0,
+ .buf_max = ppc64_rma_size };
+
+ ret = build_elf_exec_info(kernel_buf, kernel_len, &ehdr, &elf_info);
+ if (ret)
+ goto out;
+
+ ret = elf_exec_load(image, &ehdr, &elf_info, &kernel_load_addr);
+ if (ret)
+ goto out;
+
+ pr_debug("Loaded the kernel at 0x%lx\n", kernel_load_addr);
+
+ ret = kexec_load_purgatory(image, 0, ppc64_rma_size, true,
+ &purgatory_load_addr);
+ if (ret) {
+ pr_err("Loading purgatory failed.\n");
+ goto out;
+ }
+
+ pr_debug("Loaded purgatory at 0x%lx\n", purgatory_load_addr);
+
+ if (initrd != NULL) {
+ kbuf.buffer = initrd;
+ kbuf.bufsz = kbuf.memsz = initrd_len;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = false;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ initrd_load_addr = kbuf.mem;
+
+ pr_debug("Loaded initrd at 0x%lx\n", initrd_load_addr);
+ }
+
+ fdt_size = fdt_totalsize(initial_boot_params) * 2;
+ fdt = kmalloc(fdt_size, GFP_KERNEL);
+ if (!fdt) {
+ pr_err("Not enough memory for the device tree.\n");
+ ret = -ENOMEM;
+ goto out;
+ }
+ ret = fdt_open_into(initial_boot_params, fdt, fdt_size);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ ret = -EINVAL;
+ goto out;
+ }
+
+ ret = setup_new_fdt(fdt, initrd_load_addr, initrd_len, cmdline);
+ if (ret)
+ goto out;
+
+ fdt_pack(fdt);
+
+ kbuf.buffer = fdt;
+ kbuf.bufsz = kbuf.memsz = fdt_size;
+ kbuf.buf_align = PAGE_SIZE;
+ kbuf.top_down = true;
+ ret = kexec_add_buffer(&kbuf);
+ if (ret)
+ goto out;
+ fdt_load_addr = kbuf.mem;
+
+ pr_debug("Loaded device tree at 0x%lx\n", fdt_load_addr);
+
+ slave_code = elf_info.buffer + elf_info.proghdrs[0].p_offset;
+ ret = setup_purgatory(image, slave_code, fdt, kernel_load_addr,
+ fdt_load_addr);
+ if (ret)
+ pr_err("Error setting up the purgatory.\n");
+
+out:
+ elf_free_info(&elf_info);
+
+ /* Make kimage_file_post_load_cleanup free the fdt buffer for us. */
+ return ret ? ERR_PTR(ret) : fdt;
+}
+
+struct kexec_file_ops kexec_elf64_ops = {
+ .probe = elf64_probe,
+ .load = elf64_load,
+};
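kexec_elf64_ops is the loader that ultimately services the kexec_file_load(2)
syscall on ppc64. A hedged userspace sketch of that call (the file paths are
hypothetical, the syscall number being wired up for ppc64 is assumed, and
error handling is omitted):

	#include <fcntl.h>
	#include <sys/syscall.h>
	#include <unistd.h>

	int main(void)
	{
		int kfd = open("/boot/vmlinux", O_RDONLY);	/* hypothetical path */
		int ifd = open("/boot/initrd.img", O_RDONLY);	/* hypothetical path */
		const char cmdline[] = "root=/dev/sda2 ro";

		/* the length passed must count the trailing NUL */
		return syscall(SYS_kexec_file_load, kfd, ifd,
			       sizeof(cmdline), cmdline, 0UL);
	}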
diff --git a/arch/powerpc/kernel/kprobes.c b/arch/powerpc/kernel/kprobes.c
index e785cc9e1ecd..ad108b842669 100644
--- a/arch/powerpc/kernel/kprobes.c
+++ b/arch/powerpc/kernel/kprobes.c
@@ -140,13 +140,16 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri,
regs->link = (unsigned long)kretprobe_trampoline;
}
-static int __kprobes kprobe_handler(struct pt_regs *regs)
+int __kprobes kprobe_handler(struct pt_regs *regs)
{
struct kprobe *p;
int ret = 0;
unsigned int *addr = (unsigned int *)regs->nip;
struct kprobe_ctlblk *kcb;
+ if (user_mode(regs))
+ return 0;
+
/*
* We don't want to be preempted for the entire
* duration of kprobe processing
@@ -359,12 +362,12 @@ static int __kprobes trampoline_probe_handler(struct kprobe *p,
* single-stepped a copy of the instruction. The address of this
* copy is p->ainsn.insn.
*/
-static int __kprobes post_kprobe_handler(struct pt_regs *regs)
+int __kprobes kprobe_post_handler(struct pt_regs *regs)
{
struct kprobe *cur = kprobe_running();
struct kprobe_ctlblk *kcb = get_kprobe_ctlblk();
- if (!cur)
+ if (!cur || user_mode(regs))
return 0;
/* make sure we got here for instruction we have a kprobe on */
@@ -449,7 +452,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
* zero, try to fix up.
*/
if ((entry = search_exception_tables(regs->nip)) != NULL) {
- regs->nip = entry->fixup;
+ regs->nip = extable_fixup(entry);
return 1;
}
@@ -470,25 +473,7 @@ int __kprobes kprobe_fault_handler(struct pt_regs *regs, int trapnr)
int __kprobes kprobe_exceptions_notify(struct notifier_block *self,
unsigned long val, void *data)
{
- struct die_args *args = (struct die_args *)data;
- int ret = NOTIFY_DONE;
-
- if (args->regs && user_mode(args->regs))
- return ret;
-
- switch (val) {
- case DIE_BPT:
- if (kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- case DIE_SSTEP:
- if (post_kprobe_handler(args->regs))
- ret = NOTIFY_STOP;
- break;
- default:
- break;
- }
- return ret;
+ return NOTIFY_DONE;
}
unsigned long arch_deref_entry_point(void *entry)
diff --git a/arch/powerpc/kernel/machine_kexec_64.c b/arch/powerpc/kernel/machine_kexec_64.c
index a205fa3d9bf3..5c12e21d0d1a 100644
--- a/arch/powerpc/kernel/machine_kexec_64.c
+++ b/arch/powerpc/kernel/machine_kexec_64.c
@@ -310,7 +310,7 @@ void default_machine_kexec(struct kimage *image)
if (!kdump_in_progress())
kexec_prepare_cpus();
- pr_debug("kexec: Starting switchover sequence.\n");
+ printk("kexec: Starting switchover sequence.\n");
	/* switch to a statically allocated stack. Based on irq stack code.
	 * We set up preempt_count to avoid using VMX in memcpy.
diff --git a/arch/powerpc/kernel/machine_kexec_file_64.c b/arch/powerpc/kernel/machine_kexec_file_64.c
new file mode 100644
index 000000000000..7abc8a75ee48
--- /dev/null
+++ b/arch/powerpc/kernel/machine_kexec_file_64.c
@@ -0,0 +1,338 @@
+/*
+ * ppc64 code to implement the kexec_file_load syscall
+ *
+ * Copyright (C) 2004 Adam Litke (agl@us.ibm.com)
+ * Copyright (C) 2004 IBM Corp.
+ * Copyright (C) 2004,2005 Milton D Miller II, IBM Corporation
+ * Copyright (C) 2005 R Sharada (sharada@in.ibm.com)
+ * Copyright (C) 2006 Mohan Kumar M (mohan@in.ibm.com)
+ * Copyright (C) 2016 IBM Corporation
+ *
+ * Based on kexec-tools' kexec-elf-ppc64.c, fs2dt.c.
+ * Heavily modified for the kernel by
+ * Thiago Jung Bauermann <bauerman@linux.vnet.ibm.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation (version 2 of the License).
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU General Public License for more details.
+ */
+
+#include <linux/slab.h>
+#include <linux/kexec.h>
+#include <linux/memblock.h>
+#include <linux/of_fdt.h>
+#include <linux/libfdt.h>
+
+#define SLAVE_CODE_SIZE 256
+
+static struct kexec_file_ops *kexec_file_loaders[] = {
+ &kexec_elf64_ops,
+};
+
+int arch_kexec_kernel_image_probe(struct kimage *image, void *buf,
+ unsigned long buf_len)
+{
+ int i, ret = -ENOEXEC;
+ struct kexec_file_ops *fops;
+
+ /* We don't support crash kernels yet. */
+ if (image->type == KEXEC_TYPE_CRASH)
+ return -ENOTSUPP;
+
+ for (i = 0; i < ARRAY_SIZE(kexec_file_loaders); i++) {
+ fops = kexec_file_loaders[i];
+ if (!fops || !fops->probe)
+ continue;
+
+ ret = fops->probe(buf, buf_len);
+ if (!ret) {
+ image->fops = fops;
+ return ret;
+ }
+ }
+
+ return ret;
+}
+
+void *arch_kexec_kernel_image_load(struct kimage *image)
+{
+ if (!image->fops || !image->fops->load)
+ return ERR_PTR(-ENOEXEC);
+
+ return image->fops->load(image, image->kernel_buf,
+ image->kernel_buf_len, image->initrd_buf,
+ image->initrd_buf_len, image->cmdline_buf,
+ image->cmdline_buf_len);
+}
+
+int arch_kimage_file_post_load_cleanup(struct kimage *image)
+{
+ if (!image->fops || !image->fops->cleanup)
+ return 0;
+
+ return image->fops->cleanup(image->image_loader_data);
+}
+
+/**
+ * arch_kexec_walk_mem - call func(data) for each unreserved memory block
+ * @kbuf: Context info for the search. Also passed to @func.
+ * @func: Function to call for each memory block.
+ *
+ * This function is used by kexec_add_buffer and kexec_locate_mem_hole
+ * to find unreserved memory to load kexec segments into.
+ *
+ * Return: The memory walk will stop when @func returns a non-zero value,
+ * and that value will be returned. If all free regions are visited without
+ * @func returning non-zero, zero will be returned.
+ */
+int arch_kexec_walk_mem(struct kexec_buf *kbuf, int (*func)(u64, u64, void *))
+{
+ int ret = 0;
+ u64 i;
+ phys_addr_t mstart, mend;
+
+ if (kbuf->top_down) {
+ for_each_free_mem_range_reverse(i, NUMA_NO_NODE, 0,
+ &mstart, &mend, NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ ret = func(mstart, mend - 1, kbuf);
+ if (ret)
+ break;
+ }
+ } else {
+ for_each_free_mem_range(i, NUMA_NO_NODE, 0, &mstart, &mend,
+ NULL) {
+ /*
+ * In memblock, end points to the first byte after the
+ * range while in kexec, end points to the last byte
+ * in the range.
+ */
+ ret = func(mstart, mend - 1, kbuf);
+ if (ret)
+ break;
+ }
+ }
+
+ return ret;
+}
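Since @func receives an inclusive end, callbacks must add 1 when computing a
region's size. A small hypothetical callback illustrating the convention:

	static int print_free_region(u64 start, u64 end, void *data)
	{
		/* end is inclusive, so the size is end - start + 1 */
		pr_info("free: 0x%llx-0x%llx (%llu bytes)\n",
			start, end, end - start + 1);
		return 0;	/* zero keeps the walk going */
	}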
+
+/**
+ * setup_purgatory - initialize the purgatory's global variables
+ * @image: kexec image.
+ * @slave_code: Slave code for the purgatory.
+ * @fdt: Flattened device tree for the next kernel.
+ * @kernel_load_addr: Address where the kernel is loaded.
+ * @fdt_load_addr: Address where the flattened device tree is loaded.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_purgatory(struct kimage *image, const void *slave_code,
+ const void *fdt, unsigned long kernel_load_addr,
+ unsigned long fdt_load_addr)
+{
+ unsigned int *slave_code_buf, master_entry;
+ int ret;
+
+ slave_code_buf = kmalloc(SLAVE_CODE_SIZE, GFP_KERNEL);
+ if (!slave_code_buf)
+ return -ENOMEM;
+
+ /* Get the slave code from the new kernel and put it in purgatory. */
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+ slave_code_buf, SLAVE_CODE_SIZE,
+ true);
+ if (ret) {
+ kfree(slave_code_buf);
+ return ret;
+ }
+
+ master_entry = slave_code_buf[0];
+ memcpy(slave_code_buf, slave_code, SLAVE_CODE_SIZE);
+ slave_code_buf[0] = master_entry;
+ ret = kexec_purgatory_get_set_symbol(image, "purgatory_start",
+ slave_code_buf, SLAVE_CODE_SIZE,
+ false);
+ kfree(slave_code_buf);
+
+ ret = kexec_purgatory_get_set_symbol(image, "kernel", &kernel_load_addr,
+ sizeof(kernel_load_addr), false);
+ if (ret)
+ return ret;
+ ret = kexec_purgatory_get_set_symbol(image, "dt_offset", &fdt_load_addr,
+ sizeof(fdt_load_addr), false);
+ if (ret)
+ return ret;
+
+ return 0;
+}
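The same helper is used in both directions above: @get true copies the
symbol's current contents out of the purgatory image, @get false patches a
new value in. A sketch for a hypothetical u64 purgatory symbol named
"my_var":

	u64 val;
	int ret;

	/* read the current value of "my_var" from the purgatory */
	ret = kexec_purgatory_get_set_symbol(image, "my_var", &val,
					     sizeof(val), true);

	/* write an updated value back */
	if (!ret) {
		val += 0x1000;
		ret = kexec_purgatory_get_set_symbol(image, "my_var", &val,
						     sizeof(val), false);
	}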
+
+/**
+ * delete_fdt_mem_rsv - delete memory reservation with given address and size
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+static int delete_fdt_mem_rsv(void *fdt, unsigned long start, unsigned long size)
+{
+ int i, ret, num_rsvs = fdt_num_mem_rsv(fdt);
+
+ for (i = 0; i < num_rsvs; i++) {
+ uint64_t rsv_start, rsv_size;
+
+ ret = fdt_get_mem_rsv(fdt, i, &rsv_start, &rsv_size);
+ if (ret) {
+ pr_err("Malformed device tree.\n");
+ return -EINVAL;
+ }
+
+ if (rsv_start == start && rsv_size == size) {
+ ret = fdt_del_mem_rsv(fdt, i);
+ if (ret) {
+ pr_err("Error deleting device tree reservation.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+ }
+ }
+
+ return -ENOENT;
+}
+
+/**
+ * setup_new_fdt - modify /chosen and memory reservation for the next kernel
+ * @fdt: Flattened device tree for the next kernel.
+ * @initrd_load_addr: Address where the next initrd will be loaded.
+ * @initrd_len: Size of the next initrd, or 0 if there will be none.
+ * @cmdline: Command line for the next kernel, or NULL if there will
+ * be none.
+ *
+ * Return: 0 on success, or negative errno on error.
+ */
+int setup_new_fdt(void *fdt, unsigned long initrd_load_addr,
+ unsigned long initrd_len, const char *cmdline)
+{
+ int ret, chosen_node;
+ const void *prop;
+
+ /* Remove memory reservation for the current device tree. */
+ ret = delete_fdt_mem_rsv(fdt, __pa(initial_boot_params),
+ fdt_totalsize(initial_boot_params));
+ if (ret == 0)
+ pr_debug("Removed old device tree reservation.\n");
+ else if (ret != -ENOENT)
+ return ret;
+
+ chosen_node = fdt_path_offset(fdt, "/chosen");
+ if (chosen_node == -FDT_ERR_NOTFOUND) {
+ chosen_node = fdt_add_subnode(fdt, fdt_path_offset(fdt, "/"),
+ "chosen");
+ if (chosen_node < 0) {
+ pr_err("Error creating /chosen.\n");
+ return -EINVAL;
+ }
+ } else if (chosen_node < 0) {
+ pr_err("Malformed device tree: error reading /chosen.\n");
+ return -EINVAL;
+ }
+
+ /* Did we boot using an initrd? */
+ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-start", NULL);
+ if (prop) {
+ uint64_t tmp_start, tmp_end, tmp_size;
+
+ tmp_start = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+ prop = fdt_getprop(fdt, chosen_node, "linux,initrd-end", NULL);
+ if (!prop) {
+ pr_err("Malformed device tree.\n");
+ return -EINVAL;
+ }
+ tmp_end = fdt64_to_cpu(*((const fdt64_t *) prop));
+
+ /*
+ * kexec reserves exact initrd size, while firmware may
+ * reserve a multiple of PAGE_SIZE, so check for both.
+ */
+ tmp_size = tmp_end - tmp_start;
+ ret = delete_fdt_mem_rsv(fdt, tmp_start, tmp_size);
+ if (ret == -ENOENT)
+ ret = delete_fdt_mem_rsv(fdt, tmp_start,
+ round_up(tmp_size, PAGE_SIZE));
+ if (ret == 0)
+ pr_debug("Removed old initrd reservation.\n");
+ else if (ret != -ENOENT)
+ return ret;
+
+ /* If there's no new initrd, delete the old initrd's info. */
+ if (initrd_len == 0) {
+ ret = fdt_delprop(fdt, chosen_node,
+ "linux,initrd-start");
+ if (ret) {
+ pr_err("Error deleting linux,initrd-start.\n");
+ return -EINVAL;
+ }
+
+ ret = fdt_delprop(fdt, chosen_node, "linux,initrd-end");
+ if (ret) {
+ pr_err("Error deleting linux,initrd-end.\n");
+ return -EINVAL;
+ }
+ }
+ }
+
+ if (initrd_len) {
+ ret = fdt_setprop_u64(fdt, chosen_node,
+ "linux,initrd-start",
+ initrd_load_addr);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ /* initrd-end is the first address after the initrd image. */
+ ret = fdt_setprop_u64(fdt, chosen_node, "linux,initrd-end",
+ initrd_load_addr + initrd_len);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ ret = fdt_add_mem_rsv(fdt, initrd_load_addr, initrd_len);
+ if (ret) {
+ pr_err("Error reserving initrd memory: %s\n",
+ fdt_strerror(ret));
+ return -EINVAL;
+ }
+ }
+
+ if (cmdline != NULL) {
+ ret = fdt_setprop_string(fdt, chosen_node, "bootargs", cmdline);
+ if (ret < 0) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+ } else {
+ ret = fdt_delprop(fdt, chosen_node, "bootargs");
+ if (ret && ret != -FDT_ERR_NOTFOUND) {
+ pr_err("Error deleting bootargs.\n");
+ return -EINVAL;
+ }
+ }
+
+ ret = fdt_setprop(fdt, chosen_node, "linux,booted-from-kexec", NULL, 0);
+ if (ret) {
+ pr_err("Error setting up the new device tree.\n");
+ return -EINVAL;
+ }
+
+ return 0;
+}
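After setup_new_fdt() the next kernel finds the initrd range and command line
under /chosen. A hedged libfdt read-back sketch using the property names set
above:

	int off = fdt_path_offset(fdt, "/chosen");
	const fdt64_t *start = fdt_getprop(fdt, off, "linux,initrd-start", NULL);
	const char *args = fdt_getprop(fdt, off, "bootargs", NULL);

	if (start)
		pr_debug("initrd at 0x%llx\n",
			 (unsigned long long)fdt64_to_cpu(*start));
	if (args)
		pr_debug("bootargs: %s\n", args);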
diff --git a/arch/powerpc/kernel/mce.c b/arch/powerpc/kernel/mce.c
index 5e7ece0fda9f..c6923ff45131 100644
--- a/arch/powerpc/kernel/mce.c
+++ b/arch/powerpc/kernel/mce.c
@@ -72,7 +72,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
struct mce_error_info *mce_err,
uint64_t nip, uint64_t addr)
{
- uint64_t srr1;
int index = __this_cpu_inc_return(mce_nest_count) - 1;
struct machine_check_event *mce = this_cpu_ptr(&mce_event[index]);
@@ -99,8 +98,6 @@ void save_mce_event(struct pt_regs *regs, long handled,
mce->disposition = MCE_DISPOSITION_NOT_RECOVERED;
mce->severity = MCE_SEV_ERROR_SYNC;
- srr1 = regs->msr;
-
/*
* Populate the mce error_type and type-specific error_type.
*/
diff --git a/arch/powerpc/kernel/misc_32.S b/arch/powerpc/kernel/misc_32.S
index 93cf7a5846a6..1863324c6a3c 100644
--- a/arch/powerpc/kernel/misc_32.S
+++ b/arch/powerpc/kernel/misc_32.S
@@ -614,7 +614,7 @@ _GLOBAL(start_secondary_resume)
_GLOBAL(__main)
blr
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
/*
* Must be relocatable PIC code callable as a C function.
*/
diff --git a/arch/powerpc/kernel/misc_64.S b/arch/powerpc/kernel/misc_64.S
index 4f178671f230..32be2a844947 100644
--- a/arch/powerpc/kernel/misc_64.S
+++ b/arch/powerpc/kernel/misc_64.S
@@ -478,7 +478,7 @@ _GLOBAL(kexec_wait)
addi r5,r5,kexec_flag-1b
99: HMT_LOW
-#ifdef CONFIG_KEXEC /* use no memory without kexec */
+#ifdef CONFIG_KEXEC_CORE /* use no memory without kexec */
lwz r4,0(r5)
cmpwi 0,r4,0
beq 99b
@@ -503,7 +503,7 @@ kexec_flag:
.long 0
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
#ifdef CONFIG_PPC_BOOK3E
/*
* BOOK3E has no real MMU mode, so we have to setup the initial TLB
@@ -716,4 +716,4 @@ _GLOBAL(kexec_sequence)
mtlr 4
li r5,0
blr /* image->start(physid, image->start, 0); */
-#endif /* CONFIG_KEXEC */
+#endif /* CONFIG_KEXEC_CORE */
diff --git a/arch/powerpc/kernel/module_64.c b/arch/powerpc/kernel/module_64.c
index 183368e008cf..bb1807184bad 100644
--- a/arch/powerpc/kernel/module_64.c
+++ b/arch/powerpc/kernel/module_64.c
@@ -652,6 +652,11 @@ int apply_relocate_add(Elf64_Shdr *sechdrs,
*location = value - (unsigned long)location;
break;
+ case R_PPC64_REL32:
+ /* 32 bits relative (used by relative exception tables) */
+ *(u32 *)location = value - (unsigned long)location;
+ break;
+
case R_PPC64_TOCSAVE:
/*
* Marker reloc indicates we don't have to save r2.
diff --git a/arch/powerpc/kernel/of_platform.c b/arch/powerpc/kernel/of_platform.c
index b60a67d92ebd..34aeac54f120 100644
--- a/arch/powerpc/kernel/of_platform.c
+++ b/arch/powerpc/kernel/of_platform.c
@@ -114,11 +114,6 @@ static struct platform_driver of_pci_phb_driver = {
},
};
-static __init int of_pci_phb_init(void)
-{
- return platform_driver_register(&of_pci_phb_driver);
-}
-
-device_initcall(of_pci_phb_init);
+builtin_platform_driver(of_pci_phb_driver);
#endif /* CONFIG_PPC_OF_PLATFORM_PCI */
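builtin_platform_driver() generates exactly the boilerplate being deleted
here; it expands to roughly:

	static int __init of_pci_phb_driver_init(void)
	{
		return platform_driver_register(&of_pci_phb_driver);
	}
	device_initcall(of_pci_phb_driver_init);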
diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c
index 49a680d5ae37..04885cec24df 100644
--- a/arch/powerpc/kernel/process.c
+++ b/arch/powerpc/kernel/process.c
@@ -64,6 +64,12 @@
#include <linux/kprobes.h>
#include <linux/kdebug.h>
+#ifdef CONFIG_CC_STACKPROTECTOR
+#include <linux/stackprotector.h>
+unsigned long __stack_chk_guard __read_mostly;
+EXPORT_SYMBOL(__stack_chk_guard);
+#endif
+
/* Transactional Memory debug */
#ifdef TM_DEBUG_SW
#define TM_DEBUG(x...) printk(KERN_INFO x)
@@ -1051,14 +1057,6 @@ static inline void save_sprs(struct thread_struct *t)
*/
t->tar = mfspr(SPRN_TAR);
}
-
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- /* Conditionally save Load Monitor registers, if enabled */
- if (t->fscr & FSCR_LM) {
- t->lmrr = mfspr(SPRN_LMRR);
- t->lmser = mfspr(SPRN_LMSER);
- }
- }
#endif
}
@@ -1094,16 +1092,6 @@ static inline void restore_sprs(struct thread_struct *old_thread,
if (old_thread->tar != new_thread->tar)
mtspr(SPRN_TAR, new_thread->tar);
}
-
- if (cpu_has_feature(CPU_FTR_ARCH_300)) {
- /* Conditionally restore Load Monitor registers, if enabled */
- if (new_thread->fscr & FSCR_LM) {
- if (old_thread->lmrr != new_thread->lmrr)
- mtspr(SPRN_LMRR, new_thread->lmrr);
- if (old_thread->lmser != new_thread->lmser)
- mtspr(SPRN_LMSER, new_thread->lmser);
- }
- }
#endif
}
diff --git a/arch/powerpc/kernel/prom.c b/arch/powerpc/kernel/prom.c
index b0245bed6f54..f5d399e46193 100644
--- a/arch/powerpc/kernel/prom.c
+++ b/arch/powerpc/kernel/prom.c
@@ -156,21 +156,22 @@ static struct ibm_pa_feature {
unsigned char pabit; /* bit number (big-endian) */
unsigned char invert; /* if 1, pa bit set => clear feature */
} ibm_pa_features[] __initdata = {
- {0, 0, PPC_FEATURE_HAS_MMU, 0, 0, 0, 0},
- {0, 0, PPC_FEATURE_HAS_FPU, 0, 0, 1, 0},
- {CPU_FTR_CTRL, 0, 0, 0, 0, 3, 0},
- {CPU_FTR_NOEXECUTE, 0, 0, 0, 0, 6, 0},
- {CPU_FTR_NODSISRALIGN, 0, 0, 0, 1, 1, 1},
- {0, MMU_FTR_CI_LARGE_PAGE, 0, 0, 1, 2, 0},
- {CPU_FTR_REAL_LE, 0, PPC_FEATURE_TRUE_LE, 0, 5, 0, 0},
+ { .pabyte = 0, .pabit = 0, .cpu_user_ftrs = PPC_FEATURE_HAS_MMU },
+ { .pabyte = 0, .pabit = 1, .cpu_user_ftrs = PPC_FEATURE_HAS_FPU },
+ { .pabyte = 0, .pabit = 3, .cpu_features = CPU_FTR_CTRL },
+ { .pabyte = 0, .pabit = 6, .cpu_features = CPU_FTR_NOEXECUTE },
+ { .pabyte = 1, .pabit = 2, .mmu_features = MMU_FTR_CI_LARGE_PAGE },
+ { .pabyte = 40, .pabit = 0, .mmu_features = MMU_FTR_TYPE_RADIX },
+ { .pabyte = 1, .pabit = 1, .invert = 1, .cpu_features = CPU_FTR_NODSISRALIGN },
+ { .pabyte = 5, .pabit = 0, .cpu_features = CPU_FTR_REAL_LE,
+ .cpu_user_ftrs = PPC_FEATURE_TRUE_LE },
/*
* If the kernel doesn't support TM (ie CONFIG_PPC_TRANSACTIONAL_MEM=n),
* we don't want to turn on TM here, so we use the *_COMP versions
* which are 0 if the kernel doesn't support TM.
*/
- {CPU_FTR_TM_COMP, 0, 0,
- PPC_FEATURE2_HTM_COMP|PPC_FEATURE2_HTM_NOSC_COMP, 22, 0, 0},
- {0, MMU_FTR_TYPE_RADIX, 0, 0, 40, 0, 0},
+ { .pabyte = 22, .pabit = 0, .cpu_features = CPU_FTR_TM_COMP,
+ .cpu_user_ftrs2 = PPC_FEATURE2_HTM_COMP | PPC_FEATURE2_HTM_NOSC_COMP },
};
static void __init scan_features(unsigned long node, const unsigned char *ftrs,
@@ -427,7 +428,7 @@ static int __init early_init_dt_scan_chosen_ppc(unsigned long node,
tce_alloc_end = *lprop;
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
lprop = of_get_flat_dt_prop(node, "linux,crashkernel-base", NULL);
if (lprop)
crashk_res.start = *lprop;
diff --git a/arch/powerpc/kernel/prom_init.c b/arch/powerpc/kernel/prom_init.c
index 88ac964f4858..ec47a939cbdd 100644
--- a/arch/powerpc/kernel/prom_init.c
+++ b/arch/powerpc/kernel/prom_init.c
@@ -461,14 +461,14 @@ static int __init prom_next_node(phandle *nodep)
}
}
-static int inline prom_getprop(phandle node, const char *pname,
+static inline int prom_getprop(phandle node, const char *pname,
void *value, size_t valuelen)
{
return call_prom("getprop", 4, 1, node, ADDR(pname),
(u32)(unsigned long) value, (u32) valuelen);
}
-static int inline prom_getproplen(phandle node, const char *pname)
+static inline int prom_getproplen(phandle node, const char *pname)
{
return call_prom("getproplen", 2, 1, node, ADDR(pname));
}
@@ -635,13 +635,7 @@ static void __init early_cmdline_parse(void)
*
* See prom.h for the definition of the bits specified in the
* architecture vector.
- *
- * Because the description vector contains a mix of byte and word
- * values, we declare it as an unsigned char array, and use this
- * macro to put word values in.
*/
-#define W(x) ((x) >> 24) & 0xff, ((x) >> 16) & 0xff, \
- ((x) >> 8) & 0xff, (x) & 0xff
/* Firmware expects the value to be n - 1, where n is the # of vectors */
#define NUM_VECTORS(n) ((n) - 1)
@@ -652,92 +646,205 @@ static void __init early_cmdline_parse(void)
*/
#define VECTOR_LENGTH(n) (1 + (n) - 2)
-unsigned char ibm_architecture_vec[] = {
- W(0xfffe0000), W(0x003a0000), /* POWER5/POWER5+ */
- W(0xffff0000), W(0x003e0000), /* POWER6 */
- W(0xffff0000), W(0x003f0000), /* POWER7 */
- W(0xffff0000), W(0x004b0000), /* POWER8E */
- W(0xffff0000), W(0x004c0000), /* POWER8NVL */
- W(0xffff0000), W(0x004d0000), /* POWER8 */
- W(0xffffffff), W(0x0f000004), /* all 2.07-compliant */
- W(0xffffffff), W(0x0f000003), /* all 2.06-compliant */
- W(0xffffffff), W(0x0f000002), /* all 2.05-compliant */
- W(0xfffffffe), W(0x0f000001), /* all 2.04-compliant and earlier */
- NUM_VECTORS(6), /* 6 option vectors */
-
- /* option vector 1: processor architectures supported */
- VECTOR_LENGTH(2), /* length */
- 0, /* don't ignore, don't halt */
- OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
- OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+struct option_vector1 {
+ u8 byte1;
+ u8 arch_versions;
+} __packed;
+
+struct option_vector2 {
+ u8 byte1;
+ __be16 reserved;
+ __be32 real_base;
+ __be32 real_size;
+ __be32 virt_base;
+ __be32 virt_size;
+ __be32 load_base;
+ __be32 min_rma;
+ __be32 min_load;
+ u8 min_rma_percent;
+ u8 max_pft_size;
+} __packed;
+
+struct option_vector3 {
+ u8 byte1;
+ u8 byte2;
+} __packed;
+
+struct option_vector4 {
+ u8 byte1;
+ u8 min_vp_cap;
+} __packed;
+
+struct option_vector5 {
+ u8 byte1;
+ u8 byte2;
+ u8 byte3;
+ u8 cmo;
+ u8 associativity;
+ u8 bin_opts;
+ u8 micro_checkpoint;
+ u8 reserved0;
+ __be32 max_cpus;
+ __be16 papr_level;
+ __be16 reserved1;
+ u8 platform_facilities;
+ u8 reserved2;
+ __be16 reserved3;
+ u8 subprocessors;
+} __packed;
+
+struct option_vector6 {
+ u8 reserved;
+ u8 secondary_pteg;
+ u8 os_name;
+} __packed;
+
+struct ibm_arch_vec {
+	struct { __be32 mask, val; } pvrs[10];
+
+ u8 num_vectors;
+
+ u8 vec1_len;
+ struct option_vector1 vec1;
+
+ u8 vec2_len;
+ struct option_vector2 vec2;
+
+ u8 vec3_len;
+ struct option_vector3 vec3;
+
+ u8 vec4_len;
+ struct option_vector4 vec4;
+
+ u8 vec5_len;
+ struct option_vector5 vec5;
+
+ u8 vec6_len;
+ struct option_vector6 vec6;
+} __packed;
+
+struct ibm_arch_vec __cacheline_aligned ibm_architecture_vec = {
+ .pvrs = {
+ {
+ .mask = cpu_to_be32(0xfffe0000), /* POWER5/POWER5+ */
+ .val = cpu_to_be32(0x003a0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER6 */
+ .val = cpu_to_be32(0x003e0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER7 */
+ .val = cpu_to_be32(0x003f0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8E */
+ .val = cpu_to_be32(0x004b0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8NVL */
+ .val = cpu_to_be32(0x004c0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffff0000), /* POWER8 */
+ .val = cpu_to_be32(0x004d0000),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.07-compliant */
+ .val = cpu_to_be32(0x0f000004),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.06-compliant */
+ .val = cpu_to_be32(0x0f000003),
+ },
+ {
+ .mask = cpu_to_be32(0xffffffff), /* all 2.05-compliant */
+ .val = cpu_to_be32(0x0f000002),
+ },
+ {
+ .mask = cpu_to_be32(0xfffffffe), /* all 2.04-compliant and earlier */
+ .val = cpu_to_be32(0x0f000001),
+ },
+ },
+
+ .num_vectors = NUM_VECTORS(6),
+ .vec1_len = VECTOR_LENGTH(sizeof(struct option_vector1)),
+ .vec1 = {
+ .byte1 = 0,
+ .arch_versions = OV1_PPC_2_00 | OV1_PPC_2_01 | OV1_PPC_2_02 | OV1_PPC_2_03 |
+ OV1_PPC_2_04 | OV1_PPC_2_05 | OV1_PPC_2_06 | OV1_PPC_2_07,
+ },
+
+ .vec2_len = VECTOR_LENGTH(sizeof(struct option_vector2)),
/* option vector 2: Open Firmware options supported */
- VECTOR_LENGTH(33), /* length */
- OV2_REAL_MODE,
- 0, 0,
- W(0xffffffff), /* real_base */
- W(0xffffffff), /* real_size */
- W(0xffffffff), /* virt_base */
- W(0xffffffff), /* virt_size */
- W(0xffffffff), /* load_base */
- W(256), /* 256MB min RMA */
- W(0xffffffff), /* full client load */
- 0, /* min RMA percentage of total RAM */
- 48, /* max log_2(hash table size) */
+ .vec2 = {
+ .byte1 = OV2_REAL_MODE,
+ .reserved = 0,
+ .real_base = cpu_to_be32(0xffffffff),
+ .real_size = cpu_to_be32(0xffffffff),
+ .virt_base = cpu_to_be32(0xffffffff),
+ .virt_size = cpu_to_be32(0xffffffff),
+ .load_base = cpu_to_be32(0xffffffff),
+ .min_rma = cpu_to_be32(256), /* 256MB min RMA */
+ .min_load = cpu_to_be32(0xffffffff), /* full client load */
+ .min_rma_percent = 0, /* min RMA percentage of total RAM */
+ .max_pft_size = 48, /* max log_2(hash table size) */
+ },
+ .vec3_len = VECTOR_LENGTH(sizeof(struct option_vector3)),
/* option vector 3: processor options supported */
- VECTOR_LENGTH(2), /* length */
- 0, /* don't ignore, don't halt */
- OV3_FP | OV3_VMX | OV3_DFP,
+ .vec3 = {
+ .byte1 = 0, /* don't ignore, don't halt */
+ .byte2 = OV3_FP | OV3_VMX | OV3_DFP,
+ },
+ .vec4_len = VECTOR_LENGTH(sizeof(struct option_vector4)),
/* option vector 4: IBM PAPR implementation */
- VECTOR_LENGTH(2), /* length */
- 0, /* don't halt */
- OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+ .vec4 = {
+ .byte1 = 0, /* don't halt */
+ .min_vp_cap = OV4_MIN_ENT_CAP, /* minimum VP entitled capacity */
+ },
+ .vec5_len = VECTOR_LENGTH(sizeof(struct option_vector5)),
/* option vector 5: PAPR/OF options */
- VECTOR_LENGTH(21), /* length */
- 0, /* don't ignore, don't halt */
- OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
- OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
+ .vec5 = {
+ .byte1 = 0, /* don't ignore, don't halt */
+ .byte2 = OV5_FEAT(OV5_LPAR) | OV5_FEAT(OV5_SPLPAR) | OV5_FEAT(OV5_LARGE_PAGES) |
+ OV5_FEAT(OV5_DRCONF_MEMORY) | OV5_FEAT(OV5_DONATE_DEDICATE_CPU) |
#ifdef CONFIG_PCI_MSI
- /* PCIe/MSI support. Without MSI full PCIe is not supported */
- OV5_FEAT(OV5_MSI),
+ /* PCIe/MSI support. Without MSI full PCIe is not supported */
+ OV5_FEAT(OV5_MSI),
#else
- 0,
+ 0,
#endif
- 0,
+ .byte3 = 0,
+ .cmo =
#ifdef CONFIG_PPC_SMLPAR
- OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
+ OV5_FEAT(OV5_CMO) | OV5_FEAT(OV5_XCMO),
#else
- 0,
+ 0,
#endif
- OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
- 0,
- 0,
- 0,
- /* WARNING: The offset of the "number of cores" field below
- * must match by the macro below. Update the definition if
- * the structure layout changes.
- */
-#define IBM_ARCH_VEC_NRCORES_OFFSET 133
- W(NR_CPUS), /* number of cores supported */
- 0,
- 0,
- 0,
- 0,
- OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) |
- OV5_FEAT(OV5_PFO_HW_842), /* Byte 17 */
- 0, /* Byte 18 */
- 0, /* Byte 19 */
- 0, /* Byte 20 */
- OV5_FEAT(OV5_SUB_PROCESSORS), /* Byte 21 */
+ .associativity = OV5_FEAT(OV5_TYPE1_AFFINITY) | OV5_FEAT(OV5_PRRN),
+ .bin_opts = 0,
+ .micro_checkpoint = 0,
+ .reserved0 = 0,
+ .max_cpus = cpu_to_be32(NR_CPUS), /* number of cores supported */
+ .papr_level = 0,
+ .reserved1 = 0,
+ .platform_facilities = OV5_FEAT(OV5_PFO_HW_RNG) | OV5_FEAT(OV5_PFO_HW_ENCR) | OV5_FEAT(OV5_PFO_HW_842),
+ .reserved2 = 0,
+ .reserved3 = 0,
+ .subprocessors = 1,
+ },
/* option vector 6: IBM PAPR hints */
- VECTOR_LENGTH(3), /* length */
- 0,
- 0,
- OV6_LINUX,
+ .vec6_len = VECTOR_LENGTH(sizeof(struct option_vector6)),
+ .vec6 = {
+ .reserved = 0,
+ .secondary_pteg = 0,
+ .os_name = OV6_LINUX,
+ },
};
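The __packed structs must reproduce, byte for byte, the layout firmware
parses; the deleted open-coded array pinned the vector lengths at 2, 33, 2,
2, 21 and 3 bytes. A compile-time guard along these lines (illustrative, not
part of this patch) would catch any accidental padding:

	static void __init check_arch_vec_sizes(void)
	{
		BUILD_BUG_ON(sizeof(struct option_vector1) != 2);
		BUILD_BUG_ON(sizeof(struct option_vector2) != 33);
		BUILD_BUG_ON(sizeof(struct option_vector3) != 2);
		BUILD_BUG_ON(sizeof(struct option_vector4) != 2);
		BUILD_BUG_ON(sizeof(struct option_vector5) != 21);
		BUILD_BUG_ON(sizeof(struct option_vector6) != 3);
	}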
/* Old method - ELF header with PT_NOTE sections only works on BE */
@@ -873,7 +980,6 @@ static void __init prom_send_capabilities(void)
ihandle root;
prom_arg_t ret;
u32 cores;
- unsigned char *ptcores;
root = call_prom("open", 1, 1, ADDR("/"));
if (root != 0) {
@@ -884,37 +990,18 @@ static void __init prom_send_capabilities(void)
* divide NR_CPUS.
*/
- /* The core value may start at an odd address. If such a word
- * access is made at a cache line boundary, this leads to an
- * exception which may not be handled at this time.
- * Forcing a per byte access to avoid exception.
- */
- ptcores = &ibm_architecture_vec[IBM_ARCH_VEC_NRCORES_OFFSET];
- cores = 0;
- cores |= ptcores[0] << 24;
- cores |= ptcores[1] << 16;
- cores |= ptcores[2] << 8;
- cores |= ptcores[3];
- if (cores != NR_CPUS) {
- prom_printf("WARNING ! "
- "ibm_architecture_vec structure inconsistent: %lu!\n",
- cores);
- } else {
- cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
- prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
- cores, NR_CPUS);
- ptcores[0] = (cores >> 24) & 0xff;
- ptcores[1] = (cores >> 16) & 0xff;
- ptcores[2] = (cores >> 8) & 0xff;
- ptcores[3] = cores & 0xff;
- }
+ cores = DIV_ROUND_UP(NR_CPUS, prom_count_smt_threads());
+ prom_printf("Max number of cores passed to firmware: %lu (NR_CPUS = %lu)\n",
+ cores, NR_CPUS);
+
+ ibm_architecture_vec.vec5.max_cpus = cpu_to_be32(cores);
/* try calling the ibm,client-architecture-support method */
prom_printf("Calling ibm,client-architecture-support...");
if (call_prom_ret("call-method", 3, 2, &ret,
ADDR("ibm,client-architecture-support"),
root,
- ADDR(ibm_architecture_vec)) == 0) {
+ ADDR(&ibm_architecture_vec)) == 0) {
/* the call exists... */
if (ret)
prom_printf("\nWARNING: ibm,client-architecture"
diff --git a/arch/powerpc/kernel/setup-common.c b/arch/powerpc/kernel/setup-common.c
index 270ee30abdcf..f516ac508ae3 100644
--- a/arch/powerpc/kernel/setup-common.c
+++ b/arch/powerpc/kernel/setup-common.c
@@ -915,7 +915,7 @@ void __init setup_arch(char **cmdline_p)
init_mm.context.pte_frag = NULL;
#endif
#ifdef CONFIG_SPAPR_TCE_IOMMU
- mm_iommu_init(&init_mm.context);
+ mm_iommu_init(&init_mm);
#endif
irqstack_early_init();
exc_lvl_early_init();
diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c
index 8d586cff8a41..6824157e4d2e 100644
--- a/arch/powerpc/kernel/setup_64.c
+++ b/arch/powerpc/kernel/setup_64.c
@@ -354,7 +354,7 @@ void early_setup_secondary(void)
#endif /* CONFIG_SMP */
-#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_SMP) || defined(CONFIG_KEXEC_CORE)
static bool use_spinloop(void)
{
if (!IS_ENABLED(CONFIG_PPC_BOOK3E))
@@ -399,7 +399,7 @@ void smp_release_cpus(void)
DBG(" <- smp_release_cpus()\n");
}
-#endif /* CONFIG_SMP || CONFIG_KEXEC */
+#endif /* CONFIG_SMP || CONFIG_KEXEC_CORE */
/*
* Initialize some remaining members of the ppc64_caches and systemcfg
diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c
index 9c6f3fd58059..893bd7f79be6 100644
--- a/arch/powerpc/kernel/smp.c
+++ b/arch/powerpc/kernel/smp.c
@@ -193,7 +193,7 @@ int smp_request_message_ipi(int virq, int msg)
if (msg < 0 || msg > PPC_MSG_DEBUGGER_BREAK) {
return -EINVAL;
}
-#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC)
+#if !defined(CONFIG_DEBUGGER) && !defined(CONFIG_KEXEC_CORE)
if (msg == PPC_MSG_DEBUGGER_BREAK) {
return 1;
}
@@ -325,7 +325,7 @@ void tick_broadcast(const struct cpumask *mask)
}
#endif
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
void smp_send_debugger_break(void)
{
int cpu;
@@ -340,7 +340,7 @@ void smp_send_debugger_break(void)
}
#endif
-#ifdef CONFIG_KEXEC
+#ifdef CONFIG_KEXEC_CORE
void crash_send_ipi(void (*crash_ipi_callback)(struct pt_regs *))
{
crash_ipi_function_ptr = crash_ipi_callback;
diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c
index 023a462725b5..4239aaf74886 100644
--- a/arch/powerpc/kernel/traps.c
+++ b/arch/powerpc/kernel/traps.c
@@ -64,8 +64,9 @@
#include <asm/asm-prototypes.h>
#include <asm/hmi.h>
#include <sysdev/fsl_pci.h>
+#include <asm/kprobes.h>
-#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC)
+#if defined(CONFIG_DEBUGGER) || defined(CONFIG_KEXEC_CORE)
int (*__debugger)(struct pt_regs *regs) __read_mostly;
int (*__debugger_ipi)(struct pt_regs *regs) __read_mostly;
int (*__debugger_bpt)(struct pt_regs *regs) __read_mostly;
@@ -122,9 +123,6 @@ static unsigned long oops_begin(struct pt_regs *regs)
int cpu;
unsigned long flags;
- if (debugger(regs))
- return 1;
-
oops_enter();
/* racy, but better than risking deadlock. */
@@ -150,14 +148,15 @@ static void oops_end(unsigned long flags, struct pt_regs *regs,
int signr)
{
bust_spinlocks(0);
- die_owner = -1;
add_taint(TAINT_DIE, LOCKDEP_NOW_UNRELIABLE);
die_nest_count--;
oops_exit();
printk("\n");
- if (!die_nest_count)
+ if (!die_nest_count) {
/* Nest count reaches zero, release the lock. */
+ die_owner = -1;
arch_spin_unlock(&die_lock);
+ }
raw_local_irq_restore(flags);
crash_fadump(regs, "die oops");
@@ -227,8 +226,12 @@ NOKPROBE_SYMBOL(__die);
void die(const char *str, struct pt_regs *regs, long err)
{
- unsigned long flags = oops_begin(regs);
+ unsigned long flags;
+ if (debugger(regs))
+ return;
+
+ flags = oops_begin(regs);
if (__die(str, regs, err))
err = 0;
oops_end(flags, regs, err);
@@ -365,7 +368,7 @@ static inline int check_io_access(struct pt_regs *regs)
(*nip & 0x100)? "OUT to": "IN from",
regs->gpr[rb] - _IO_BASE, nip);
regs->msr |= MSR_RI;
- regs->nip = entry->fixup;
+ regs->nip = extable_fixup(entry);
return 1;
}
}
@@ -824,6 +827,9 @@ void single_step_exception(struct pt_regs *regs)
clear_single_step(regs);
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP)
goto bail;
@@ -1177,6 +1183,9 @@ void program_check_exception(struct pt_regs *regs)
if (debugger_bpt(regs))
goto bail;
+ if (kprobe_handler(regs))
+ goto bail;
+
/* trap exception */
if (notify_die(DIE_BPT, "breakpoint", regs, 5, 5, SIGTRAP)
== NOTIFY_STOP)
@@ -1430,7 +1439,6 @@ void facility_unavailable_exception(struct pt_regs *regs)
[FSCR_TM_LG] = "TM",
[FSCR_EBB_LG] = "EBB",
[FSCR_TAR_LG] = "TAR",
- [FSCR_LM_LG] = "LM",
};
char *facility = "unknown";
u64 value;
@@ -1488,14 +1496,6 @@ void facility_unavailable_exception(struct pt_regs *regs)
emulate_single_step(regs);
}
return;
- } else if ((status == FSCR_LM_LG) && cpu_has_feature(CPU_FTR_ARCH_300)) {
- /*
- * This process has touched LM, so turn it on forever
- * for this process
- */
- current->thread.fscr |= FSCR_LM;
- mtspr(SPRN_FSCR, current->thread.fscr);
- return;
}
if (status == FSCR_TM_LG) {
@@ -1519,7 +1519,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
return;
}
- if ((status < ARRAY_SIZE(facility_strings)) &&
+ if ((hv || status >= 2) &&
+ (status < ARRAY_SIZE(facility_strings)) &&
facility_strings[status])
facility = facility_strings[status];
@@ -1527,9 +1528,8 @@ void facility_unavailable_exception(struct pt_regs *regs)
if (!arch_irq_disabled_regs(regs))
local_irq_enable();
- pr_err_ratelimited(
- "%sFacility '%s' unavailable, exception at 0x%lx, MSR=%lx\n",
- hv ? "Hypervisor " : "", facility, regs->nip, regs->msr);
+ pr_err_ratelimited("%sFacility '%s' unavailable (%d), exception at 0x%lx, MSR=%lx\n",
+ hv ? "Hypervisor " : "", facility, status, regs->nip, regs->msr);
out:
if (user_mode(regs)) {
@@ -1754,6 +1754,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
return;
}
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "block_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP) {
return;
@@ -1768,6 +1771,9 @@ void DebugException(struct pt_regs *regs, unsigned long debug_status)
/* Clear the instruction completion event */
mtspr(SPRN_DBSR, DBSR_IC);
+ if (kprobe_post_handler(regs))
+ return;
+
if (notify_die(DIE_SSTEP, "single_step", regs, 5,
5, SIGTRAP) == NOTIFY_STOP) {
return;
diff --git a/arch/powerpc/kernel/vio.c b/arch/powerpc/kernel/vio.c
deleted file mode 100644
index b3813ddb2fb4..000000000000
--- a/arch/powerpc/kernel/vio.c
+++ /dev/null
@@ -1,1702 +0,0 @@
-/*
- * IBM PowerPC Virtual I/O Infrastructure Support.
- *
- * Copyright (c) 2003,2008 IBM Corp.
- * Dave Engebretsen engebret@us.ibm.com
- * Santiago Leon santil@us.ibm.com
- * Hollis Blanchard <hollisb@us.ibm.com>
- * Stephen Rothwell
- * Robert Jennings <rcjenn@us.ibm.com>
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of the GNU General Public License
- * as published by the Free Software Foundation; either version
- * 2 of the License, or (at your option) any later version.
- */
-
-#include <linux/cpu.h>
-#include <linux/types.h>
-#include <linux/delay.h>
-#include <linux/stat.h>
-#include <linux/device.h>
-#include <linux/init.h>
-#include <linux/slab.h>
-#include <linux/console.h>
-#include <linux/export.h>
-#include <linux/mm.h>
-#include <linux/dma-mapping.h>
-#include <linux/kobject.h>
-
-#include <asm/iommu.h>
-#include <asm/dma.h>
-#include <asm/vio.h>
-#include <asm/prom.h>
-#include <asm/firmware.h>
-#include <asm/tce.h>
-#include <asm/page.h>
-#include <asm/hvcall.h>
-
-static struct vio_dev vio_bus_device = { /* fake "parent" device */
- .name = "vio",
- .type = "",
- .dev.init_name = "vio",
- .dev.bus = &vio_bus_type,
-};
-
-#ifdef CONFIG_PPC_SMLPAR
-/**
- * vio_cmo_pool - A pool of IO memory for CMO use
- *
- * @size: The size of the pool in bytes
- * @free: The amount of free memory in the pool
- */
-struct vio_cmo_pool {
- size_t size;
- size_t free;
-};
-
-/* How many ms to delay queued balance work */
-#define VIO_CMO_BALANCE_DELAY 100
-
-/* Portion out IO memory to CMO devices by this chunk size */
-#define VIO_CMO_BALANCE_CHUNK 131072
-
-/**
- * vio_cmo_dev_entry - A device that is CMO-enabled and requires entitlement
- *
- * @vio_dev: struct vio_dev pointer
- * @list: pointer to other devices on bus that are being tracked
- */
-struct vio_cmo_dev_entry {
- struct vio_dev *viodev;
- struct list_head list;
-};
-
-/**
- * vio_cmo - VIO bus accounting structure for CMO entitlement
- *
- * @lock: spinlock for entire structure
- * @balance_q: work queue for balancing system entitlement
- * @device_list: list of CMO-enabled devices requiring entitlement
- * @entitled: total system entitlement in bytes
- * @reserve: pool of memory from which devices reserve entitlement, incl. spare
- * @excess: pool of excess entitlement not needed for device reserves or spare
- * @spare: IO memory for device hotplug functionality
- * @min: minimum necessary for system operation
- * @desired: desired memory for system operation
- * @curr: bytes currently allocated
- * @high: high water mark for IO data usage
- */
-static struct vio_cmo {
- spinlock_t lock;
- struct delayed_work balance_q;
- struct list_head device_list;
- size_t entitled;
- struct vio_cmo_pool reserve;
- struct vio_cmo_pool excess;
- size_t spare;
- size_t min;
- size_t desired;
- size_t curr;
- size_t high;
-} vio_cmo;
-
-/**
- * vio_cmo_OF_devices - Count the number of OF devices that have DMA windows
- */
-static int vio_cmo_num_OF_devs(void)
-{
- struct device_node *node_vroot;
- int count = 0;
-
- /*
- * Count the number of vdevice entries with an
- * ibm,my-dma-window OF property
- */
- node_vroot = of_find_node_by_name(NULL, "vdevice");
- if (node_vroot) {
- struct device_node *of_node;
- struct property *prop;
-
- for_each_child_of_node(node_vroot, of_node) {
- prop = of_find_property(of_node, "ibm,my-dma-window",
- NULL);
- if (prop)
- count++;
- }
- }
- of_node_put(node_vroot);
- return count;
-}
-
-/**
- * vio_cmo_alloc - allocate IO memory for CMO-enable devices
- *
- * @viodev: VIO device requesting IO memory
- * @size: size of allocation requested
- *
- * Allocations come from memory reserved for the devices and any excess
- * IO memory available to all devices. The spare pool used to service
- * hotplug must be equal to %VIO_CMO_MIN_ENT for the excess pool to be
- * made available.
- *
- * Return codes:
- * 0 for successful allocation and -ENOMEM for a failure
- */
-static inline int vio_cmo_alloc(struct vio_dev *viodev, size_t size)
-{
- unsigned long flags;
- size_t reserve_free = 0;
- size_t excess_free = 0;
- int ret = -ENOMEM;
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
-
- /* Determine the amount of free entitlement available in reserve */
- if (viodev->cmo.entitled > viodev->cmo.allocated)
- reserve_free = viodev->cmo.entitled - viodev->cmo.allocated;
-
- /* If spare is not fulfilled, the excess pool can not be used. */
- if (vio_cmo.spare >= VIO_CMO_MIN_ENT)
- excess_free = vio_cmo.excess.free;
-
- /* The request can be satisfied */
- if ((reserve_free + excess_free) >= size) {
- vio_cmo.curr += size;
- if (vio_cmo.curr > vio_cmo.high)
- vio_cmo.high = vio_cmo.curr;
- viodev->cmo.allocated += size;
- size -= min(reserve_free, size);
- vio_cmo.excess.free -= size;
- ret = 0;
- }
-
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
- return ret;
-}
-
-/**
- * vio_cmo_dealloc - deallocate IO memory from CMO-enable devices
- * @viodev: VIO device freeing IO memory
- * @size: size of deallocation
- *
- * IO memory is freed by the device back to the correct memory pools.
- * The spare pool is replenished first from either memory pool, then
- * the reserve pool is used to reduce device entitlement, the excess
- * pool is used to increase the reserve pool toward the desired entitlement
- * target, and then the remaining memory is returned to the pools.
- *
- */
-static inline void vio_cmo_dealloc(struct vio_dev *viodev, size_t size)
-{
- unsigned long flags;
- size_t spare_needed = 0;
- size_t excess_freed = 0;
- size_t reserve_freed = size;
- size_t tmp;
- int balance = 0;
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
- vio_cmo.curr -= size;
-
- /* Amount of memory freed from the excess pool */
- if (viodev->cmo.allocated > viodev->cmo.entitled) {
- excess_freed = min(reserve_freed, (viodev->cmo.allocated -
- viodev->cmo.entitled));
- reserve_freed -= excess_freed;
- }
-
- /* Remove allocation from device */
- viodev->cmo.allocated -= (reserve_freed + excess_freed);
-
- /* Spare is a subset of the reserve pool, replenish it first. */
- spare_needed = VIO_CMO_MIN_ENT - vio_cmo.spare;
-
- /*
- * Replenish the spare in the reserve pool from the excess pool.
- * This moves entitlement into the reserve pool.
- */
- if (spare_needed && excess_freed) {
- tmp = min(excess_freed, spare_needed);
- vio_cmo.excess.size -= tmp;
- vio_cmo.reserve.size += tmp;
- vio_cmo.spare += tmp;
- excess_freed -= tmp;
- spare_needed -= tmp;
- balance = 1;
- }
-
- /*
- * Replenish the spare in the reserve pool from the reserve pool.
- * This removes entitlement from the device down to VIO_CMO_MIN_ENT,
- * if needed, and gives it to the spare pool. The amount of used
- * memory in this pool does not change.
- */
- if (spare_needed && reserve_freed) {
- tmp = min3(spare_needed, reserve_freed, (viodev->cmo.entitled - VIO_CMO_MIN_ENT));
-
- vio_cmo.spare += tmp;
- viodev->cmo.entitled -= tmp;
- reserve_freed -= tmp;
- spare_needed -= tmp;
- balance = 1;
- }
-
- /*
- * Increase the reserve pool until the desired allocation is met.
- * Move an allocation freed from the excess pool into the reserve
- * pool and schedule a balance operation.
- */
- if (excess_freed && (vio_cmo.desired > vio_cmo.reserve.size)) {
- tmp = min(excess_freed, (vio_cmo.desired - vio_cmo.reserve.size));
-
- vio_cmo.excess.size -= tmp;
- vio_cmo.reserve.size += tmp;
- excess_freed -= tmp;
- balance = 1;
- }
-
- /* Return memory from the excess pool to that pool */
- if (excess_freed)
- vio_cmo.excess.free += excess_freed;
-
- if (balance)
- schedule_delayed_work(&vio_cmo.balance_q, VIO_CMO_BALANCE_DELAY);
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
-}
-
-/**
- * vio_cmo_entitlement_update - Manage system entitlement changes
- *
- * @new_entitlement: new system entitlement to attempt to accommodate
- *
- * Increases in entitlement will be used to fulfill the spare entitlement
- * and the rest is given to the excess pool. Decreases, if they are
- * possible, come from the excess pool and from unused device entitlement
- *
- * Returns: 0 on success, -ENOMEM when change can not be made
- */
-int vio_cmo_entitlement_update(size_t new_entitlement)
-{
- struct vio_dev *viodev;
- struct vio_cmo_dev_entry *dev_ent;
- unsigned long flags;
- size_t avail, delta, tmp;
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
-
- /* Entitlement increases */
- if (new_entitlement > vio_cmo.entitled) {
- delta = new_entitlement - vio_cmo.entitled;
-
- /* Fulfill spare allocation */
- if (vio_cmo.spare < VIO_CMO_MIN_ENT) {
- tmp = min(delta, (VIO_CMO_MIN_ENT - vio_cmo.spare));
- vio_cmo.spare += tmp;
- vio_cmo.reserve.size += tmp;
- delta -= tmp;
- }
-
- /* Remaining new allocation goes to the excess pool */
- vio_cmo.entitled += delta;
- vio_cmo.excess.size += delta;
- vio_cmo.excess.free += delta;
-
- goto out;
- }
-
- /* Entitlement decreases */
- delta = vio_cmo.entitled - new_entitlement;
- avail = vio_cmo.excess.free;
-
- /*
- * Need to check how much unused entitlement each device can
- * sacrifice to fulfill entitlement change.
- */
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
- if (avail >= delta)
- break;
-
- viodev = dev_ent->viodev;
- if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
- (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
- avail += viodev->cmo.entitled -
- max_t(size_t, viodev->cmo.allocated,
- VIO_CMO_MIN_ENT);
- }
-
- if (delta <= avail) {
- vio_cmo.entitled -= delta;
-
- /* Take entitlement from the excess pool first */
- tmp = min(vio_cmo.excess.free, delta);
- vio_cmo.excess.size -= tmp;
- vio_cmo.excess.free -= tmp;
- delta -= tmp;
-
- /*
- * Remove all but VIO_CMO_MIN_ENT bytes from devices
- * until entitlement change is served
- */
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
- if (!delta)
- break;
-
- viodev = dev_ent->viodev;
- tmp = 0;
- if ((viodev->cmo.entitled > viodev->cmo.allocated) &&
- (viodev->cmo.entitled > VIO_CMO_MIN_ENT))
- tmp = viodev->cmo.entitled -
- max_t(size_t, viodev->cmo.allocated,
- VIO_CMO_MIN_ENT);
- viodev->cmo.entitled -= min(tmp, delta);
- delta -= min(tmp, delta);
- }
- } else {
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
- return -ENOMEM;
- }
-
-out:
- schedule_delayed_work(&vio_cmo.balance_q, 0);
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
- return 0;
-}
-
-/**
- * vio_cmo_balance - Balance entitlement among devices
- *
- * @work: work queue structure for this operation
- *
- * Any system entitlement above the minimum needed for devices, or
- * already allocated to devices, can be distributed to the devices.
- * The list of devices is iterated through to recalculate the desired
- * entitlement level and to determine how much entitlement above the
- * minimum entitlement is allocated to devices.
- *
- * Small chunks of the available entitlement are given to devices until
- * their requirements are fulfilled or there is no entitlement left to give.
- * Upon completion sizes of the reserve and excess pools are calculated.
- *
- * The system minimum entitlement level is also recalculated here.
- * Entitlement will be reserved for devices even after vio_bus_remove to
- * accommodate reloading the driver. The OF tree is walked to count the
- * number of devices present and this will remove entitlement for devices
- * that have actually left the system after having vio_bus_remove called.
- */
-static void vio_cmo_balance(struct work_struct *work)
-{
- struct vio_cmo *cmo;
- struct vio_dev *viodev;
- struct vio_cmo_dev_entry *dev_ent;
- unsigned long flags;
- size_t avail = 0, level, chunk, need;
- int devcount = 0, fulfilled;
-
- cmo = container_of(work, struct vio_cmo, balance_q.work);
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
-
- /* Calculate minimum entitlement and fulfill spare */
- cmo->min = vio_cmo_num_OF_devs() * VIO_CMO_MIN_ENT;
- BUG_ON(cmo->min > cmo->entitled);
- cmo->spare = min_t(size_t, VIO_CMO_MIN_ENT, (cmo->entitled - cmo->min));
- cmo->min += cmo->spare;
- cmo->desired = cmo->min;
-
- /*
- * Determine how much entitlement is available and reset device
- * entitlements
- */
- avail = cmo->entitled - cmo->spare;
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
- viodev = dev_ent->viodev;
- devcount++;
- viodev->cmo.entitled = VIO_CMO_MIN_ENT;
- cmo->desired += (viodev->cmo.desired - VIO_CMO_MIN_ENT);
- avail -= max_t(size_t, viodev->cmo.allocated, VIO_CMO_MIN_ENT);
- }
-
- /*
- * Having provided each device with the minimum entitlement, loop
- * over the devices portioning out the remaining entitlement
- * until there is nothing left.
- */
- level = VIO_CMO_MIN_ENT;
- while (avail) {
- fulfilled = 0;
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
- viodev = dev_ent->viodev;
-
- if (viodev->cmo.desired <= level) {
- fulfilled++;
- continue;
- }
-
- /*
- * Give the device up to VIO_CMO_BALANCE_CHUNK
- * bytes of entitlement, but do not exceed the
- * desired level of entitlement for the device.
- */
- chunk = min_t(size_t, avail, VIO_CMO_BALANCE_CHUNK);
- chunk = min(chunk, (viodev->cmo.desired -
- viodev->cmo.entitled));
- viodev->cmo.entitled += chunk;
-
- /*
- * If the memory for this entitlement increase was
- * already allocated to the device it does not come
- * from the available pool being portioned out.
- */
- need = max(viodev->cmo.allocated, viodev->cmo.entitled)-
- max(viodev->cmo.allocated, level);
- avail -= need;
-
- }
- if (fulfilled == devcount)
- break;
- level += VIO_CMO_BALANCE_CHUNK;
- }
-
- /* Calculate new reserve and excess pool sizes */
- cmo->reserve.size = cmo->min;
- cmo->excess.free = 0;
- cmo->excess.size = 0;
- need = 0;
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list) {
- viodev = dev_ent->viodev;
- /* Calculated reserve size above the minimum entitlement */
- if (viodev->cmo.entitled)
- cmo->reserve.size += (viodev->cmo.entitled -
- VIO_CMO_MIN_ENT);
- /* Calculated used excess entitlement */
- if (viodev->cmo.allocated > viodev->cmo.entitled)
- need += viodev->cmo.allocated - viodev->cmo.entitled;
- }
- cmo->excess.size = cmo->entitled - cmo->reserve.size;
- cmo->excess.free = cmo->excess.size - need;
-
- cancel_delayed_work(to_delayed_work(work));
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
-}
-
-static void *vio_dma_iommu_alloc_coherent(struct device *dev, size_t size,
- dma_addr_t *dma_handle, gfp_t flag,
- unsigned long attrs)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- void *ret;
-
- if (vio_cmo_alloc(viodev, roundup(size, PAGE_SIZE))) {
- atomic_inc(&viodev->cmo.allocs_failed);
- return NULL;
- }
-
- ret = dma_iommu_ops.alloc(dev, size, dma_handle, flag, attrs);
- if (unlikely(ret == NULL)) {
- vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
- atomic_inc(&viodev->cmo.allocs_failed);
- }
-
- return ret;
-}
-
-static void vio_dma_iommu_free_coherent(struct device *dev, size_t size,
- void *vaddr, dma_addr_t dma_handle,
- unsigned long attrs)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
-
- dma_iommu_ops.free(dev, size, vaddr, dma_handle, attrs);
-
- vio_cmo_dealloc(viodev, roundup(size, PAGE_SIZE));
-}
-
-static dma_addr_t vio_dma_iommu_map_page(struct device *dev, struct page *page,
- unsigned long offset, size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
- dma_addr_t ret = DMA_ERROR_CODE;
-
- tbl = get_iommu_table_base(dev);
- if (vio_cmo_alloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)))) {
- atomic_inc(&viodev->cmo.allocs_failed);
- return ret;
- }
-
- ret = dma_iommu_ops.map_page(dev, page, offset, size, direction, attrs);
- if (unlikely(dma_mapping_error(dev, ret))) {
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
- atomic_inc(&viodev->cmo.allocs_failed);
- }
-
- return ret;
-}
-
-static void vio_dma_iommu_unmap_page(struct device *dev, dma_addr_t dma_handle,
- size_t size,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
-
- tbl = get_iommu_table_base(dev);
- dma_iommu_ops.unmap_page(dev, dma_handle, size, direction, attrs);
-
- vio_cmo_dealloc(viodev, roundup(size, IOMMU_PAGE_SIZE(tbl)));
-}
-
-static int vio_dma_iommu_map_sg(struct device *dev, struct scatterlist *sglist,
- int nelems, enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
- struct scatterlist *sgl;
- int ret, count;
- size_t alloc_size = 0;
-
- tbl = get_iommu_table_base(dev);
- for_each_sg(sglist, sgl, nelems, count)
- alloc_size += roundup(sgl->length, IOMMU_PAGE_SIZE(tbl));
-
- if (vio_cmo_alloc(viodev, alloc_size)) {
- atomic_inc(&viodev->cmo.allocs_failed);
- return 0;
- }
-
- ret = dma_iommu_ops.map_sg(dev, sglist, nelems, direction, attrs);
-
- if (unlikely(!ret)) {
- vio_cmo_dealloc(viodev, alloc_size);
- atomic_inc(&viodev->cmo.allocs_failed);
- return ret;
- }
-
- for_each_sg(sglist, sgl, ret, count)
- alloc_size -= roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
- if (alloc_size)
- vio_cmo_dealloc(viodev, alloc_size);
-
- return ret;
-}
-
-static void vio_dma_iommu_unmap_sg(struct device *dev,
- struct scatterlist *sglist, int nelems,
- enum dma_data_direction direction,
- unsigned long attrs)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- struct iommu_table *tbl;
- struct scatterlist *sgl;
- size_t alloc_size = 0;
- int count;
-
- tbl = get_iommu_table_base(dev);
- for_each_sg(sglist, sgl, nelems, count)
- alloc_size += roundup(sgl->dma_length, IOMMU_PAGE_SIZE(tbl));
-
- dma_iommu_ops.unmap_sg(dev, sglist, nelems, direction, attrs);
-
- vio_cmo_dealloc(viodev, alloc_size);
-}
-
-static int vio_dma_iommu_dma_supported(struct device *dev, u64 mask)
-{
- return dma_iommu_ops.dma_supported(dev, mask);
-}
-
-static u64 vio_dma_get_required_mask(struct device *dev)
-{
- return dma_iommu_ops.get_required_mask(dev);
-}
-
-static struct dma_map_ops vio_dma_mapping_ops = {
- .alloc = vio_dma_iommu_alloc_coherent,
- .free = vio_dma_iommu_free_coherent,
- .mmap = dma_direct_mmap_coherent,
- .map_sg = vio_dma_iommu_map_sg,
- .unmap_sg = vio_dma_iommu_unmap_sg,
- .map_page = vio_dma_iommu_map_page,
- .unmap_page = vio_dma_iommu_unmap_page,
- .dma_supported = vio_dma_iommu_dma_supported,
- .get_required_mask = vio_dma_get_required_mask,
-};
-
-/**
- * vio_cmo_set_dev_desired - Set desired entitlement for a device
- *
- * @viodev: struct vio_dev for device to alter
- * @desired: new desired entitlement level in bytes
- *
- * For use by devices to request a change to their entitlement at runtime or
- * through sysfs. The desired entitlement level is changed and a balancing
- * of system resources is scheduled to run in the future.
- */
-void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired)
-{
- unsigned long flags;
- struct vio_cmo_dev_entry *dev_ent;
- int found = 0;
-
- if (!firmware_has_feature(FW_FEATURE_CMO))
- return;
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
- if (desired < VIO_CMO_MIN_ENT)
- desired = VIO_CMO_MIN_ENT;
-
-	/*
-	 * Changes are only made for devices in the device list; if a
-	 * device is not listed, no driver is loaded for it and it
-	 * cannot receive entitlement.
-	 */
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
- if (viodev == dev_ent->viodev) {
- found = 1;
- break;
- }
- if (!found) {
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
- return;
- }
-
- /* Increase/decrease in desired device entitlement */
- if (desired >= viodev->cmo.desired) {
-		/* Just bump the bus and device values prior to a balance */
- vio_cmo.desired += desired - viodev->cmo.desired;
- viodev->cmo.desired = desired;
- } else {
- /* Decrease bus and device values for desired entitlement */
- vio_cmo.desired -= viodev->cmo.desired - desired;
- viodev->cmo.desired = desired;
- /*
- * If less entitlement is desired than current entitlement, move
- * any reserve memory in the change region to the excess pool.
- */
- if (viodev->cmo.entitled > desired) {
- vio_cmo.reserve.size -= viodev->cmo.entitled - desired;
- vio_cmo.excess.size += viodev->cmo.entitled - desired;
- /*
- * If entitlement moving from the reserve pool to the
- * excess pool is currently unused, add to the excess
- * free counter.
- */
- if (viodev->cmo.allocated < viodev->cmo.entitled)
- vio_cmo.excess.free += viodev->cmo.entitled -
- max(viodev->cmo.allocated, desired);
- viodev->cmo.entitled = desired;
- }
- }
- schedule_delayed_work(&vio_cmo.balance_q, 0);
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
-}
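
A driver-side usage sketch for vio_cmo_set_dev_desired(); the function and its clamping behaviour are taken from the code above, while the helper name and sizing are hypothetical:

#include <asm/vio.h>

/* Hypothetical hook: ask for more IO entitlement once the expected
 * working-set size is known. The request is clamped to at least
 * VIO_CMO_MIN_ENT and rebalanced asynchronously by the bus. */
static void example_adjust_entitlement(struct vio_dev *vdev,
				       size_t working_set_bytes)
{
	vio_cmo_set_dev_desired(vdev, working_set_bytes);
}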
-
-/**
- * vio_cmo_bus_probe - Handle CMO specific bus probe activities
- *
- * @viodev: Pointer to struct vio_dev for device
- *
- * Determine the device's IO memory entitlement needs, attempting
- * to satisfy the system minimum entitlement at first and scheduling
- * a balance operation to take care of the rest at a later time.
- *
- * Returns: 0 on success, -EINVAL when the device doesn't support CMO, and
- *          -ENOMEM when entitlement is not available for the device or
- *          device entry.
- */
-static int vio_cmo_bus_probe(struct vio_dev *viodev)
-{
- struct vio_cmo_dev_entry *dev_ent;
- struct device *dev = &viodev->dev;
- struct iommu_table *tbl;
- struct vio_driver *viodrv = to_vio_driver(dev->driver);
- unsigned long flags;
- size_t size;
- bool dma_capable = false;
-
- tbl = get_iommu_table_base(dev);
-
- /* A device requires entitlement if it has a DMA window property */
- switch (viodev->family) {
- case VDEVICE:
- if (of_get_property(viodev->dev.of_node,
- "ibm,my-dma-window", NULL))
- dma_capable = true;
- break;
- case PFO:
- dma_capable = false;
- break;
- default:
- dev_warn(dev, "unknown device family: %d\n", viodev->family);
- BUG();
- break;
- }
-
- /* Configure entitlement for the device. */
- if (dma_capable) {
- /* Check that the driver is CMO enabled and get desired DMA */
- if (!viodrv->get_desired_dma) {
- dev_err(dev, "%s: device driver does not support CMO\n",
- __func__);
- return -EINVAL;
- }
-
- viodev->cmo.desired =
- IOMMU_PAGE_ALIGN(viodrv->get_desired_dma(viodev), tbl);
- if (viodev->cmo.desired < VIO_CMO_MIN_ENT)
- viodev->cmo.desired = VIO_CMO_MIN_ENT;
- size = VIO_CMO_MIN_ENT;
-
- dev_ent = kmalloc(sizeof(struct vio_cmo_dev_entry),
- GFP_KERNEL);
- if (!dev_ent)
- return -ENOMEM;
-
- dev_ent->viodev = viodev;
- spin_lock_irqsave(&vio_cmo.lock, flags);
- list_add(&dev_ent->list, &vio_cmo.device_list);
- } else {
- viodev->cmo.desired = 0;
- size = 0;
- spin_lock_irqsave(&vio_cmo.lock, flags);
- }
-
-	/*
-	 * If vio_cmo.min has not changed since it was last set, the
-	 * number of devices in the OF tree has remained constant and
-	 * the IO memory for this device is already in the reserve
-	 * pool.
-	 */
- if (vio_cmo.min == ((vio_cmo_num_OF_devs() + 1) *
- VIO_CMO_MIN_ENT)) {
-		/* Update desired entitlement if device requires it */
- if (size)
- vio_cmo.desired += (viodev->cmo.desired -
- VIO_CMO_MIN_ENT);
- } else {
- size_t tmp;
-
- tmp = vio_cmo.spare + vio_cmo.excess.free;
- if (tmp < size) {
-			dev_err(dev, "%s: insufficient free "
-				"entitlement to add device. "
-				"Need %lu, have %lu\n", __func__,
-				size, tmp);
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
- return -ENOMEM;
- }
-
- /* Use excess pool first to fulfill request */
- tmp = min(size, vio_cmo.excess.free);
- vio_cmo.excess.free -= tmp;
- vio_cmo.excess.size -= tmp;
- vio_cmo.reserve.size += tmp;
-
- /* Use spare if excess pool was insufficient */
- vio_cmo.spare -= size - tmp;
-
- /* Update bus accounting */
- vio_cmo.min += size;
- vio_cmo.desired += viodev->cmo.desired;
- }
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
- return 0;
-}
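
Under CMO, vio_cmo_bus_probe() returns -EINVAL for DMA-capable devices whose driver lacks a get_desired_dma() callback, so such drivers must supply one. A minimal sketch; the ring and buffer constants are invented for illustration:

#include <asm/vio.h>

#define EX_RX_BUFFERS	64		/* hypothetical ring depth */
#define EX_BUF_SIZE	(64 * 1024)	/* hypothetical buffer size */

/* Report the IO entitlement this device would like; the bus aligns
 * the value to the IOMMU page size and enforces VIO_CMO_MIN_ENT. */
static unsigned long example_get_desired_dma(struct vio_dev *vdev)
{
	return EX_RX_BUFFERS * EX_BUF_SIZE;
}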
-
-/**
- * vio_cmo_bus_remove - Handle CMO specific bus removal activities
- *
- * @viodev: Pointer to struct vio_dev for device
- *
- * Remove the device from the cmo device list. The minimum entitlement
- * will be reserved for the device as long as it is in the system. The
- * rest of the device's entitlement is returned to the system pools.
- */
-static void vio_cmo_bus_remove(struct vio_dev *viodev)
-{
- struct vio_cmo_dev_entry *dev_ent;
- unsigned long flags;
- size_t tmp;
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
- if (viodev->cmo.allocated) {
- dev_err(&viodev->dev, "%s: device had %lu bytes of IO "
- "allocated after remove operation.\n",
- __func__, viodev->cmo.allocated);
- BUG();
- }
-
- /*
- * Remove the device from the device list being maintained for
- * CMO enabled devices.
- */
- list_for_each_entry(dev_ent, &vio_cmo.device_list, list)
- if (viodev == dev_ent->viodev) {
- list_del(&dev_ent->list);
- kfree(dev_ent);
- break;
- }
-
-	/*
-	 * Devices that do not require any entitlement need no further
-	 * processing. Otherwise, return the device's entitlement back
-	 * to the pools.
-	 */
- if (viodev->cmo.entitled) {
-		/*
-		 * This device has not yet left the OF tree, so its
-		 * minimum entitlement remains in vio_cmo.min and
-		 * vio_cmo.desired.
-		 */
- vio_cmo.desired -= (viodev->cmo.desired - VIO_CMO_MIN_ENT);
-
-		/*
-		 * Keep the device's minimum allocation in the reserve
-		 * pool for as long as it exists in the OF tree, as
-		 * determined by a later balance operation.
-		 */
- viodev->cmo.entitled -= VIO_CMO_MIN_ENT;
-
- /* Replenish spare from freed reserve pool */
- if (viodev->cmo.entitled && (vio_cmo.spare < VIO_CMO_MIN_ENT)) {
- tmp = min(viodev->cmo.entitled, (VIO_CMO_MIN_ENT -
- vio_cmo.spare));
- vio_cmo.spare += tmp;
- viodev->cmo.entitled -= tmp;
- }
-
- /* Remaining reserve goes to excess pool */
- vio_cmo.excess.size += viodev->cmo.entitled;
- vio_cmo.excess.free += viodev->cmo.entitled;
- vio_cmo.reserve.size -= viodev->cmo.entitled;
-
-		/*
-		 * Until the device is removed it will keep a
-		 * minimum entitlement; this guarantees that a
-		 * module unload/reload will succeed.
-		 */
- viodev->cmo.entitled = VIO_CMO_MIN_ENT;
- viodev->cmo.desired = VIO_CMO_MIN_ENT;
- atomic_set(&viodev->cmo.allocs_failed, 0);
- }
-
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
-}
-
-static void vio_cmo_set_dma_ops(struct vio_dev *viodev)
-{
- set_dma_ops(&viodev->dev, &vio_dma_mapping_ops);
-}
-
-/**
- * vio_cmo_bus_init - CMO entitlement initialization at bus init time
- *
- * Set up the reserve and excess entitlement pools based on available
- * system entitlement and the number of devices in the OF tree that
- * require entitlement in the reserve pool.
- */
-static void vio_cmo_bus_init(void)
-{
- struct hvcall_mpp_data mpp_data;
- int err;
-
- memset(&vio_cmo, 0, sizeof(struct vio_cmo));
- spin_lock_init(&vio_cmo.lock);
- INIT_LIST_HEAD(&vio_cmo.device_list);
- INIT_DELAYED_WORK(&vio_cmo.balance_q, vio_cmo_balance);
-
- /* Get current system entitlement */
- err = h_get_mpp(&mpp_data);
-
-	/*
-	 * On failure, continue with entitlement set to 0; we will
-	 * panic() later when the spare is reserved.
-	 */
-	if (err != H_SUCCESS) {
-		printk(KERN_ERR "%s: unable to determine system IO "
-				"entitlement. (%d)\n", __func__, err);
- vio_cmo.entitled = 0;
- } else {
- vio_cmo.entitled = mpp_data.entitled_mem;
- }
-
- /* Set reservation and check against entitlement */
- vio_cmo.spare = VIO_CMO_MIN_ENT;
- vio_cmo.reserve.size = vio_cmo.spare;
- vio_cmo.reserve.size += (vio_cmo_num_OF_devs() *
- VIO_CMO_MIN_ENT);
- if (vio_cmo.reserve.size > vio_cmo.entitled) {
- printk(KERN_ERR "%s: insufficient system entitlement\n",
- __func__);
- panic("%s: Insufficient system entitlement", __func__);
- }
-
- /* Set the remaining accounting variables */
- vio_cmo.excess.size = vio_cmo.entitled - vio_cmo.reserve.size;
- vio_cmo.excess.free = vio_cmo.excess.size;
- vio_cmo.min = vio_cmo.reserve.size;
- vio_cmo.desired = vio_cmo.reserve.size;
-}
-
-/* sysfs device functions and data structures for CMO */
-
-#define viodev_cmo_rd_attr(name) \
-static ssize_t viodev_cmo_##name##_show(struct device *dev, \
- struct device_attribute *attr, \
- char *buf) \
-{ \
- return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.name); \
-}
-
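
Written out by hand, viodev_cmo_rd_attr(desired) expands to the following show routine (the entitled and allocated variants below differ only in the field name):

static ssize_t viodev_cmo_desired_show(struct device *dev,
				       struct device_attribute *attr,
				       char *buf)
{
	return sprintf(buf, "%lu\n", to_vio_dev(dev)->cmo.desired);
}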
-static ssize_t viodev_cmo_allocs_failed_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- return sprintf(buf, "%d\n", atomic_read(&viodev->cmo.allocs_failed));
-}
-
-static ssize_t viodev_cmo_allocs_failed_reset(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- atomic_set(&viodev->cmo.allocs_failed, 0);
- return count;
-}
-
-static ssize_t viodev_cmo_desired_set(struct device *dev,
- struct device_attribute *attr, const char *buf, size_t count)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- size_t new_desired;
- int ret;
-
- ret = kstrtoul(buf, 10, &new_desired);
- if (ret)
- return ret;
-
- vio_cmo_set_dev_desired(viodev, new_desired);
- return count;
-}
-
-viodev_cmo_rd_attr(desired);
-viodev_cmo_rd_attr(entitled);
-viodev_cmo_rd_attr(allocated);
-
-static ssize_t name_show(struct device *, struct device_attribute *, char *);
-static ssize_t devspec_show(struct device *, struct device_attribute *, char *);
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
- char *buf);
-static struct device_attribute vio_cmo_dev_attrs[] = {
- __ATTR_RO(name),
- __ATTR_RO(devspec),
- __ATTR_RO(modalias),
- __ATTR(cmo_desired, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
- viodev_cmo_desired_show, viodev_cmo_desired_set),
- __ATTR(cmo_entitled, S_IRUGO, viodev_cmo_entitled_show, NULL),
- __ATTR(cmo_allocated, S_IRUGO, viodev_cmo_allocated_show, NULL),
- __ATTR(cmo_allocs_failed, S_IWUSR|S_IRUSR|S_IWGRP|S_IRGRP|S_IROTH,
- viodev_cmo_allocs_failed_show, viodev_cmo_allocs_failed_reset),
- __ATTR_NULL
-};
-
-/* sysfs bus functions and data structures for CMO */
-
-#define viobus_cmo_rd_attr(name) \
-static ssize_t cmo_##name##_show(struct bus_type *bt, char *buf) \
-{ \
- return sprintf(buf, "%lu\n", vio_cmo.name); \
-} \
-static BUS_ATTR_RO(cmo_##name)
-
-#define viobus_cmo_pool_rd_attr(name, var) \
-static ssize_t \
-cmo_##name##_##var##_show(struct bus_type *bt, char *buf) \
-{ \
- return sprintf(buf, "%lu\n", vio_cmo.name.var); \
-} \
-static BUS_ATTR_RO(cmo_##name##_##var)
-
-viobus_cmo_rd_attr(entitled);
-viobus_cmo_rd_attr(spare);
-viobus_cmo_rd_attr(min);
-viobus_cmo_rd_attr(desired);
-viobus_cmo_rd_attr(curr);
-viobus_cmo_pool_rd_attr(reserve, size);
-viobus_cmo_pool_rd_attr(excess, size);
-viobus_cmo_pool_rd_attr(excess, free);
-
-static ssize_t cmo_high_show(struct bus_type *bt, char *buf)
-{
- return sprintf(buf, "%lu\n", vio_cmo.high);
-}
-
-static ssize_t cmo_high_store(struct bus_type *bt, const char *buf,
- size_t count)
-{
- unsigned long flags;
-
- spin_lock_irqsave(&vio_cmo.lock, flags);
- vio_cmo.high = vio_cmo.curr;
- spin_unlock_irqrestore(&vio_cmo.lock, flags);
-
- return count;
-}
-static BUS_ATTR_RW(cmo_high);
-
-static struct attribute *vio_bus_attrs[] = {
- &bus_attr_cmo_entitled.attr,
- &bus_attr_cmo_spare.attr,
- &bus_attr_cmo_min.attr,
- &bus_attr_cmo_desired.attr,
- &bus_attr_cmo_curr.attr,
- &bus_attr_cmo_high.attr,
- &bus_attr_cmo_reserve_size.attr,
- &bus_attr_cmo_excess_size.attr,
- &bus_attr_cmo_excess_free.attr,
- NULL,
-};
-ATTRIBUTE_GROUPS(vio_bus);
-
-static void vio_cmo_sysfs_init(void)
-{
- vio_bus_type.dev_attrs = vio_cmo_dev_attrs;
- vio_bus_type.bus_groups = vio_bus_groups;
-}
-#else /* CONFIG_PPC_SMLPAR */
-int vio_cmo_entitlement_update(size_t new_entitlement) { return 0; }
-void vio_cmo_set_dev_desired(struct vio_dev *viodev, size_t desired) {}
-static int vio_cmo_bus_probe(struct vio_dev *viodev) { return 0; }
-static void vio_cmo_bus_remove(struct vio_dev *viodev) {}
-static void vio_cmo_set_dma_ops(struct vio_dev *viodev) {}
-static void vio_cmo_bus_init(void) {}
-static void vio_cmo_sysfs_init(void) { }
-#endif /* CONFIG_PPC_SMLPAR */
-EXPORT_SYMBOL(vio_cmo_entitlement_update);
-EXPORT_SYMBOL(vio_cmo_set_dev_desired);
-
-
-/*
- * Platform Facilities Option (PFO) support
- */
-
-/**
- * vio_h_cop_sync - Perform a synchronous PFO co-processor operation
- *
- * @vdev - Pointer to a struct vio_dev for device
- * @op - Pointer to a struct vio_pfo_op for the operation parameters
- *
- * Calls the hypervisor to synchronously perform the PFO operation
- * described in @op. In the case of a busy response from the hypervisor,
- * the operation will be re-submitted indefinitely unless a non-zero timeout
- * is specified or an error occurs. The timeout places a limit on when to
- * stop re-submitting an operation; the total time can be exceeded if an
- * operation is in progress.
- *
- * op->hcall_err is set to the return code from the last H_COP hcall, or
- * to 0 if an error not involving the hcall was encountered.
- *
- * Returns:
- *	0 on success,
- *	-EINVAL if the h_call fails due to an invalid parameter,
- *	-E2BIG if the h_call cannot be performed synchronously,
- *	-EBUSY if a timeout is specified and has elapsed,
- *	-EACCES if the memory area for data/status has been rescinded, or
- *	-EPERM if a hardware fault has been indicated
- */
-int vio_h_cop_sync(struct vio_dev *vdev, struct vio_pfo_op *op)
-{
- struct device *dev = &vdev->dev;
- unsigned long deadline = 0;
- long hret = 0;
- int ret = 0;
-
- if (op->timeout)
- deadline = jiffies + msecs_to_jiffies(op->timeout);
-
- while (true) {
- hret = plpar_hcall_norets(H_COP, op->flags,
- vdev->resource_id,
- op->in, op->inlen, op->out,
- op->outlen, op->csbcpb);
-
- if (hret == H_SUCCESS ||
- (hret != H_NOT_ENOUGH_RESOURCES &&
- hret != H_BUSY && hret != H_RESOURCE) ||
-		    (op->timeout && time_after(jiffies, deadline)))
- break;
-
- dev_dbg(dev, "%s: hcall ret(%ld), retrying.\n", __func__, hret);
- }
-
- switch (hret) {
- case H_SUCCESS:
- ret = 0;
- break;
- case H_OP_MODE:
- case H_TOO_BIG:
- ret = -E2BIG;
- break;
- case H_RESCINDED:
- ret = -EACCES;
- break;
- case H_HARDWARE:
- ret = -EPERM;
- break;
- case H_NOT_ENOUGH_RESOURCES:
- case H_RESOURCE:
- case H_BUSY:
- ret = -EBUSY;
- break;
- default:
- ret = -EINVAL;
- break;
- }
-
- if (ret)
- dev_dbg(dev, "%s: Sync h_cop_op failure (ret:%d) (hret:%ld)\n",
- __func__, ret, hret);
-
- op->hcall_err = hret;
- return ret;
-}
-EXPORT_SYMBOL(vio_h_cop_sync);
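
A hedged caller-side sketch for vio_h_cop_sync(). Only the struct vio_pfo_op fields referenced by the implementation above are filled in; the flag value, buffer handling, and helper name are placeholders:

#include <linux/device.h>
#include <asm/vio.h>

/* Hypothetical helper: run one synchronous co-processor request.
 * The in/out/csbcpb values must already be addresses the hypervisor
 * can use; obtaining them is elided here. */
static int example_pfo_request(struct vio_dev *vdev, u64 in, u64 inlen,
			       u64 out, u64 outlen, u64 csbcpb)
{
	struct vio_pfo_op op = {
		.flags   = 0,		/* placeholder operation flags */
		.in      = in,
		.inlen   = inlen,
		.out     = out,
		.outlen  = outlen,
		.csbcpb  = csbcpb,
		.timeout = 100,		/* ms; 0 would retry indefinitely */
	};
	int ret = vio_h_cop_sync(vdev, &op);

	if (ret)
		dev_dbg(&vdev->dev, "H_COP failed: ret=%d hcall_err=%ld\n",
			ret, op.hcall_err);
	return ret;
}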
-
-static struct iommu_table *vio_build_iommu_table(struct vio_dev *dev)
-{
- const __be32 *dma_window;
- struct iommu_table *tbl;
- unsigned long offset, size;
-
- dma_window = of_get_property(dev->dev.of_node,
- "ibm,my-dma-window", NULL);
- if (!dma_window)
- return NULL;
-
- tbl = kzalloc(sizeof(*tbl), GFP_KERNEL);
- if (tbl == NULL)
- return NULL;
-
- of_parse_dma_window(dev->dev.of_node, dma_window,
- &tbl->it_index, &offset, &size);
-
- /* TCE table size - measured in tce entries */
- tbl->it_page_shift = IOMMU_PAGE_SHIFT_4K;
- tbl->it_size = size >> tbl->it_page_shift;
- /* offset for VIO should always be 0 */
- tbl->it_offset = offset >> tbl->it_page_shift;
- tbl->it_busno = 0;
- tbl->it_type = TCE_VB;
- tbl->it_blocksize = 16;
-
- if (firmware_has_feature(FW_FEATURE_LPAR))
- tbl->it_ops = &iommu_table_lpar_multi_ops;
- else
- tbl->it_ops = &iommu_table_pseries_ops;
-
- return iommu_init_table(tbl, -1);
-}
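
To make the sizing arithmetic concrete: with the 4 KB TCE page size used here (IOMMU_PAGE_SHIFT_4K is 12), a 256 MB window parsed from ibm,my-dma-window gives it_size = 0x10000000 >> 12 = 65536 TCE entries, and the expected VIO offset of 0 gives it_offset = 0.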
-
-/**
- * vio_match_device: - Tell if a VIO device has a matching
- * VIO device id structure.
- * @ids: array of VIO device id structures to search in
- * @dev: the VIO device structure to match against
- *
- * Used by a driver to check whether a VIO device present in the
- * system is in its list of supported devices. Returns the matching
- * vio_device_id structure or NULL if there is no match.
- */
-static const struct vio_device_id *vio_match_device(
- const struct vio_device_id *ids, const struct vio_dev *dev)
-{
- while (ids->type[0] != '\0') {
- if ((strncmp(dev->type, ids->type, strlen(ids->type)) == 0) &&
- of_device_is_compatible(dev->dev.of_node,
- ids->compat))
- return ids;
- ids++;
- }
- return NULL;
-}
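
Drivers feed vio_match_device() through their id_table. A hedged example table; the strings are hypothetical, and per the strncmp() above the type field acts as a prefix match while compat must satisfy of_device_is_compatible():

#include <linux/module.h>
#include <linux/mod_devicetable.h>

static const struct vio_device_id example_vio_ids[] = {
	{ "network", "IBM,example-vnic" },	/* hypothetical entry */
	{ "", "" }	/* terminator: empty type ends the table */
};
MODULE_DEVICE_TABLE(vio, example_vio_ids);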
-
-/*
- * Convert from struct device to struct vio_dev and pass to driver.
- * dev->driver has already been set by generic code because vio_bus_match
- * succeeded.
- */
-static int vio_bus_probe(struct device *dev)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- struct vio_driver *viodrv = to_vio_driver(dev->driver);
- const struct vio_device_id *id;
- int error = -ENODEV;
-
- if (!viodrv->probe)
- return error;
-
- id = vio_match_device(viodrv->id_table, viodev);
- if (id) {
- memset(&viodev->cmo, 0, sizeof(viodev->cmo));
- if (firmware_has_feature(FW_FEATURE_CMO)) {
- error = vio_cmo_bus_probe(viodev);
- if (error)
- return error;
- }
- error = viodrv->probe(viodev, id);
- if (error && firmware_has_feature(FW_FEATURE_CMO))
- vio_cmo_bus_remove(viodev);
- }
-
- return error;
-}
-
-/* Convert from struct device to struct vio_dev and pass to driver. */
-static int vio_bus_remove(struct device *dev)
-{
- struct vio_dev *viodev = to_vio_dev(dev);
- struct vio_driver *viodrv = to_vio_driver(dev->driver);
- struct device *devptr;
- int ret = 1;
-
- /*
- * Hold a reference to the device after the remove function is called
- * to allow for CMO accounting cleanup for the device.
- */
- devptr = get_device(dev);
-
- if (viodrv->remove)
- ret = viodrv->remove(viodev);
-
- if (!ret && firmware_has_feature(FW_FEATURE_CMO))
- vio_cmo_bus_remove(viodev);
-
- put_device(devptr);
- return ret;
-}
-
-/**
- * __vio_register_driver: - Register a new vio driver
- * @viodrv: The vio_driver structure to be registered.
- * @owner: owning module
- * @mod_name: module name for the driver core
- */
-int __vio_register_driver(struct vio_driver *viodrv, struct module *owner,
- const char *mod_name)
-{
- pr_debug("%s: driver %s registering\n", __func__, viodrv->name);
-
- /* fill in 'struct driver' fields */
- viodrv->driver.name = viodrv->name;
- viodrv->driver.pm = viodrv->pm;
- viodrv->driver.bus = &vio_bus_type;
- viodrv->driver.owner = owner;
- viodrv->driver.mod_name = mod_name;
-
- return driver_register(&viodrv->driver);
-}
-EXPORT_SYMBOL(__vio_register_driver);
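
Putting the pieces together, a minimal registration sketch reusing the hypothetical id table from the earlier example; it assumes the usual vio_register_driver() wrapper macro that passes THIS_MODULE and KBUILD_MODNAME to __vio_register_driver():

#include <linux/module.h>
#include <asm/vio.h>

static int example_probe(struct vio_dev *vdev,
			 const struct vio_device_id *id)
{
	dev_info(&vdev->dev, "bound to %s\n", vdev->name);
	return 0;
}

static int example_remove(struct vio_dev *vdev)
{
	return 0;
}

static struct vio_driver example_driver = {
	.name     = "example_vio",
	.id_table = example_vio_ids,	/* from the sketch above */
	.probe    = example_probe,
	.remove   = example_remove,
};

static int __init example_init(void)
{
	return vio_register_driver(&example_driver);
}
module_init(example_init);

static void __exit example_exit(void)
{
	vio_unregister_driver(&example_driver);
}
module_exit(example_exit);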
-
-/**
- * vio_unregister_driver - Remove registration of vio driver.
- * @viodrv: The vio_driver struct to be removed form registration
- */
-void vio_unregister_driver(struct vio_driver *viodrv)
-{
- driver_unregister(&viodrv->driver);
-}
-EXPORT_SYMBOL(vio_unregister_driver);
-
-/* vio_dev refcount hit 0 */
-static void vio_dev_release(struct device *dev)
-{
- struct iommu_table *tbl = get_iommu_table_base(dev);
-
- if (tbl)
- iommu_free_table(tbl, of_node_full_name(dev->of_node));
- of_node_put(dev->of_node);
- kfree(to_vio_dev(dev));
-}
-
-/**
- * vio_register_device_node: - Register a new vio device.
- * @of_node: The OF node for this device.
- *
- * Creates and initializes a vio_dev structure from the data in
- * of_node and adds it to the list of virtual devices.
- * Returns a pointer to the created vio_dev, or NULL on failure (for
- * example, an unrecognized parent node, or missing 'device_type' or
- * 'reg' properties).
- */
-struct vio_dev *vio_register_device_node(struct device_node *of_node)
-{
- struct vio_dev *viodev;
- struct device_node *parent_node;
- const __be32 *prop;
- enum vio_dev_family family;
- const char *of_node_name = of_node->name ? of_node->name : "<unknown>";
-
-	/*
-	 * Determine if this node is under the /vdevice node or under the
-	 * /ibm,platform-facilities node. This decides the device's family.
-	 */
- parent_node = of_get_parent(of_node);
- if (parent_node) {
- if (!strcmp(parent_node->full_name, "/ibm,platform-facilities"))
- family = PFO;
- else if (!strcmp(parent_node->full_name, "/vdevice"))
- family = VDEVICE;
- else {
- pr_warn("%s: parent(%s) of %s not recognized.\n",
- __func__,
- parent_node->full_name,
- of_node_name);
- of_node_put(parent_node);
- return NULL;
- }
- of_node_put(parent_node);
- } else {
- pr_warn("%s: could not determine the parent of node %s.\n",
- __func__, of_node_name);
- return NULL;
- }
-
- if (family == PFO) {
- if (of_get_property(of_node, "interrupt-controller", NULL)) {
- pr_debug("%s: Skipping the interrupt controller %s.\n",
- __func__, of_node_name);
- return NULL;
- }
- }
-
- /* allocate a vio_dev for this node */
- viodev = kzalloc(sizeof(struct vio_dev), GFP_KERNEL);
- if (viodev == NULL) {
- pr_warn("%s: allocation failure for VIO device.\n", __func__);
- return NULL;
- }
-
-	/* we need the 'device_type' property in order to match with drivers */
- viodev->family = family;
- if (viodev->family == VDEVICE) {
- unsigned int unit_address;
-
- if (of_node->type != NULL)
- viodev->type = of_node->type;
- else {
- pr_warn("%s: node %s is missing the 'device_type' "
- "property.\n", __func__, of_node_name);
- goto out;
- }
-
- prop = of_get_property(of_node, "reg", NULL);
- if (prop == NULL) {
- pr_warn("%s: node %s missing 'reg'\n",
- __func__, of_node_name);
- goto out;
- }
- unit_address = of_read_number(prop, 1);
- dev_set_name(&viodev->dev, "%x", unit_address);
- viodev->irq = irq_of_parse_and_map(of_node, 0);
- viodev->unit_address = unit_address;
- } else {
-		/*
-		 * PFO devices need their resource_id for submitting COP_OPs.
-		 * This is an optional field for devices, but is required
-		 * when performing synchronous ops.
-		 */
- prop = of_get_property(of_node, "ibm,resource-id", NULL);
- if (prop != NULL)
- viodev->resource_id = of_read_number(prop, 1);
-
- dev_set_name(&viodev->dev, "%s", of_node_name);
- viodev->type = of_node_name;
- viodev->irq = 0;
- }
-
- viodev->name = of_node->name;
- viodev->dev.of_node = of_node_get(of_node);
-
- set_dev_node(&viodev->dev, of_node_to_nid(of_node));
-
- /* init generic 'struct device' fields: */
- viodev->dev.parent = &vio_bus_device.dev;
- viodev->dev.bus = &vio_bus_type;
- viodev->dev.release = vio_dev_release;
-
- if (of_get_property(viodev->dev.of_node, "ibm,my-dma-window", NULL)) {
- if (firmware_has_feature(FW_FEATURE_CMO))
- vio_cmo_set_dma_ops(viodev);
- else
- set_dma_ops(&viodev->dev, &dma_iommu_ops);
-
- set_iommu_table_base(&viodev->dev,
- vio_build_iommu_table(viodev));
-
-		/*
-		 * Needed to ensure proper operation of coherent allocations
-		 * later, in case the driver doesn't set it explicitly.
-		 */
- viodev->dev.coherent_dma_mask = DMA_BIT_MASK(64);
- viodev->dev.dma_mask = &viodev->dev.coherent_dma_mask;
- }
-
- /* register with generic device framework */
- if (device_register(&viodev->dev)) {
- printk(KERN_ERR "%s: failed to register device %s\n",
- __func__, dev_name(&viodev->dev));
- put_device(&viodev->dev);
- return NULL;
- }
-
- return viodev;
-
-out: /* Use this exit point for any return prior to device_register */
- kfree(viodev);
-
- return NULL;
-}
-EXPORT_SYMBOL(vio_register_device_node);
-
-/*
- * vio_bus_scan_register_devices - Scan OF and register each child device
- * @root_name: OF node name for the root of the subtree to search.
- *	This must be non-NULL
- *
- * Starting from the root node provided, register the device node for
- * each child beneath the root.
- */
-static void vio_bus_scan_register_devices(char *root_name)
-{
- struct device_node *node_root, *node_child;
-
- if (!root_name)
- return;
-
- node_root = of_find_node_by_name(NULL, root_name);
- if (node_root) {
-
-		/*
-		 * Create a struct vio_dev for each virtual device in
-		 * the device tree. Drivers will associate with them later.
-		 */
- node_child = of_get_next_child(node_root, NULL);
- while (node_child) {
- vio_register_device_node(node_child);
- node_child = of_get_next_child(node_root, node_child);
- }
- of_node_put(node_root);
- }
-}
-
-/**
- * vio_bus_init: - Initialize the virtual IO bus
- */
-static int __init vio_bus_init(void)
-{
- int err;
-
- if (firmware_has_feature(FW_FEATURE_CMO))
- vio_cmo_sysfs_init();
-
- err = bus_register(&vio_bus_type);
- if (err) {
- printk(KERN_ERR "failed to register VIO bus\n");
- return err;
- }
-
- /*
- * The fake parent of all vio devices, just to give us
- * a nice directory
- */
- err = device_register(&vio_bus_device.dev);
- if (err) {
- printk(KERN_WARNING "%s: device_register returned %i\n",
- __func__, err);
- return err;
- }
-
- if (firmware_has_feature(FW_FEATURE_CMO))
- vio_cmo_bus_init();
-
- return 0;
-}
-postcore_initcall(vio_bus_init);
-
-static int __init vio_device_init(void)
-{
- vio_bus_scan_register_devices("vdevice");
- vio_bus_scan_register_devices("ibm,platform-facilities");
-
- return 0;
-}
-device_initcall(vio_device_init);
-
-static ssize_t name_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- return sprintf(buf, "%s\n", to_vio_dev(dev)->name);
-}
-
-static ssize_t devspec_show(struct device *dev,
- struct device_attribute *attr, char *buf)
-{
- struct device_node *of_node = dev->of_node;
-
- return sprintf(buf, "%s\n", of_node_full_name(of_node));
-}
-
-static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
- char *buf)
-{
- const struct vio_dev *vio_dev = to_vio_dev(dev);
- struct device_node *dn;
- const char *cp;
-
- dn = dev->of_node;
- if (!dn) {
- strcpy(buf, "\n");
- return strlen(buf);
- }
- cp = of_get_property(dn, "compatible", NULL);
- if (!cp) {
- strcpy(buf, "\n");
- return strlen(buf);
- }
-
- return sprintf(buf, "vio:T%sS%s\n", vio_dev->type, cp);
-}
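
Given the format string above, a device with type "network" and compatible string "IBM,example-vnic" (hypothetical values) reads back as the modalias "vio:TnetworkSIBM,example-vnic"; vio_hotplug() below emits the same string in the MODALIAS uevent variable so userspace can autoload the matching module.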
-
-static struct device_attribute vio_dev_attrs[] = {
- __ATTR_RO(name),
- __ATTR_RO(devspec),
- __ATTR_RO(modalias),
- __ATTR_NULL
-};
-
-void vio_unregister_device(struct vio_dev *viodev)
-{
- device_unregister(&viodev->dev);
-}
-EXPORT_SYMBOL(vio_unregister_device);
-
-static int vio_bus_match(struct device *dev, struct device_driver *drv)
-{
- const struct vio_dev *vio_dev = to_vio_dev(dev);
- struct vio_driver *vio_drv = to_vio_driver(drv);
- const struct vio_device_id *ids = vio_drv->id_table;
-
- return (ids != NULL) && (vio_match_device(ids, vio_dev) != NULL);
-}
-
-static int vio_hotplug(struct device *dev, struct kobj_uevent_env *env)
-{
- const struct vio_dev *vio_dev = to_vio_dev(dev);
- struct device_node *dn;
- const char *cp;
-
- dn = dev->of_node;
- if (!dn)
- return -ENODEV;
- cp = of_get_property(dn, "compatible", NULL);
- if (!cp)
- return -ENODEV;
-
- add_uevent_var(env, "MODALIAS=vio:T%sS%s", vio_dev->type, cp);
- return 0;
-}
-
-struct bus_type vio_bus_type = {
- .name = "vio",
- .dev_attrs = vio_dev_attrs,
- .uevent = vio_hotplug,
- .match = vio_bus_match,
- .probe = vio_bus_probe,
- .remove = vio_bus_remove,
-};
-
-/**
- * vio_get_attribute: - get attribute for virtual device
- * @vdev: The vio device whose property is requested.
- * @which: The property/attribute to be extracted.
- * @length: Pointer to length of returned data size (unused if NULL).
- *
- * Calls of_get_property() to return the value of the
- * attribute specified by @which
- */
-const void *vio_get_attribute(struct vio_dev *vdev, char *which, int *length)
-{
- return of_get_property(vdev->dev.of_node, which, length);
-}
-EXPORT_SYMBOL(vio_get_attribute);
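
A probe-time sketch using vio_get_attribute(); the property name is one commonly found on vdevice nodes but is an assumption here, and the returned pointer aliases device-tree data that must not be freed:

#include <linux/errno.h>
#include <linux/string.h>
#include <asm/vio.h>

static int example_read_mac(struct vio_dev *vdev, u8 *mac)
{
	int len;
	const u8 *prop = vio_get_attribute(vdev, "local-mac-address", &len);

	if (!prop || len < 6)
		return -ENODEV;	/* property absent or too short */
	memcpy(mac, prop, 6);	/* copy out; prop stays owned by OF */
	return 0;
}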
-
-#ifdef CONFIG_PPC_PSERIES
-/* vio_find_name() - internal because only vio.c knows how we formatted the
- * kobject name
- */
-static struct vio_dev *vio_find_name(const char *name)
-{
- struct device *found;
-
- found = bus_find_device_by_name(&vio_bus_type, NULL, name);
- if (!found)
- return NULL;
-
- return to_vio_dev(found);
-}
-
-/**
- * vio_find_node - find an already-registered vio_dev
- * @vnode: device_node of the virtual device we're looking for
- */
-struct vio_dev *vio_find_node(struct device_node *vnode)
-{
- char kobj_name[20];
- struct device_node *vnode_parent;
- const char *dev_type;
-
- vnode_parent = of_get_parent(vnode);
- if (!vnode_parent)
- return NULL;
-
- dev_type = of_get_property(vnode_parent, "device_type", NULL);
- of_node_put(vnode_parent);
- if (!dev_type)
- return NULL;
-
- /* construct the kobject name from the device node */
- if (!strcmp(dev_type, "vdevice")) {
- const __be32 *prop;
-
- prop = of_get_property(vnode, "reg", NULL);
- if (!prop)
- return NULL;
- snprintf(kobj_name, sizeof(kobj_name), "%x",
- (uint32_t)of_read_number(prop, 1));
- } else if (!strcmp(dev_type, "ibm,platform-facilities"))
- snprintf(kobj_name, sizeof(kobj_name), "%s", vnode->name);
- else
- return NULL;
-
- return vio_find_name(kobj_name);
-}
-EXPORT_SYMBOL(vio_find_node);
-
-int vio_enable_interrupts(struct vio_dev *dev)
-{
- int rc = h_vio_signal(dev->unit_address, VIO_IRQ_ENABLE);
- if (rc != H_SUCCESS)
- printk(KERN_ERR "vio: Error 0x%x enabling interrupts\n", rc);
- return rc;
-}
-EXPORT_SYMBOL(vio_enable_interrupts);
-
-int vio_disable_interrupts(struct vio_dev *dev)
-{
- int rc = h_vio_signal(dev->unit_address, VIO_IRQ_DISABLE);
- if (rc != H_SUCCESS)
- printk(KERN_ERR "vio: Error 0x%x disabling interrupts\n", rc);
- return rc;
-}
-EXPORT_SYMBOL(vio_disable_interrupts);
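
The enable/disable pair is typically used around completion processing: the hard interrupt handler masks further VIO interrupts, drains the queue, then re-arms. A hedged sketch; the completion-draining helper is hypothetical:

#include <linux/interrupt.h>
#include <asm/vio.h>

static void example_drain_queue(struct vio_dev *vdev);	/* hypothetical */

static irqreturn_t example_irq(int irq, void *dev_instance)
{
	struct vio_dev *vdev = dev_instance;

	vio_disable_interrupts(vdev);	/* mask until the queue is drained */
	example_drain_queue(vdev);
	vio_enable_interrupts(vdev);	/* re-arm VIO interrupts */

	return IRQ_HANDLED;
}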
-#endif /* CONFIG_PPC_PSERIES */