From 55672ecfa21f23616541c50e0e687f14f9ecf165 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Mon, 16 Dec 2013 10:46:24 +0530 Subject: powerpc/book3s: Recover from MC in sapphire on SCOM read via MMIO. Detect and recover from machine check when inside opal on a special scom load instructions. On specific SCOM read via MMIO we may get a machine check exception with SRR0 pointing inside opal. To recover from MC in this scenario, get a recovery instruction address and return to it from MC. OPAL will export the machine check recoverable ranges through device tree node mcheck-recoverable-ranges under ibm,opal: # hexdump /proc/device-tree/ibm,opal/mcheck-recoverable-ranges 0000000 0000 0000 3000 2804 0000 000c 0000 0000 0000010 3000 2814 0000 0000 3000 27f0 0000 000c 0000020 0000 0000 3000 2814 xxxx xxxx xxxx xxxx 0000030 llll llll yyyy yyyy yyyy yyyy ... ... # where: xxxx xxxx xxxx xxxx = Starting instruction address llll llll = Length of the address range. yyyy yyyy yyyy yyyy = recovery address Each recoverable address range entry is (start address, len, recovery address), 2 cells each for start and recovery address, 1 cell for len, totalling 5 cells per entry. During kernel boot time, build up the recovery table with the list of recovery ranges from device-tree node which will be used during machine check exception to recover from MMIO SCOM UE. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 3 +++ arch/powerpc/include/asm/mce.h | 3 ++- arch/powerpc/include/asm/opal.h | 3 +++ 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index ad3025d0880b..4da6574e9a20 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -170,6 +170,9 @@ struct machdep_calls { int (*system_reset_exception)(struct pt_regs *regs); int (*machine_check_exception)(struct pt_regs *regs); + /* Called during machine check exception to retrive fixup address. */ + bool (*mce_check_early_recovery)(struct pt_regs *regs); + /* Motherboard/chipset features. This is a kind of general purpose * hook used to control some machine specific features (like reset * lines, chip power control, etc...). diff --git a/arch/powerpc/include/asm/mce.h b/arch/powerpc/include/asm/mce.h index 8e99edf6d966..f97d8cb6bdf6 100644 --- a/arch/powerpc/include/asm/mce.h +++ b/arch/powerpc/include/asm/mce.h @@ -187,7 +187,8 @@ struct mce_error_info { #define MCE_EVENT_DONTRELEASE false extern void save_mce_event(struct pt_regs *regs, long handled, - struct mce_error_info *mce_err, uint64_t addr); + struct mce_error_info *mce_err, uint64_t nip, + uint64_t addr); extern int get_mce_event(struct machine_check_event *mce, bool release); extern void release_mce_event(void); extern void machine_check_queue_event(void); diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ed82142a3251..ad67c40c1a21 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -833,6 +833,8 @@ int64_t opal_sync_host_reboot(void); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); +extern int early_init_dt_scan_recoverable_ranges(unsigned long node, + const char *uname, int depth, void *data); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); @@ -863,6 +865,7 @@ extern void opal_nvram_init(void); extern void opal_flash_init(void); extern int opal_machine_check(struct pt_regs *regs); +extern bool opal_mce_check_early_recovery(struct pt_regs *regs); extern void opal_shutdown(void); -- cgit v1.2.3 From 9ac8cde938481cd0e3f700b8f071c4eca989c9f6 Mon Sep 17 00:00:00 2001 From: Nathan Fontenot Date: Mon, 27 Jan 2014 10:54:06 -0600 Subject: powerpc/pseries: Use remove_memory() to remove memory The memory remove code for powerpc/pseries should call remove_memory() so that we are holding the hotplug_memory lock during memory remove operations. This patch updates the memory node remove handler to call remove_memory() and adds a ppc_md.remove_memory() entry to handle pseries specific work that is called from arch_remove_memory(). During memory remove in pseries_remove_memblock() we have to stay with removing memory one section at a time. This is needed because of how memory resources are handled. During memory add for pseries (via the probe file in sysfs) we add memory one section at a time which gives us a memory resource for each section. Future patches will aim to address this so will not have to remove memory one section at a time. Signed-off-by: Nathan Fontenot Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/machdep.h | 4 ++ arch/powerpc/mm/mem.c | 7 ++- arch/powerpc/platforms/pseries/hotplug-memory.c | 83 ++++++++++++------------- 3 files changed, 49 insertions(+), 45 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index 4da6574e9a20..5b6c03f1058f 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -282,6 +282,10 @@ struct machdep_calls { #ifdef CONFIG_ARCH_RANDOM int (*get_random_long)(unsigned long *v); #endif + +#ifdef CONFIG_MEMORY_HOTREMOVE + int (*remove_memory)(u64, u64); +#endif }; extern void e500_idle(void); diff --git a/arch/powerpc/mm/mem.c b/arch/powerpc/mm/mem.c index 4b5cd5c2594d..2c8e90f5789e 100644 --- a/arch/powerpc/mm/mem.c +++ b/arch/powerpc/mm/mem.c @@ -139,9 +139,14 @@ int arch_remove_memory(u64 start, u64 size) unsigned long start_pfn = start >> PAGE_SHIFT; unsigned long nr_pages = size >> PAGE_SHIFT; struct zone *zone; + int ret; zone = page_zone(pfn_to_page(start_pfn)); - return __remove_pages(zone, start_pfn, nr_pages); + ret = __remove_pages(zone, start_pfn, nr_pages); + if (!ret && (ppc_md.remove_memory)) + ret = ppc_md.remove_memory(start, size); + + return ret; } #endif #endif /* CONFIG_MEMORY_HOTPLUG */ diff --git a/arch/powerpc/platforms/pseries/hotplug-memory.c b/arch/powerpc/platforms/pseries/hotplug-memory.c index 9590dbb756f2..573b488fc48b 100644 --- a/arch/powerpc/platforms/pseries/hotplug-memory.c +++ b/arch/powerpc/platforms/pseries/hotplug-memory.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -75,13 +76,27 @@ unsigned long memory_block_size_bytes(void) } #ifdef CONFIG_MEMORY_HOTREMOVE -static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +static int pseries_remove_memory(u64 start, u64 size) { - unsigned long start, start_pfn; - struct zone *zone; int ret; - unsigned long section; - unsigned long sections_to_remove; + + /* Remove htab bolted mappings for this section of memory */ + start = (unsigned long)__va(start); + ret = remove_section_mapping(start, start + size); + + /* Ensure all vmalloc mappings are flushed in case they also + * hit that section of memory + */ + vm_unmap_aliases(); + + return ret; +} + +static int pseries_remove_memblock(unsigned long base, unsigned int memblock_size) +{ + unsigned long block_sz, start_pfn; + int sections_per_block; + int i, nid; start_pfn = base >> PAGE_SHIFT; @@ -90,45 +105,21 @@ static int pseries_remove_memblock(unsigned long base, unsigned int memblock_siz return 0; } - zone = page_zone(pfn_to_page(start_pfn)); + block_sz = memory_block_size_bytes(); + sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; + nid = memory_add_physaddr_to_nid(base); - /* - * Remove section mappings and sysfs entries for the - * section of the memory we are removing. - * - * NOTE: Ideally, this should be done in generic code like - * remove_memory(). But remove_memory() gets called by writing - * to sysfs "state" file and we can't remove sysfs entries - * while writing to it. So we have to defer it to here. - */ - sections_to_remove = (memblock_size >> PAGE_SHIFT) / PAGES_PER_SECTION; - for (section = 0; section < sections_to_remove; section++) { - unsigned long pfn = start_pfn + section * PAGES_PER_SECTION; - ret = __remove_pages(zone, pfn, PAGES_PER_SECTION); - if (ret) - return ret; + for (i = 0; i < sections_per_block; i++) { + remove_memory(nid, base, MIN_MEMORY_BLOCK_SIZE); + base += MIN_MEMORY_BLOCK_SIZE; } - /* - * Update memory regions for memory remove - */ + /* Update memory regions for memory remove */ memblock_remove(base, memblock_size); - - /* - * Remove htab bolted mappings for this section of memory - */ - start = (unsigned long)__va(base); - ret = remove_section_mapping(start, start + memblock_size); - - /* Ensure all vmalloc mappings are flushed in case they also - * hit that section of memory - */ - vm_unmap_aliases(); - - return ret; + return 0; } -static int pseries_remove_memory(struct device_node *np) +static int pseries_remove_mem_node(struct device_node *np) { const char *type; const unsigned int *regs; @@ -153,8 +144,8 @@ static int pseries_remove_memory(struct device_node *np) base = *(unsigned long *)regs; lmb_size = regs[3]; - ret = pseries_remove_memblock(base, lmb_size); - return ret; + pseries_remove_memblock(base, lmb_size); + return 0; } #else static inline int pseries_remove_memblock(unsigned long base, @@ -162,13 +153,13 @@ static inline int pseries_remove_memblock(unsigned long base, { return -EOPNOTSUPP; } -static inline int pseries_remove_memory(struct device_node *np) +static inline int pseries_remove_mem_node(struct device_node *np) { return -EOPNOTSUPP; } #endif /* CONFIG_MEMORY_HOTREMOVE */ -static int pseries_add_memory(struct device_node *np) +static int pseries_add_mem_node(struct device_node *np) { const char *type; const unsigned int *regs; @@ -254,10 +245,10 @@ static int pseries_memory_notifier(struct notifier_block *nb, switch (action) { case OF_RECONFIG_ATTACH_NODE: - err = pseries_add_memory(node); + err = pseries_add_mem_node(node); break; case OF_RECONFIG_DETACH_NODE: - err = pseries_remove_memory(node); + err = pseries_remove_mem_node(node); break; case OF_RECONFIG_UPDATE_PROPERTY: pr = (struct of_prop_reconfig *)node; @@ -277,6 +268,10 @@ static int __init pseries_memory_hotplug_init(void) if (firmware_has_feature(FW_FEATURE_LPAR)) of_reconfig_notifier_register(&pseries_mem_nb); +#ifdef CONFIG_MEMORY_HOTREMOVE + ppc_md.remove_memory = pseries_remove_memory; +#endif + return 0; } machine_device_initcall(pseries, pseries_memory_hotplug_init); -- cgit v1.2.3 From 6b36ba8492abd1c819e949e085cc547b062d8593 Mon Sep 17 00:00:00 2001 From: Haren Myneni Date: Tue, 25 Feb 2014 20:02:18 -0800 Subject: powerpc/pseries: Update dynamic cache nodes for suspend/resume operation pHyp can change cache nodes for suspend/resume operation. Currently the device tree is updated by drmgr in userspace after all non boot CPUs are enabled. Hence, we do not modify the cache list based on the latest cache nodes. Also we do not remove cache entries for the primary CPU. This patch removes the cache list for the boot CPU, updates the device tree before enabling nonboot CPUs and adds cache list for the boot cpu. This patch also has the side effect that older versions of drmgr will perform a second device tree update from userspace. While this is a redundant waste of a couple cycles it is harmless since firmware returns the same data for the subsequent update-nodes/properties rtas calls. Signed-off-by: Haren Myneni Signed-off-by: Tyrel Datwyler Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/rtas.h | 1 + arch/powerpc/kernel/cacheinfo.c | 7 +++++-- arch/powerpc/platforms/pseries/suspend.c | 19 +++++++++++++++++++ 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/rtas.h b/arch/powerpc/include/asm/rtas.h index 9bd52c65e66f..a0e1add01ef5 100644 --- a/arch/powerpc/include/asm/rtas.h +++ b/arch/powerpc/include/asm/rtas.h @@ -283,6 +283,7 @@ extern void pSeries_log_error(char *buf, unsigned int err_type, int fatal); #ifdef CONFIG_PPC_PSERIES extern int pseries_devicetree_update(s32 scope); +extern void post_mobility_fixup(void); #endif #ifdef CONFIG_PPC_RTAS_DAEMON diff --git a/arch/powerpc/kernel/cacheinfo.c b/arch/powerpc/kernel/cacheinfo.c index 2912b8787aa4..40198d50b4c2 100644 --- a/arch/powerpc/kernel/cacheinfo.c +++ b/arch/powerpc/kernel/cacheinfo.c @@ -756,7 +756,10 @@ void cacheinfo_cpu_online(unsigned int cpu_id) cacheinfo_sysfs_populate(cpu_id, cache); } -#ifdef CONFIG_HOTPLUG_CPU /* functions needed for cpu offline */ +/* functions needed to remove cache entry for cpu offline or suspend/resume */ + +#if (defined(CONFIG_PPC_PSERIES) && defined(CONFIG_SUSPEND)) || \ + defined(CONFIG_HOTPLUG_CPU) static struct cache *cache_lookup_by_cpu(unsigned int cpu_id) { @@ -843,4 +846,4 @@ void cacheinfo_cpu_offline(unsigned int cpu_id) if (cache) cache_cpu_clear(cache, cpu_id); } -#endif /* CONFIG_HOTPLUG_CPU */ +#endif /* (CONFIG_PPC_PSERIES && CONFIG_SUSPEND) || CONFIG_HOTPLUG_CPU */ diff --git a/arch/powerpc/platforms/pseries/suspend.c b/arch/powerpc/platforms/pseries/suspend.c index 16a255255d30..1d9c580ab772 100644 --- a/arch/powerpc/platforms/pseries/suspend.c +++ b/arch/powerpc/platforms/pseries/suspend.c @@ -26,6 +26,7 @@ #include #include #include +#include "../../kernel/cacheinfo.h" static u64 stream_id; static struct device suspend_dev; @@ -78,6 +79,23 @@ static int pseries_suspend_cpu(void) return 0; } +/** + * pseries_suspend_enable_irqs + * + * Post suspend configuration updates + * + **/ +static void pseries_suspend_enable_irqs(void) +{ + /* + * Update configuration which can be modified based on device tree + * changes during resume. + */ + cacheinfo_cpu_offline(smp_processor_id()); + post_mobility_fixup(); + cacheinfo_cpu_online(smp_processor_id()); +} + /** * pseries_suspend_enter - Final phase of hibernation * @@ -235,6 +253,7 @@ static int __init pseries_suspend_init(void) return rc; ppc_md.suspend_disable_cpu = pseries_suspend_cpu; + ppc_md.suspend_enable_irqs = pseries_suspend_enable_irqs; suspend_set_ops(&pseries_suspend_ops); return 0; } -- cgit v1.2.3 From 774fea1a38c6a5a8ccc10969db84da24565f276f Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Fri, 28 Feb 2014 11:58:32 +1100 Subject: powerpc/powernv: Read OPAL error log and export it through sysfs Based on a patch by: Mahesh Salgaonkar This patch adds support to read error logs from OPAL and export them to userspace through a sysfs interface. We export each log entry as a directory in /sys/firmware/opal/elog/ Currently, OPAL will buffer up to 128 error log records, we don't need to have any knowledge of this limit on the Linux side as that is actually largely transparent to us. Each error log entry has the following files: id, type, acknowledge, raw. Currently we just export the raw binary error log in the 'raw' attribute. In a future patch, we may parse more of the error log to make it a bit easier for userspace (e.g. to be able to display a brief summary in petitboot without having to have a full parser). If we have >128 logs from OPAL, we'll only be notified of 128 until userspace starts acknowledging them. This limitation may be lifted in the future and with this patch, that should "just work" from the linux side. A userspace daemon should: - wait for error log entries using normal mechanisms (we announce creation) - read error log entry - save error log entry safely to disk - acknowledge the error log entry - rinse, repeat. On the Linux side, we read the error log when we're notified of it. This possibly isn't ideal as it would be better to only read them on-demand. However, this doesn't really work with current OPAL interface, so we read the error log immediately when notified at the moment. I've tested this pretty extensively and am rather confident that the linux side of things works rather well. There is currently an issue with the service processor side of things for >128 error logs though. Signed-off-by: Stewart Smith Signed-off-by: Benjamin Herrenschmidt --- Documentation/ABI/stable/sysfs-firmware-opal-elog | 60 +++++ arch/powerpc/include/asm/opal.h | 13 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-elog.c | 313 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 5 + arch/powerpc/platforms/powernv/opal.c | 2 + 6 files changed, 394 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/stable/sysfs-firmware-opal-elog create mode 100644 arch/powerpc/platforms/powernv/opal-elog.c (limited to 'arch/powerpc/include') diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-elog b/Documentation/ABI/stable/sysfs-firmware-opal-elog new file mode 100644 index 000000000000..e1f3058f5954 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-firmware-opal-elog @@ -0,0 +1,60 @@ +What: /sys/firmware/opal/elog +Date: Feb 2014 +Contact: Stewart Smith +Description: + This directory exposes error log entries retrieved + through the OPAL firmware interface. + + Each error log is identified by a unique ID and will + exist until explicitly acknowledged to firmware. + + Each log entry has a directory in /sys/firmware/opal/elog. + + Log entries may be purged by the service processor + before retrieved by firmware or retrieved/acknowledged by + Linux if there is no room for more log entries. + + In the event that Linux has retrieved the log entries + but not explicitly acknowledged them to firmware and + the service processor needs more room for log entries, + the only remaining copy of a log message may be in + Linux. + + Typically, a user space daemon will monitor for new + entries, read them out and acknowledge them. + + The service processor may be able to store more log + entries than firmware can, so after you acknowledge + an event from Linux you may instantly get another one + from the queue that was generated some time in the past. + + The raw log format is a binary format. We currently + do not parse this at all in kernel, leaving it up to + user space to solve the problem. In future, we may + do more parsing in kernel and add more files to make + it easier for simple user space processes to extract + more information. + + For each log entry (directory), there are the following + files: + + id: An ASCII representation of the ID of the + error log, in hex - e.g. "0x01". + + type: An ASCII representation of the type id and + description of the type of error log. + Currently just "0x00 PEL" - platform error log. + In the future there may be additional types. + + raw: A read-only binary file that can be read + to get the raw log entry. These are + <16kb, often just hundreds of bytes and + "average" 2kb. + + acknowledge: Writing 'ack' to this file will acknowledge + the error log to firmware (and in turn + the service processor, if applicable). + Shortly after acknowledging it, the log + entry will be removed from sysfs. + Reading this file will list the supported + operations (curently just acknowledge). \ No newline at end of file diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index ad67c40c1a21..933adde1bdea 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -151,6 +151,11 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_LPC_READ 67 #define OPAL_LPC_WRITE 68 #define OPAL_RETURN_CPU 69 +#define OPAL_ELOG_READ 71 +#define OPAL_ELOG_WRITE 72 +#define OPAL_ELOG_ACK 73 +#define OPAL_ELOG_RESEND 74 +#define OPAL_ELOG_SIZE 75 #define OPAL_FLASH_VALIDATE 76 #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 @@ -823,6 +828,13 @@ int64_t opal_lpc_write(uint32_t chip_id, enum OpalLPCAddressType addr_type, uint32_t addr, uint32_t data, uint32_t sz); int64_t opal_lpc_read(uint32_t chip_id, enum OpalLPCAddressType addr_type, uint32_t addr, __be32 *data, uint32_t sz); + +int64_t opal_read_elog(uint64_t buffer, size_t size, uint64_t log_id); +int64_t opal_get_elog_size(uint64_t *log_id, size_t *size, uint64_t *elog_type); +int64_t opal_write_elog(uint64_t buffer, uint64_t size, uint64_t offset); +int64_t opal_send_ack_elog(uint64_t log_id); +void opal_resend_pending_logs(void); + int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result); int64_t opal_manage_flash(uint8_t op); int64_t opal_update_flash(uint64_t blk_list); @@ -863,6 +875,7 @@ extern void opal_get_rtc_time(struct rtc_time *tm); extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); +extern int opal_elog_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 8d767fde5a6a..189fd4595161 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,6 +1,6 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o -obj-y += rng.o +obj-y += rng.o opal-elog.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-elog.c b/arch/powerpc/platforms/powernv/opal-elog.c new file mode 100644 index 000000000000..1d7355bc9db0 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-elog.c @@ -0,0 +1,313 @@ +/* + * Error log support on PowerNV. + * + * Copyright 2013,2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +struct elog_obj { + struct kobject kobj; + struct bin_attribute raw_attr; + uint64_t id; + uint64_t type; + size_t size; + char *buffer; +}; +#define to_elog_obj(x) container_of(x, struct elog_obj, kobj) + +struct elog_attribute { + struct attribute attr; + ssize_t (*show)(struct elog_obj *elog, struct elog_attribute *attr, + char *buf); + ssize_t (*store)(struct elog_obj *elog, struct elog_attribute *attr, + const char *buf, size_t count); +}; +#define to_elog_attr(x) container_of(x, struct elog_attribute, attr) + +static ssize_t elog_id_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%llx\n", elog_obj->id); +} + +static const char *elog_type_to_string(uint64_t type) +{ + switch (type) { + case 0: return "PEL"; + default: return "unknown"; + } +} + +static ssize_t elog_type_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%llx %s\n", + elog_obj->type, + elog_type_to_string(elog_obj->type)); +} + +static ssize_t elog_ack_show(struct elog_obj *elog_obj, + struct elog_attribute *attr, + char *buf) +{ + return sprintf(buf, "ack - acknowledge log message\n"); +} + +static void delay_release_kobj(void *kobj) +{ + kobject_put((struct kobject *)kobj); +} + +static ssize_t elog_ack_store(struct elog_obj *elog_obj, + struct elog_attribute *attr, + const char *buf, + size_t count) +{ + opal_send_ack_elog(elog_obj->id); + sysfs_schedule_callback(&elog_obj->kobj, delay_release_kobj, + &elog_obj->kobj, THIS_MODULE); + return count; +} + +static struct elog_attribute id_attribute = + __ATTR(id, 0666, elog_id_show, NULL); +static struct elog_attribute type_attribute = + __ATTR(type, 0666, elog_type_show, NULL); +static struct elog_attribute ack_attribute = + __ATTR(acknowledge, 0660, elog_ack_show, elog_ack_store); + +static struct kset *elog_kset; + +static ssize_t elog_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct elog_attribute *attribute; + struct elog_obj *elog; + + attribute = to_elog_attr(attr); + elog = to_elog_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(elog, attribute, buf); +} + +static ssize_t elog_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct elog_attribute *attribute; + struct elog_obj *elog; + + attribute = to_elog_attr(attr); + elog = to_elog_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(elog, attribute, buf, len); +} + +static const struct sysfs_ops elog_sysfs_ops = { + .show = elog_attr_show, + .store = elog_attr_store, +}; + +static void elog_release(struct kobject *kobj) +{ + struct elog_obj *elog; + + elog = to_elog_obj(kobj); + kfree(elog->buffer); + kfree(elog); +} + +static struct attribute *elog_default_attrs[] = { + &id_attribute.attr, + &type_attribute.attr, + &ack_attribute.attr, + NULL, +}; + +static struct kobj_type elog_ktype = { + .sysfs_ops = &elog_sysfs_ops, + .release = &elog_release, + .default_attrs = elog_default_attrs, +}; + +/* Maximum size of a single log on FSP is 16KB */ +#define OPAL_MAX_ERRLOG_SIZE 16384 + +static ssize_t raw_attr_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + int opal_rc; + + struct elog_obj *elog = to_elog_obj(kobj); + + /* We may have had an error reading before, so let's retry */ + if (!elog->buffer) { + elog->buffer = kzalloc(elog->size, GFP_KERNEL); + if (!elog->buffer) + return -EIO; + + opal_rc = opal_read_elog(__pa(elog->buffer), + elog->size, elog->id); + if (opal_rc != OPAL_SUCCESS) { + pr_err("ELOG: log read failed for log-id=%llx\n", + elog->id); + kfree(elog->buffer); + elog->buffer = NULL; + return -EIO; + } + } + + memcpy(buffer, elog->buffer + pos, count); + + return count; +} + +static struct elog_obj *create_elog_obj(uint64_t id, size_t size, uint64_t type) +{ + struct elog_obj *elog; + int rc; + + elog = kzalloc(sizeof(*elog), GFP_KERNEL); + if (!elog) + return NULL; + + elog->kobj.kset = elog_kset; + + kobject_init(&elog->kobj, &elog_ktype); + + sysfs_bin_attr_init(&elog->raw_attr); + + elog->raw_attr.attr.name = "raw"; + elog->raw_attr.attr.mode = 0400; + elog->raw_attr.size = size; + elog->raw_attr.read = raw_attr_read; + + elog->id = id; + elog->size = size; + elog->type = type; + + elog->buffer = kzalloc(elog->size, GFP_KERNEL); + + if (elog->buffer) { + rc = opal_read_elog(__pa(elog->buffer), + elog->size, elog->id); + if (rc != OPAL_SUCCESS) { + pr_err("ELOG: log read failed for log-id=%llx\n", + elog->id); + kfree(elog->buffer); + elog->buffer = NULL; + } + } + + rc = kobject_add(&elog->kobj, NULL, "0x%llx", id); + if (rc) { + kobject_put(&elog->kobj); + return NULL; + } + + rc = sysfs_create_bin_file(&elog->kobj, &elog->raw_attr); + if (rc) { + kobject_put(&elog->kobj); + return NULL; + } + + kobject_uevent(&elog->kobj, KOBJ_ADD); + + return elog; +} + +static void elog_work_fn(struct work_struct *work) +{ + size_t elog_size; + uint64_t log_id; + uint64_t elog_type; + int rc; + char name[2+16+1]; + + rc = opal_get_elog_size(&log_id, &elog_size, &elog_type); + if (rc != OPAL_SUCCESS) { + pr_err("ELOG: Opal log read failed\n"); + return; + } + + BUG_ON(elog_size > OPAL_MAX_ERRLOG_SIZE); + + if (elog_size >= OPAL_MAX_ERRLOG_SIZE) + elog_size = OPAL_MAX_ERRLOG_SIZE; + + sprintf(name, "0x%llx", log_id); + + /* we may get notified twice, let's handle + * that gracefully and not create two conflicting + * entries. + */ + if (kset_find_obj(elog_kset, name)) + return; + + create_elog_obj(log_id, elog_size, elog_type); +} + +static DECLARE_WORK(elog_work, elog_work_fn); + +static int elog_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + /* check for error log event */ + if (events & OPAL_EVENT_ERROR_LOG_AVAIL) + schedule_work(&elog_work); + return 0; +} + +static struct notifier_block elog_nb = { + .notifier_call = elog_event, + .next = NULL, + .priority = 0 +}; + +int __init opal_elog_init(void) +{ + int rc = 0; + + elog_kset = kset_create_and_add("elog", NULL, opal_kobj); + if (!elog_kset) { + pr_warn("%s: failed to create elog kset\n", __func__); + return -1; + } + + rc = opal_notifier_register(&elog_nb); + if (rc) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, rc); + return rc; + } + + /* We are now ready to pull error logs from opal. */ + opal_resend_pending_logs(); + + return 0; +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 3e8829c40fbb..5fcbf253a870 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -123,6 +123,11 @@ OPAL_CALL(opal_xscom_write, OPAL_XSCOM_WRITE); OPAL_CALL(opal_lpc_read, OPAL_LPC_READ); OPAL_CALL(opal_lpc_write, OPAL_LPC_WRITE); OPAL_CALL(opal_return_cpu, OPAL_RETURN_CPU); +OPAL_CALL(opal_read_elog, OPAL_ELOG_READ); +OPAL_CALL(opal_send_ack_elog, OPAL_ELOG_ACK); +OPAL_CALL(opal_get_elog_size, OPAL_ELOG_SIZE); +OPAL_CALL(opal_resend_pending_logs, OPAL_ELOG_RESEND); +OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE); OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index d5f11d689d6c..0a4493895d16 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -566,6 +566,8 @@ static int __init opal_init(void) /* Create "opal" kobject under /sys/firmware */ rc = opal_sysfs_init(); if (rc == 0) { + /* Setup error log interface */ + rc = opal_elog_init(); /* Setup code update interface */ opal_flash_init(); } -- cgit v1.2.3 From c7e64b9ce04aa2e3fad7396d92b5cb92056d16ac Mon Sep 17 00:00:00 2001 From: Stewart Smith Date: Mon, 3 Mar 2014 10:25:42 +1100 Subject: powerpc/powernv Platform dump interface This enables support for userspace to fetch and initiate FSP and Platform dumps from the service processor (via firmware) through sysfs. Based on original patch from Vasant Hegde Flow: - We register for OPAL notification events. - OPAL sends new dump available notification. - We make information on dump available via sysfs - Userspace requests dump contents - We retrieve the dump via OPAL interface - User copies the dump data - userspace sends ack for dump - We send ACK to OPAL. sysfs files: - We add the /sys/firmware/opal/dump directory - echoing 1 (well, anything, but in future we may support different dump types) to /sys/firmware/opal/dump/initiate_dump will initiate a dump. - Each dump that we've been notified of gets a directory in /sys/firmware/opal/dump/ with a name of the dump type and ID (in hex, as this is what's used elsewhere to identify the dump). - Each dump has files: id, type, dump and acknowledge dump is binary and is the dump itself. echoing 'ack' to acknowledge (currently any string will do) will acknowledge the dump and it will soon after disappear from sysfs. OPAL APIs: - opal_dump_init() - opal_dump_info() - opal_dump_read() - opal_dump_ack() - opal_dump_resend_notification() Currently we are only ever notified for one dump at a time (until the user explicitly acks the current dump, then we get a notification of the next dump), but this kernel code should "just work" when OPAL starts notifying us of all the dumps present. Signed-off-by: Stewart Smith Signed-off-by: Benjamin Herrenschmidt --- Documentation/ABI/stable/sysfs-firmware-opal-dump | 41 ++ arch/powerpc/include/asm/opal.h | 14 + arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-dump.c | 525 ++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 6 + arch/powerpc/platforms/powernv/opal.c | 2 + 6 files changed, 589 insertions(+), 1 deletion(-) create mode 100644 Documentation/ABI/stable/sysfs-firmware-opal-dump create mode 100644 arch/powerpc/platforms/powernv/opal-dump.c (limited to 'arch/powerpc/include') diff --git a/Documentation/ABI/stable/sysfs-firmware-opal-dump b/Documentation/ABI/stable/sysfs-firmware-opal-dump new file mode 100644 index 000000000000..32fe7f5c4880 --- /dev/null +++ b/Documentation/ABI/stable/sysfs-firmware-opal-dump @@ -0,0 +1,41 @@ +What: /sys/firmware/opal/dump +Date: Feb 2014 +Contact: Stewart Smith +Description: + This directory exposes interfaces for interacting with + the FSP and platform dumps through OPAL firmware interface. + + This is only for the powerpc/powernv platform. + + initiate_dump: When '1' is written to it, + we will initiate a dump. + Read this file for supported commands. + + 0xXX-0xYYYY: A directory for dump of type 0xXX and + id 0xYYYY (in hex). The name of this + directory should not be relied upon to + be in this format, only that it's unique + among all dumps. For determining the type + and ID of the dump, use the id and type files. + Do not rely on any particular size of dump + type or dump id. + + Each dump has the following files: + id: An ASCII representation of the dump ID + in hex (e.g. '0x01') + type: An ASCII representation of the type of + dump in the format "0x%x %s" with the ID + in hex and a description of the dump type + (or 'unknown'). + Type '0xffffffff unknown' is used when + we could not get the type from firmware. + e.g. '0x02 System/Platform Dump' + dump: A binary file containing the dump. + The size of the dump is the size of this file. + acknowledge: When 'ack' is written to this, we will + acknowledge that we've retrieved the + dump to the service processor. It will + then remove it, making the dump + inaccessible. + Reading this file will get a list of + supported actions. diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 933adde1bdea..2636acfcd340 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -159,9 +159,15 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_FLASH_VALIDATE 76 #define OPAL_FLASH_MANAGE 77 #define OPAL_FLASH_UPDATE 78 +#define OPAL_DUMP_INIT 81 +#define OPAL_DUMP_INFO 82 +#define OPAL_DUMP_READ 83 +#define OPAL_DUMP_ACK 84 #define OPAL_GET_MSG 85 #define OPAL_CHECK_ASYNC_COMPLETION 86 +#define OPAL_DUMP_RESEND 91 #define OPAL_SYNC_HOST_REBOOT 87 +#define OPAL_DUMP_INFO2 94 #ifndef __ASSEMBLY__ @@ -242,6 +248,7 @@ enum OpalPendingState { OPAL_EVENT_EPOW = 0x80, OPAL_EVENT_LED_STATUS = 0x100, OPAL_EVENT_PCI_ERROR = 0x200, + OPAL_EVENT_DUMP_AVAIL = 0x400, OPAL_EVENT_MSG_PENDING = 0x800, }; @@ -838,6 +845,12 @@ void opal_resend_pending_logs(void); int64_t opal_validate_flash(uint64_t buffer, uint32_t *size, uint32_t *result); int64_t opal_manage_flash(uint8_t op); int64_t opal_update_flash(uint64_t blk_list); +int64_t opal_dump_init(uint8_t dump_type); +int64_t opal_dump_info(uint32_t *dump_id, uint32_t *dump_size); +int64_t opal_dump_info2(uint32_t *dump_id, uint32_t *dump_size, uint32_t *dump_type); +int64_t opal_dump_read(uint32_t dump_id, uint64_t buffer); +int64_t opal_dump_ack(uint32_t dump_id); +int64_t opal_dump_resend_notification(void); int64_t opal_get_msg(uint64_t buffer, size_t size); int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token); @@ -876,6 +889,7 @@ extern unsigned long opal_get_boot_time(void); extern void opal_nvram_init(void); extern void opal_flash_init(void); extern int opal_elog_init(void); +extern void opal_platform_dump_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 189fd4595161..5125caeb40f4 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,6 +1,6 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o -obj-y += rng.o opal-elog.o +obj-y += rng.o opal-elog.o opal-dump.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-dump.c b/arch/powerpc/platforms/powernv/opal-dump.c new file mode 100644 index 000000000000..0c767c561dc9 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-dump.c @@ -0,0 +1,525 @@ +/* + * PowerNV OPAL Dump Interface + * + * Copyright 2013,2014 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include +#include +#include +#include +#include +#include + +#include + +#define DUMP_TYPE_FSP 0x01 + +struct dump_obj { + struct kobject kobj; + struct bin_attribute dump_attr; + uint32_t id; /* becomes object name */ + uint32_t type; + uint32_t size; + char *buffer; +}; +#define to_dump_obj(x) container_of(x, struct dump_obj, kobj) + +struct dump_attribute { + struct attribute attr; + ssize_t (*show)(struct dump_obj *dump, struct dump_attribute *attr, + char *buf); + ssize_t (*store)(struct dump_obj *dump, struct dump_attribute *attr, + const char *buf, size_t count); +}; +#define to_dump_attr(x) container_of(x, struct dump_attribute, attr) + +static ssize_t dump_id_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "0x%x\n", dump_obj->id); +} + +static const char* dump_type_to_string(uint32_t type) +{ + switch (type) { + case 0x01: return "SP Dump"; + case 0x02: return "System/Platform Dump"; + case 0x03: return "SMA Dump"; + default: return "unknown"; + } +} + +static ssize_t dump_type_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + + return sprintf(buf, "0x%x %s\n", dump_obj->type, + dump_type_to_string(dump_obj->type)); +} + +static ssize_t dump_ack_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "ack - acknowledge dump\n"); +} + +/* + * Send acknowledgement to OPAL + */ +static int64_t dump_send_ack(uint32_t dump_id) +{ + int rc; + + rc = opal_dump_ack(dump_id); + if (rc) + pr_warn("%s: Failed to send ack to Dump ID 0x%x (%d)\n", + __func__, dump_id, rc); + return rc; +} + +static void delay_release_kobj(void *kobj) +{ + kobject_put((struct kobject *)kobj); +} + +static ssize_t dump_ack_store(struct dump_obj *dump_obj, + struct dump_attribute *attr, + const char *buf, + size_t count) +{ + dump_send_ack(dump_obj->id); + sysfs_schedule_callback(&dump_obj->kobj, delay_release_kobj, + &dump_obj->kobj, THIS_MODULE); + return count; +} + +/* Attributes of a dump + * The binary attribute of the dump itself is dynamic + * due to the dynamic size of the dump + */ +static struct dump_attribute id_attribute = + __ATTR(id, 0666, dump_id_show, NULL); +static struct dump_attribute type_attribute = + __ATTR(type, 0666, dump_type_show, NULL); +static struct dump_attribute ack_attribute = + __ATTR(acknowledge, 0660, dump_ack_show, dump_ack_store); + +static ssize_t init_dump_show(struct dump_obj *dump_obj, + struct dump_attribute *attr, + char *buf) +{ + return sprintf(buf, "1 - initiate dump\n"); +} + +static int64_t dump_fips_init(uint8_t type) +{ + int rc; + + rc = opal_dump_init(type); + if (rc) + pr_warn("%s: Failed to initiate FipS dump (%d)\n", + __func__, rc); + return rc; +} + +static ssize_t init_dump_store(struct dump_obj *dump_obj, + struct dump_attribute *attr, + const char *buf, + size_t count) +{ + dump_fips_init(DUMP_TYPE_FSP); + pr_info("%s: Initiated FSP dump\n", __func__); + return count; +} + +static struct dump_attribute initiate_attribute = + __ATTR(initiate_dump, 0600, init_dump_show, init_dump_store); + +static struct attribute *initiate_attrs[] = { + &initiate_attribute.attr, + NULL, +}; + +static struct attribute_group initiate_attr_group = { + .attrs = initiate_attrs, +}; + +static struct kset *dump_kset; + +static ssize_t dump_attr_show(struct kobject *kobj, + struct attribute *attr, + char *buf) +{ + struct dump_attribute *attribute; + struct dump_obj *dump; + + attribute = to_dump_attr(attr); + dump = to_dump_obj(kobj); + + if (!attribute->show) + return -EIO; + + return attribute->show(dump, attribute, buf); +} + +static ssize_t dump_attr_store(struct kobject *kobj, + struct attribute *attr, + const char *buf, size_t len) +{ + struct dump_attribute *attribute; + struct dump_obj *dump; + + attribute = to_dump_attr(attr); + dump = to_dump_obj(kobj); + + if (!attribute->store) + return -EIO; + + return attribute->store(dump, attribute, buf, len); +} + +static const struct sysfs_ops dump_sysfs_ops = { + .show = dump_attr_show, + .store = dump_attr_store, +}; + +static void dump_release(struct kobject *kobj) +{ + struct dump_obj *dump; + + dump = to_dump_obj(kobj); + vfree(dump->buffer); + kfree(dump); +} + +static struct attribute *dump_default_attrs[] = { + &id_attribute.attr, + &type_attribute.attr, + &ack_attribute.attr, + NULL, +}; + +static struct kobj_type dump_ktype = { + .sysfs_ops = &dump_sysfs_ops, + .release = &dump_release, + .default_attrs = dump_default_attrs, +}; + +static void free_dump_sg_list(struct opal_sg_list *list) +{ + struct opal_sg_list *sg1; + while (list) { + sg1 = list->next; + kfree(list); + list = sg1; + } + list = NULL; +} + +static struct opal_sg_list *dump_data_to_sglist(struct dump_obj *dump) +{ + struct opal_sg_list *sg1, *list = NULL; + void *addr; + int64_t size; + + addr = dump->buffer; + size = dump->size; + + sg1 = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sg1) + goto nomem; + + list = sg1; + sg1->num_entries = 0; + while (size > 0) { + /* Translate virtual address to physical address */ + sg1->entry[sg1->num_entries].data = + (void *)(vmalloc_to_pfn(addr) << PAGE_SHIFT); + + if (size > PAGE_SIZE) + sg1->entry[sg1->num_entries].length = PAGE_SIZE; + else + sg1->entry[sg1->num_entries].length = size; + + sg1->num_entries++; + if (sg1->num_entries >= SG_ENTRIES_PER_NODE) { + sg1->next = kzalloc(PAGE_SIZE, GFP_KERNEL); + if (!sg1->next) + goto nomem; + + sg1 = sg1->next; + sg1->num_entries = 0; + } + addr += PAGE_SIZE; + size -= PAGE_SIZE; + } + return list; + +nomem: + pr_err("%s : Failed to allocate memory\n", __func__); + free_dump_sg_list(list); + return NULL; +} + +static void sglist_to_phy_addr(struct opal_sg_list *list) +{ + struct opal_sg_list *sg, *next; + + for (sg = list; sg; sg = next) { + next = sg->next; + /* Don't translate NULL pointer for last entry */ + if (sg->next) + sg->next = (struct opal_sg_list *)__pa(sg->next); + else + sg->next = NULL; + + /* Convert num_entries to length */ + sg->num_entries = + sg->num_entries * sizeof(struct opal_sg_entry) + 16; + } +} + +static int64_t dump_read_info(uint32_t *id, uint32_t *size, uint32_t *type) +{ + int rc; + *type = 0xffffffff; + + rc = opal_dump_info2(id, size, type); + + if (rc == OPAL_PARAMETER) + rc = opal_dump_info(id, size); + + if (rc) + pr_warn("%s: Failed to get dump info (%d)\n", + __func__, rc); + return rc; +} + +static int64_t dump_read_data(struct dump_obj *dump) +{ + struct opal_sg_list *list; + uint64_t addr; + int64_t rc; + + /* Allocate memory */ + dump->buffer = vzalloc(PAGE_ALIGN(dump->size)); + if (!dump->buffer) { + pr_err("%s : Failed to allocate memory\n", __func__); + rc = -ENOMEM; + goto out; + } + + /* Generate SG list */ + list = dump_data_to_sglist(dump); + if (!list) { + rc = -ENOMEM; + goto out; + } + + /* Translate sg list addr to real address */ + sglist_to_phy_addr(list); + + /* First entry address */ + addr = __pa(list); + + /* Fetch data */ + rc = OPAL_BUSY_EVENT; + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_dump_read(dump->id, addr); + if (rc == OPAL_BUSY_EVENT) { + opal_poll_events(NULL); + msleep(20); + } + } + + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) + pr_warn("%s: Extract dump failed for ID 0x%x\n", + __func__, dump->id); + + /* Free SG list */ + free_dump_sg_list(list); + +out: + return rc; +} + +static ssize_t dump_attr_read(struct file *filep, struct kobject *kobj, + struct bin_attribute *bin_attr, + char *buffer, loff_t pos, size_t count) +{ + ssize_t rc; + + struct dump_obj *dump = to_dump_obj(kobj); + + if (!dump->buffer) { + rc = dump_read_data(dump); + + if (rc != OPAL_SUCCESS && rc != OPAL_PARTIAL) { + vfree(dump->buffer); + dump->buffer = NULL; + + return -EIO; + } + if (rc == OPAL_PARTIAL) { + /* On a partial read, we just return EIO + * and rely on userspace to ask us to try + * again. + */ + pr_info("%s: Platform dump partially read.ID = 0x%x\n", + __func__, dump->id); + return -EIO; + } + } + + memcpy(buffer, dump->buffer + pos, count); + + /* You may think we could free the dump buffer now and retrieve + * it again later if needed, but due to current firmware limitation, + * that's not the case. So, once read into userspace once, + * we keep the dump around until it's acknowledged by userspace. + */ + + return count; +} + +static struct dump_obj *create_dump_obj(uint32_t id, size_t size, + uint32_t type) +{ + struct dump_obj *dump; + int rc; + + dump = kzalloc(sizeof(*dump), GFP_KERNEL); + if (!dump) + return NULL; + + dump->kobj.kset = dump_kset; + + kobject_init(&dump->kobj, &dump_ktype); + + sysfs_bin_attr_init(&dump->dump_attr); + + dump->dump_attr.attr.name = "dump"; + dump->dump_attr.attr.mode = 0400; + dump->dump_attr.size = size; + dump->dump_attr.read = dump_attr_read; + + dump->id = id; + dump->size = size; + dump->type = type; + + rc = kobject_add(&dump->kobj, NULL, "0x%x-0x%x", type, id); + if (rc) { + kobject_put(&dump->kobj); + return NULL; + } + + rc = sysfs_create_bin_file(&dump->kobj, &dump->dump_attr); + if (rc) { + kobject_put(&dump->kobj); + return NULL; + } + + pr_info("%s: New platform dump. ID = 0x%x Size %u\n", + __func__, dump->id, dump->size); + + kobject_uevent(&dump->kobj, KOBJ_ADD); + + return dump; +} + +static int process_dump(void) +{ + int rc; + uint32_t dump_id, dump_size, dump_type; + struct dump_obj *dump; + char name[22]; + + rc = dump_read_info(&dump_id, &dump_size, &dump_type); + if (rc != OPAL_SUCCESS) + return rc; + + sprintf(name, "0x%x-0x%x", dump_type, dump_id); + + /* we may get notified twice, let's handle + * that gracefully and not create two conflicting + * entries. + */ + if (kset_find_obj(dump_kset, name)) + return 0; + + dump = create_dump_obj(dump_id, dump_size, dump_type); + if (!dump) + return -1; + + return 0; +} + +static void dump_work_fn(struct work_struct *work) +{ + process_dump(); +} + +static DECLARE_WORK(dump_work, dump_work_fn); + +static void schedule_process_dump(void) +{ + schedule_work(&dump_work); +} + +/* + * New dump available notification + * + * Once we get notification, we add sysfs entries for it. + * We only fetch the dump on demand, and create sysfs asynchronously. + */ +static int dump_event(struct notifier_block *nb, + unsigned long events, void *change) +{ + if (events & OPAL_EVENT_DUMP_AVAIL) + schedule_process_dump(); + + return 0; +} + +static struct notifier_block dump_nb = { + .notifier_call = dump_event, + .next = NULL, + .priority = 0 +}; + +void __init opal_platform_dump_init(void) +{ + int rc; + + dump_kset = kset_create_and_add("dump", NULL, opal_kobj); + if (!dump_kset) { + pr_warn("%s: Failed to create dump kset\n", __func__); + return; + } + + rc = sysfs_create_group(&dump_kset->kobj, &initiate_attr_group); + if (rc) { + pr_warn("%s: Failed to create initiate dump attr group\n", + __func__); + kobject_put(&dump_kset->kobj); + return; + } + + rc = opal_notifier_register(&dump_nb); + if (rc) { + pr_warn("%s: Can't register OPAL event notifier (%d)\n", + __func__, rc); + return; + } + + opal_dump_resend_notification(); +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 5fcbf253a870..47ec3f738062 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -131,6 +131,12 @@ OPAL_CALL(opal_write_elog, OPAL_ELOG_WRITE); OPAL_CALL(opal_validate_flash, OPAL_FLASH_VALIDATE); OPAL_CALL(opal_manage_flash, OPAL_FLASH_MANAGE); OPAL_CALL(opal_update_flash, OPAL_FLASH_UPDATE); +OPAL_CALL(opal_dump_init, OPAL_DUMP_INIT); +OPAL_CALL(opal_dump_info, OPAL_DUMP_INFO); +OPAL_CALL(opal_dump_info2, OPAL_DUMP_INFO2); +OPAL_CALL(opal_dump_read, OPAL_DUMP_READ); +OPAL_CALL(opal_dump_ack, OPAL_DUMP_ACK); OPAL_CALL(opal_get_msg, OPAL_GET_MSG); OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); +OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND); OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 0a4493895d16..2e269c27dd23 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -570,6 +570,8 @@ static int __init opal_init(void) rc = opal_elog_init(); /* Setup code update interface */ opal_flash_init(); + /* Setup platform dump extract interface */ + opal_platform_dump_init(); } return 0; -- cgit v1.2.3 From b0b7dcbdf327bc57018d498e07331b2c96916dc7 Mon Sep 17 00:00:00 2001 From: Wang Dongsheng Date: Fri, 7 Mar 2014 12:57:57 +0800 Subject: powerpc/fsl: add PVR definition for E500MC and E5500 Signed-off-by: Wang Dongsheng Signed-off-by: Scott Wood --- arch/powerpc/include/asm/reg.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 90c06ec6eff5..bf0fb4db0855 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -1102,6 +1102,8 @@ #define PVR_8560 0x80200000 #define PVR_VER_E500V1 0x8020 #define PVR_VER_E500V2 0x8021 +#define PVR_VER_E500MC 0x8023 +#define PVR_VER_E5500 0x8024 #define PVR_VER_E6500 0x8040 /* -- cgit v1.2.3 From 82d86de25b9c99db546e17c6f7ebf9a691da557e Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Fri, 7 Mar 2014 14:48:35 -0600 Subject: powerpc/e6500: Make TLB lock recursive Once special level interrupts are supported, we may take nested TLB misses -- so allow the same thread to acquire the lock recursively. The lock will not be effective against the nested TLB miss handler trying to write the same entry as the interrupted TLB miss handler, but that's also a problem on non-threaded CPUs that lack TLB write conditional. This will be addressed in the patch that enables crit/mc support by invalidating the TLB on return from level exceptions. Signed-off-by: Scott Wood --- arch/powerpc/include/asm/mmu-book3e.h | 9 ++++++--- arch/powerpc/kernel/setup_64.c | 2 ++ arch/powerpc/mm/tlb_low_64e.S | 19 ++++++++++++------- 3 files changed, 20 insertions(+), 10 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/mmu-book3e.h b/arch/powerpc/include/asm/mmu-book3e.h index 89b785d16846..901dac6b6cb7 100644 --- a/arch/powerpc/include/asm/mmu-book3e.h +++ b/arch/powerpc/include/asm/mmu-book3e.h @@ -287,11 +287,14 @@ extern int mmu_linear_psize; extern int mmu_vmemmap_psize; struct tlb_core_data { + /* + * Per-core spinlock for e6500 TLB handlers (no tlbsrx.) + * Must be the first struct element. + */ + u8 lock; + /* For software way selection, as on Freescale TLB1 */ u8 esel_next, esel_max, esel_first; - - /* Per-core spinlock for e6500 TLB handlers (no tlbsrx.) */ - u8 lock; }; #ifdef CONFIG_PPC64 diff --git a/arch/powerpc/kernel/setup_64.c b/arch/powerpc/kernel/setup_64.c index da9c42f53bb1..4933909cc5c0 100644 --- a/arch/powerpc/kernel/setup_64.c +++ b/arch/powerpc/kernel/setup_64.c @@ -102,6 +102,8 @@ static void setup_tlb_core_data(void) { int cpu; + BUILD_BUG_ON(offsetof(struct tlb_core_data, lock) != 0); + for_each_possible_cpu(cpu) { int first = cpu_first_thread_sibling(cpu); diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 6bf50507a4b5..1e50249e900b 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -284,7 +284,7 @@ itlb_miss_fault_bolted: * r14 = page table base * r13 = PACA * r11 = tlb_per_core ptr - * r10 = crap (free to use) + * r10 = cpu number */ tlb_miss_common_e6500: /* @@ -293,15 +293,18 @@ tlb_miss_common_e6500: * * MAS6:IND should be already set based on MAS4 */ - addi r10,r11,TCD_LOCK -1: lbarx r15,0,r10 +1: lbarx r15,0,r11 + lhz r10,PACAPACAINDEX(r13) cmpdi r15,0 + cmpdi cr1,r15,1 /* set cr1.eq = 0 for non-recursive */ bne 2f - li r15,1 - stbcx. r15,0,r10 + stbcx. r10,0,r11 bne 1b +3: .subsection 1 -2: lbz r15,0(r10) +2: cmpd cr1,r15,r10 /* recursive lock due to mcheck/crit/etc? */ + beq cr1,3b /* unlock will happen if cr1.eq = 0 */ + lbz r15,0(r11) cmpdi r15,0 bne 2b b 1b @@ -379,9 +382,11 @@ tlb_miss_common_e6500: tlb_miss_done_e6500: .macro tlb_unlock_e6500 + beq cr1,1f /* no unlock if lock was recursively grabbed */ li r15,0 isync - stb r15,TCD_LOCK(r11) + stb r15,0(r11) +1: .endm tlb_unlock_e6500 -- cgit v1.2.3 From 9d378dfac885f72b8b369d08fc61bef36e2f2dd1 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 10 Mar 2014 17:29:38 -0500 Subject: powerpc/booke64: Use SPRG7 for VDSO Previously SPRG3 was marked for use by both VDSO and critical interrupts (though critical interrupts were not fully implemented). In commit 8b64a9dfb091f1eca8b7e58da82f1e7d1d5fe0ad ("powerpc/booke64: Use SPRG0/3 scratch for bolted TLB miss & crit int"), Mihai Caraman made an attempt to resolve this conflict by restoring the VDSO value early in the critical interrupt, but this has some issues: - It's incompatible with EXCEPTION_COMMON which restores r13 from the by-then-overwritten scratch (this cost me some debugging time). - It forces critical exceptions to be a special case handled differently from even machine check and debug level exceptions. - It didn't occur to me that it was possible to make this work at all (by doing a final "ld r13, PACA_EXCRIT+EX_R13(r13)") until after I made (most of) this patch. :-) It might be worth investigating using a load rather than SPRG on return from all exceptions (except TLB misses where the scratch never leaves the SPRG) -- it could save a few cycles. Until then, let's stick with SPRG for all exceptions. Since we cannot use SPRG4-7 for scratch without corrupting the state of a KVM guest, move VDSO to SPRG7 on book3e. Since neither SPRG4-7 nor critical interrupts exist on book3s, SPRG3 is still used for VDSO there. Signed-off-by: Scott Wood Cc: Mihai Caraman Cc: Anton Blanchard Cc: Paul Mackerras Cc: kvm-ppc@vger.kernel.org --- arch/powerpc/include/asm/exception-64e.h | 5 ++--- arch/powerpc/include/asm/kvm_booke_hv_asm.h | 9 +-------- arch/powerpc/include/asm/paca.h | 2 +- arch/powerpc/include/asm/reg.h | 13 ++++++++++--- arch/powerpc/kernel/asm-offsets.c | 2 +- arch/powerpc/kernel/exceptions-64e.S | 19 ++----------------- arch/powerpc/kernel/vdso.c | 8 ++++---- arch/powerpc/kernel/vdso32/getcpu.S | 2 +- arch/powerpc/kernel/vdso64/getcpu.S | 2 +- arch/powerpc/kvm/book3s_hv_rmhandlers.S | 4 ++-- arch/powerpc/kvm/book3s_interrupts.S | 4 ++-- arch/powerpc/kvm/bookehv_interrupts.S | 10 ++++++---- 12 files changed, 33 insertions(+), 47 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index 51fa43e536b9..e73452f09019 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -46,9 +46,8 @@ #define EX_CR (1 * 8) #define EX_R10 (2 * 8) #define EX_R11 (3 * 8) -#define EX_R13 (4 * 8) -#define EX_R14 (5 * 8) -#define EX_R15 (6 * 8) +#define EX_R14 (4 * 8) +#define EX_R15 (5 * 8) /* * The TLB miss exception uses different slots. diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index 3a79f5325712..c3e3fd53a3a9 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -36,19 +36,12 @@ * *(r8 + GPR11) = saved r11 * * 64-bit host - * Expected inputs (GEN/GDBELL/DBG/MC exception types): + * Expected inputs (GEN/GDBELL/DBG/CRIT/MC exception types): * r10 = saved CR * r13 = PACA_POINTER * *(r13 + PACA_EX##type + EX_R10) = saved r10 * *(r13 + PACA_EX##type + EX_R11) = saved r11 * SPRN_SPRG_##type##_SCRATCH = saved r13 - * - * Expected inputs (CRIT exception type): - * r10 = saved CR - * r13 = PACA_POINTER - * *(r13 + PACA_EX##type + EX_R10) = saved r10 - * *(r13 + PACA_EX##type + EX_R11) = saved r11 - * *(r13 + PACA_EX##type + EX_R13) = saved r13 * * Expected inputs (TLB exception type): * r10 = saved CR diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 9c5dbc3833fb..948f01a04cc3 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -146,7 +146,7 @@ struct paca_struct { u8 io_sync; /* writel() needs spin_unlock sync */ u8 irq_work_pending; /* IRQ_WORK interrupt while soft-disable */ u8 nap_state_lost; /* NV GPR values lost in power7_idle */ - u64 sprg3; /* Saved user-visible sprg */ + u64 sprg_vdso; /* Saved user-visible sprg */ #ifdef CONFIG_PPC_TRANSACTIONAL_MEM u64 tm_scratch; /* TM scratch area for reclaim */ #endif diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index bf0fb4db0855..f7251c2dc049 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -577,9 +577,13 @@ #define SPRN_SPRG3 0x113 /* Special Purpose Register General 3 */ #define SPRN_USPRG3 0x103 /* SPRG3 userspace read */ #define SPRN_SPRG4 0x114 /* Special Purpose Register General 4 */ +#define SPRN_USPRG4 0x104 /* SPRG4 userspace read */ #define SPRN_SPRG5 0x115 /* Special Purpose Register General 5 */ +#define SPRN_USPRG5 0x105 /* SPRG5 userspace read */ #define SPRN_SPRG6 0x116 /* Special Purpose Register General 6 */ +#define SPRN_USPRG6 0x106 /* SPRG6 userspace read */ #define SPRN_SPRG7 0x117 /* Special Purpose Register General 7 */ +#define SPRN_USPRG7 0x107 /* SPRG7 userspace read */ #define SPRN_SRR0 0x01A /* Save/Restore Register 0 */ #define SPRN_SRR1 0x01B /* Save/Restore Register 1 */ #define SRR1_ISI_NOPT 0x40000000 /* ISI: Not found in hash */ @@ -879,11 +883,10 @@ * 64-bit embedded * - SPRG0 generic exception scratch * - SPRG2 TLB exception stack - * - SPRG3 critical exception scratch and - * CPU and NUMA node for VDSO getcpu (user visible) + * - SPRG3 critical exception scratch (user visible, sorry!) * - SPRG4 unused (user visible) * - SPRG6 TLB miss scratch (user visible, sorry !) - * - SPRG7 critical exception scratch + * - SPRG7 CPU and NUMA node for VDSO getcpu (user visible) * - SPRG8 machine check exception scratch * - SPRG9 debug exception scratch * @@ -940,6 +943,8 @@ #define SPRN_SPRG_SCRATCH0 SPRN_SPRG2 #define SPRN_SPRG_HPACA SPRN_HSPRG0 #define SPRN_SPRG_HSCRATCH0 SPRN_HSPRG1 +#define SPRN_SPRG_VDSO_READ SPRN_USPRG3 +#define SPRN_SPRG_VDSO_WRITE SPRN_SPRG3 #define GET_PACA(rX) \ BEGIN_FTR_SECTION_NESTED(66); \ @@ -983,6 +988,8 @@ #define SPRN_SPRG_TLB_SCRATCH SPRN_SPRG6 #define SPRN_SPRG_GEN_SCRATCH SPRN_SPRG0 #define SPRN_SPRG_GDBELL_SCRATCH SPRN_SPRG_GEN_SCRATCH +#define SPRN_SPRG_VDSO_READ SPRN_USPRG7 +#define SPRN_SPRG_VDSO_WRITE SPRN_SPRG7 #define SET_PACA(rX) mtspr SPRN_SPRG_PACA,rX #define GET_PACA(rX) mfspr rX,SPRN_SPRG_PACA diff --git a/arch/powerpc/kernel/asm-offsets.c b/arch/powerpc/kernel/asm-offsets.c index b5aacf72ae6f..dba8140ebc20 100644 --- a/arch/powerpc/kernel/asm-offsets.c +++ b/arch/powerpc/kernel/asm-offsets.c @@ -253,7 +253,7 @@ int main(void) DEFINE(PACA_SYSTEM_TIME, offsetof(struct paca_struct, system_time)); DEFINE(PACA_TRAP_SAVE, offsetof(struct paca_struct, trap_save)); DEFINE(PACA_NAPSTATELOST, offsetof(struct paca_struct, nap_state_lost)); - DEFINE(PACA_SPRG3, offsetof(struct paca_struct, sprg3)); + DEFINE(PACA_SPRG_VDSO, offsetof(struct paca_struct, sprg_vdso)); #endif /* CONFIG_PPC64 */ /* RTAS */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 41380a424108..89e1133b0185 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -55,7 +55,6 @@ mfspr r13,SPRN_SPRG_PACA; /* get PACA */ \ std r10,PACA_EX##type+EX_R10(r13); \ std r11,PACA_EX##type+EX_R11(r13); \ - PROLOG_STORE_RESTORE_SCRATCH_##type; \ mfcr r10; /* save CR */ \ mfspr r11,SPRN_##type##_SRR1;/* what are we coming from */ \ DO_KVM intnum,SPRN_##type##_SRR1; /* KVM hook */ \ @@ -116,20 +115,6 @@ #define GDBELL_EXCEPTION_PROLOG(n, intnum, addition) \ EXCEPTION_PROLOG(n, intnum, GDBELL, addition##_GDBELL(n)) -/* - * Store user-visible scratch in PACA exception slots and restore proper value - */ -#define PROLOG_STORE_RESTORE_SCRATCH_GEN -#define PROLOG_STORE_RESTORE_SCRATCH_GDBELL -#define PROLOG_STORE_RESTORE_SCRATCH_DBG -#define PROLOG_STORE_RESTORE_SCRATCH_MC - -#define PROLOG_STORE_RESTORE_SCRATCH_CRIT \ - mfspr r10,SPRN_SPRG_CRIT_SCRATCH; /* get r13 */ \ - std r10,PACA_EXCRIT+EX_R13(r13); \ - ld r11,PACA_SPRG3(r13); \ - mtspr SPRN_SPRG_CRIT_SCRATCH,r11; - /* Variants of the "addition" argument for the prolog */ #define PROLOG_ADDITION_NONE_GEN(n) @@ -529,7 +514,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) mtcr r10 ld r10,PACA_EXCRIT+EX_R10(r13) /* restore registers */ ld r11,PACA_EXCRIT+EX_R11(r13) - ld r13,PACA_EXCRIT+EX_R13(r13) + mfspr r13,SPRN_SPRG_CRIT_SCRATCH rfci /* Normal debug exception */ @@ -542,7 +527,7 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) /* Now we mash up things to make it look like we are coming on a * normal exception */ - ld r15,PACA_EXCRIT+EX_R13(r13) + mfspr r15,SPRN_SPRG_CRIT_SCRATCH mtspr SPRN_SPRG_GEN_SCRATCH,r15 mfspr r14,SPRN_DBSR EXCEPTION_COMMON(0xd00, PACA_EXCRIT, INTS_DISABLE) diff --git a/arch/powerpc/kernel/vdso.c b/arch/powerpc/kernel/vdso.c index 094e45c16a17..ce74c335a6a4 100644 --- a/arch/powerpc/kernel/vdso.c +++ b/arch/powerpc/kernel/vdso.c @@ -715,8 +715,8 @@ int vdso_getcpu_init(void) unsigned long cpu, node, val; /* - * SPRG3 contains the CPU in the bottom 16 bits and the NUMA node in - * the next 16 bits. The VDSO uses this to implement getcpu(). + * SPRG_VDSO contains the CPU in the bottom 16 bits and the NUMA node + * in the next 16 bits. The VDSO uses this to implement getcpu(). */ cpu = get_cpu(); WARN_ON_ONCE(cpu > 0xffff); @@ -725,8 +725,8 @@ int vdso_getcpu_init(void) WARN_ON_ONCE(node > 0xffff); val = (cpu & 0xfff) | ((node & 0xffff) << 16); - mtspr(SPRN_SPRG3, val); - get_paca()->sprg3 = val; + mtspr(SPRN_SPRG_VDSO_WRITE, val); + get_paca()->sprg_vdso = val; put_cpu(); diff --git a/arch/powerpc/kernel/vdso32/getcpu.S b/arch/powerpc/kernel/vdso32/getcpu.S index 47afd08c90f7..23eb9a9441bd 100644 --- a/arch/powerpc/kernel/vdso32/getcpu.S +++ b/arch/powerpc/kernel/vdso32/getcpu.S @@ -29,7 +29,7 @@ */ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc - mfspr r5,SPRN_USPRG3 + mfspr r5,SPRN_SPRG_VDSO_READ cmpdi cr0,r3,0 cmpdi cr1,r4,0 clrlwi r6,r5,16 diff --git a/arch/powerpc/kernel/vdso64/getcpu.S b/arch/powerpc/kernel/vdso64/getcpu.S index 47afd08c90f7..23eb9a9441bd 100644 --- a/arch/powerpc/kernel/vdso64/getcpu.S +++ b/arch/powerpc/kernel/vdso64/getcpu.S @@ -29,7 +29,7 @@ */ V_FUNCTION_BEGIN(__kernel_getcpu) .cfi_startproc - mfspr r5,SPRN_USPRG3 + mfspr r5,SPRN_SPRG_VDSO_READ cmpdi cr0,r3,0 cmpdi cr1,r4,0 clrlwi r6,r5,16 diff --git a/arch/powerpc/kvm/book3s_hv_rmhandlers.S b/arch/powerpc/kvm/book3s_hv_rmhandlers.S index e66d4ec04d95..fbfca5778b0b 100644 --- a/arch/powerpc/kvm/book3s_hv_rmhandlers.S +++ b/arch/powerpc/kvm/book3s_hv_rmhandlers.S @@ -75,8 +75,8 @@ BEGIN_FTR_SECTION END_FTR_SECTION_IFCLR(CPU_FTR_ARCH_207S) /* Restore SPRG3 */ - ld r3,PACA_SPRG3(r13) - mtspr SPRN_SPRG3,r3 + ld r3,PACA_SPRG_VDSO(r13) + mtspr SPRN_SPRG_VDSO_WRITE,r3 /* Reload the host's PMU registers */ ld r3, PACALPPACAPTR(r13) /* is the host using the PMU? */ diff --git a/arch/powerpc/kvm/book3s_interrupts.S b/arch/powerpc/kvm/book3s_interrupts.S index f779450cb07c..3533c999194a 100644 --- a/arch/powerpc/kvm/book3s_interrupts.S +++ b/arch/powerpc/kvm/book3s_interrupts.S @@ -153,8 +153,8 @@ kvm_start_lightweight: * Reload kernel SPRG3 value. * No need to save guest value as usermode can't modify SPRG3. */ - ld r3, PACA_SPRG3(r13) - mtspr SPRN_SPRG3, r3 + ld r3, PACA_SPRG_VDSO(r13) + mtspr SPRN_SPRG_VDSO_WRITE, r3 #endif /* CONFIG_PPC_BOOK3S_64 */ /* R7 = vcpu */ diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index e4185f6b3309..99635a37c78c 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -229,11 +229,7 @@ stw r10, VCPU_CR(r4) PPC_STL r11, VCPU_GPR(R4)(r4) PPC_STL r5, VCPU_GPR(R5)(r4) - .if \type == EX_CRIT - PPC_LL r5, (\paca_ex + EX_R13)(r13) - .else mfspr r5, \scratch - .endif PPC_STL r6, VCPU_GPR(R6)(r4) PPC_STL r8, VCPU_GPR(R8)(r4) PPC_STL r9, VCPU_GPR(R9)(r4) @@ -435,10 +431,16 @@ _GLOBAL(kvmppc_resume_host) PPC_STL r5, VCPU_LR(r4) mfspr r7, SPRN_SPRG5 stw r3, VCPU_VRSAVE(r4) +#ifdef CONFIG_64BIT + PPC_LL r3, PACA_SPRG_VDSO(r13) +#endif PPC_STD(r6, VCPU_SHARED_SPRG4, r11) mfspr r8, SPRN_SPRG6 PPC_STD(r7, VCPU_SHARED_SPRG5, r11) mfspr r9, SPRN_SPRG7 +#ifdef CONFIG_64BIT + mtspr SPRN_SPRG_VDSO_WRITE, r3 +#endif PPC_STD(r8, VCPU_SHARED_SPRG6, r11) mfxer r3 PPC_STD(r9, VCPU_SHARED_SPRG7, r11) -- cgit v1.2.3 From a3dc620743f44cd509a1ab7b01c33d26fb501c8c Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 10 Mar 2014 17:29:38 -0500 Subject: powerpc/booke64: Use SPRG_TLB_EXFRAME on bolted handlers While bolted handlers (including e6500) do not need to deal with a TLB miss recursively causing another TLB miss, nested TLB misses can still happen with crit/mc/debug exceptions -- so we still need to honor SPRG_TLB_EXFRAME. We don't need to spend time modifying it in the TLB miss fastpath, though -- the special level exception will handle that. Signed-off-by: Scott Wood Cc: Mihai Caraman Cc: kvm-ppc@vger.kernel.org --- arch/powerpc/include/asm/exception-64e.h | 10 ------- arch/powerpc/include/asm/kvm_booke_hv_asm.h | 8 ++++-- arch/powerpc/kvm/bookehv_interrupts.S | 11 ++++++-- arch/powerpc/mm/tlb_low_64e.S | 44 ++++++++++++++++++----------- 4 files changed, 42 insertions(+), 31 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64e.h b/arch/powerpc/include/asm/exception-64e.h index e73452f09019..a563d9afd179 100644 --- a/arch/powerpc/include/asm/exception-64e.h +++ b/arch/powerpc/include/asm/exception-64e.h @@ -172,16 +172,6 @@ exc_##label##_book3e: ld r9,EX_TLB_R9(r12); \ ld r8,EX_TLB_R8(r12); \ mtlr r16; -#define TLB_MISS_PROLOG_STATS_BOLTED \ - mflr r10; \ - std r8,PACA_EXTLB+EX_TLB_R8(r13); \ - std r9,PACA_EXTLB+EX_TLB_R9(r13); \ - std r10,PACA_EXTLB+EX_TLB_LR(r13); -#define TLB_MISS_RESTORE_STATS_BOLTED \ - ld r16,PACA_EXTLB+EX_TLB_LR(r13); \ - ld r9,PACA_EXTLB+EX_TLB_R9(r13); \ - ld r8,PACA_EXTLB+EX_TLB_R8(r13); \ - mtlr r16; #define TLB_MISS_STATS_D(name) \ addi r9,r13,MMSTAT_DSTATS+name; \ bl .tlb_stat_inc; diff --git a/arch/powerpc/include/asm/kvm_booke_hv_asm.h b/arch/powerpc/include/asm/kvm_booke_hv_asm.h index c3e3fd53a3a9..e5f048bbcb7c 100644 --- a/arch/powerpc/include/asm/kvm_booke_hv_asm.h +++ b/arch/powerpc/include/asm/kvm_booke_hv_asm.h @@ -45,10 +45,12 @@ * * Expected inputs (TLB exception type): * r10 = saved CR + * r12 = extlb pointer * r13 = PACA_POINTER - * *(r13 + PACA_EX##type + EX_TLB_R10) = saved r10 - * *(r13 + PACA_EX##type + EX_TLB_R11) = saved r11 - * SPRN_SPRG_GEN_SCRATCH = saved r13 + * *(r12 + EX_TLB_R10) = saved r10 + * *(r12 + EX_TLB_R11) = saved r11 + * *(r12 + EX_TLB_R13) = saved r13 + * SPRN_SPRG_GEN_SCRATCH = saved r12 * * Only the bolted version of TLB miss exception handlers is supported now. */ diff --git a/arch/powerpc/kvm/bookehv_interrupts.S b/arch/powerpc/kvm/bookehv_interrupts.S index 99635a37c78c..a1712b818a5f 100644 --- a/arch/powerpc/kvm/bookehv_interrupts.S +++ b/arch/powerpc/kvm/bookehv_interrupts.S @@ -229,13 +229,20 @@ stw r10, VCPU_CR(r4) PPC_STL r11, VCPU_GPR(R4)(r4) PPC_STL r5, VCPU_GPR(R5)(r4) - mfspr r5, \scratch PPC_STL r6, VCPU_GPR(R6)(r4) PPC_STL r8, VCPU_GPR(R8)(r4) PPC_STL r9, VCPU_GPR(R9)(r4) - PPC_STL r5, VCPU_GPR(R13)(r4) + .if \type == EX_TLB + PPC_LL r5, EX_TLB_R13(r12) + PPC_LL r6, EX_TLB_R10(r12) + PPC_LL r8, EX_TLB_R11(r12) + mfspr r12, \scratch + .else + mfspr r5, \scratch PPC_LL r6, (\paca_ex + \ex_r10)(r13) PPC_LL r8, (\paca_ex + \ex_r11)(r13) + .endif + PPC_STL r5, VCPU_GPR(R13)(r4) PPC_STL r3, VCPU_GPR(R3)(r4) PPC_STL r7, VCPU_GPR(R7)(r4) PPC_STL r12, VCPU_GPR(R12)(r4) diff --git a/arch/powerpc/mm/tlb_low_64e.S b/arch/powerpc/mm/tlb_low_64e.S index 1e50249e900b..356e8b41fb09 100644 --- a/arch/powerpc/mm/tlb_low_64e.S +++ b/arch/powerpc/mm/tlb_low_64e.S @@ -39,37 +39,49 @@ * * **********************************************************************/ +/* + * Note that, unlike non-bolted handlers, TLB_EXFRAME is not + * modified by the TLB miss handlers themselves, since the TLB miss + * handler code will not itself cause a recursive TLB miss. + * + * TLB_EXFRAME will be modified when crit/mc/debug exceptions are + * entered/exited. + */ .macro tlb_prolog_bolted intnum addr - mtspr SPRN_SPRG_GEN_SCRATCH,r13 + mtspr SPRN_SPRG_GEN_SCRATCH,r12 + mfspr r12,SPRN_SPRG_TLB_EXFRAME + std r13,EX_TLB_R13(r12) + std r10,EX_TLB_R10(r12) mfspr r13,SPRN_SPRG_PACA - std r10,PACA_EXTLB+EX_TLB_R10(r13) + mfcr r10 - std r11,PACA_EXTLB+EX_TLB_R11(r13) + std r11,EX_TLB_R11(r12) #ifdef CONFIG_KVM_BOOKE_HV BEGIN_FTR_SECTION mfspr r11, SPRN_SRR1 END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) #endif DO_KVM \intnum, SPRN_SRR1 - std r16,PACA_EXTLB+EX_TLB_R16(r13) + std r16,EX_TLB_R16(r12) mfspr r16,\addr /* get faulting address */ - std r14,PACA_EXTLB+EX_TLB_R14(r13) + std r14,EX_TLB_R14(r12) ld r14,PACAPGD(r13) - std r15,PACA_EXTLB+EX_TLB_R15(r13) - std r10,PACA_EXTLB+EX_TLB_CR(r13) - TLB_MISS_PROLOG_STATS_BOLTED + std r15,EX_TLB_R15(r12) + std r10,EX_TLB_CR(r12) + TLB_MISS_PROLOG_STATS .endm .macro tlb_epilog_bolted - ld r14,PACA_EXTLB+EX_TLB_CR(r13) - ld r10,PACA_EXTLB+EX_TLB_R10(r13) - ld r11,PACA_EXTLB+EX_TLB_R11(r13) + ld r14,EX_TLB_CR(r12) + ld r10,EX_TLB_R10(r12) + ld r11,EX_TLB_R11(r12) + ld r13,EX_TLB_R13(r12) mtcr r14 - ld r14,PACA_EXTLB+EX_TLB_R14(r13) - ld r15,PACA_EXTLB+EX_TLB_R15(r13) - TLB_MISS_RESTORE_STATS_BOLTED - ld r16,PACA_EXTLB+EX_TLB_R16(r13) - mfspr r13,SPRN_SPRG_GEN_SCRATCH + ld r14,EX_TLB_R14(r12) + ld r15,EX_TLB_R15(r12) + TLB_MISS_RESTORE_STATS + ld r16,EX_TLB_R16(r12) + mfspr r12,SPRN_SPRG_GEN_SCRATCH .endm /* Data TLB miss */ -- cgit v1.2.3 From 609af38f8fc0f1dab993b2c67f90d07f761ea902 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Mon, 10 Mar 2014 17:29:38 -0500 Subject: powerpc/booke64: Critical and machine check exception support Add special state saving for critical and machine check exceptions. Most of this code could be used to handle debug exceptions taken from kernel space, but actually doing so is outside the scope of this patch. The various critical and machine check exceptions now point to their real handlers, rather than hanging the kernel. Signed-off-by: Scott Wood --- arch/powerpc/include/asm/paca.h | 7 +- arch/powerpc/kernel/exceptions-64e.S | 343 +++++++++++++++++++++++++++++------ arch/powerpc/mm/tlb_nohash.c | 11 ++ 3 files changed, 300 insertions(+), 61 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/paca.h b/arch/powerpc/include/asm/paca.h index 948f01a04cc3..8e956a0b6e85 100644 --- a/arch/powerpc/include/asm/paca.h +++ b/arch/powerpc/include/asm/paca.h @@ -116,8 +116,11 @@ struct paca_struct { /* Shared by all threads of a core -- points to tcd of first thread */ struct tlb_core_data *tcd_ptr; - /* We can have up to 3 levels of reentrancy in the TLB miss handler */ - u64 extlb[3][EX_TLB_SIZE / sizeof(u64)]; + /* + * We can have up to 3 levels of reentrancy in the TLB miss handler, + * in each of four exception levels (normal, crit, mcheck, debug). + */ + u64 extlb[12][EX_TLB_SIZE / sizeof(u64)]; u64 exmc[8]; /* used for machine checks */ u64 excrit[8]; /* used for crit interrupts */ u64 exdbg[8]; /* used for debug interrupts */ diff --git a/arch/powerpc/kernel/exceptions-64e.S b/arch/powerpc/kernel/exceptions-64e.S index 2beb5bd0728b..c1bee3ce9d1f 100644 --- a/arch/powerpc/kernel/exceptions-64e.S +++ b/arch/powerpc/kernel/exceptions-64e.S @@ -34,20 +34,250 @@ * special interrupts from within a non-standard level will probably * blow you up */ -#define SPECIAL_EXC_FRAME_SIZE INT_FRAME_SIZE +#define SPECIAL_EXC_SRR0 0 +#define SPECIAL_EXC_SRR1 1 +#define SPECIAL_EXC_SPRG_GEN 2 +#define SPECIAL_EXC_SPRG_TLB 3 +#define SPECIAL_EXC_MAS0 4 +#define SPECIAL_EXC_MAS1 5 +#define SPECIAL_EXC_MAS2 6 +#define SPECIAL_EXC_MAS3 7 +#define SPECIAL_EXC_MAS6 8 +#define SPECIAL_EXC_MAS7 9 +#define SPECIAL_EXC_MAS5 10 /* E.HV only */ +#define SPECIAL_EXC_MAS8 11 /* E.HV only */ +#define SPECIAL_EXC_IRQHAPPENED 12 +#define SPECIAL_EXC_DEAR 13 +#define SPECIAL_EXC_ESR 14 +#define SPECIAL_EXC_SOFTE 15 +#define SPECIAL_EXC_CSRR0 16 +#define SPECIAL_EXC_CSRR1 17 +/* must be even to keep 16-byte stack alignment */ +#define SPECIAL_EXC_END 18 + +#define SPECIAL_EXC_FRAME_SIZE (INT_FRAME_SIZE + SPECIAL_EXC_END * 8) +#define SPECIAL_EXC_FRAME_OFFS (INT_FRAME_SIZE - 288) + +#define SPECIAL_EXC_STORE(reg, name) \ + std reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) + +#define SPECIAL_EXC_LOAD(reg, name) \ + ld reg, (SPECIAL_EXC_##name * 8 + SPECIAL_EXC_FRAME_OFFS)(r1) + +special_reg_save: + lbz r9,PACAIRQHAPPENED(r13) + RECONCILE_IRQ_STATE(r3,r4) -/* Now we only store something to exception thread info */ -#define EXC_LEVEL_EXCEPTION_PROLOG(type) \ - ld r14,PACAKSAVE(r13); \ - CURRENT_THREAD_INFO(r14, r14); \ - CURRENT_THREAD_INFO(r15, r1); \ - ld r10,TI_FLAGS(r14); \ - std r10,TI_FLAGS(r15); \ - ld r10,TI_PREEMPT(r14); \ - std r10,TI_PREEMPT(r15); \ - ld r10,TI_TASK(r14); \ - std r10,TI_TASK(r15); + /* + * We only need (or have stack space) to save this stuff if + * we interrupted the kernel. + */ + ld r3,_MSR(r1) + andi. r3,r3,MSR_PR + bnelr + + /* Copy info into temporary exception thread info */ + ld r11,PACAKSAVE(r13) + CURRENT_THREAD_INFO(r11, r11) + CURRENT_THREAD_INFO(r12, r1) + ld r10,TI_FLAGS(r11) + std r10,TI_FLAGS(r12) + ld r10,TI_PREEMPT(r11) + std r10,TI_PREEMPT(r12) + ld r10,TI_TASK(r11) + std r10,TI_TASK(r12) + + /* + * Advance to the next TLB exception frame for handler + * types that don't do it automatically. + */ + LOAD_REG_ADDR(r11,extlb_level_exc) + lwz r12,0(r11) + mfspr r10,SPRN_SPRG_TLB_EXFRAME + add r10,r10,r12 + mtspr SPRN_SPRG_TLB_EXFRAME,r10 + + /* + * Save registers needed to allow nesting of certain exceptions + * (such as TLB misses) inside special exception levels + */ + mfspr r10,SPRN_SRR0 + SPECIAL_EXC_STORE(r10,SRR0) + mfspr r10,SPRN_SRR1 + SPECIAL_EXC_STORE(r10,SRR1) + mfspr r10,SPRN_SPRG_GEN_SCRATCH + SPECIAL_EXC_STORE(r10,SPRG_GEN) + mfspr r10,SPRN_SPRG_TLB_SCRATCH + SPECIAL_EXC_STORE(r10,SPRG_TLB) + mfspr r10,SPRN_MAS0 + SPECIAL_EXC_STORE(r10,MAS0) + mfspr r10,SPRN_MAS1 + SPECIAL_EXC_STORE(r10,MAS1) + mfspr r10,SPRN_MAS2 + SPECIAL_EXC_STORE(r10,MAS2) + mfspr r10,SPRN_MAS3 + SPECIAL_EXC_STORE(r10,MAS3) + mfspr r10,SPRN_MAS6 + SPECIAL_EXC_STORE(r10,MAS6) + mfspr r10,SPRN_MAS7 + SPECIAL_EXC_STORE(r10,MAS7) +BEGIN_FTR_SECTION + mfspr r10,SPRN_MAS5 + SPECIAL_EXC_STORE(r10,MAS5) + mfspr r10,SPRN_MAS8 + SPECIAL_EXC_STORE(r10,MAS8) + + /* MAS5/8 could have inappropriate values if we interrupted KVM code */ + li r10,0 + mtspr SPRN_MAS5,r10 + mtspr SPRN_MAS8,r10 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) + SPECIAL_EXC_STORE(r9,IRQHAPPENED) + + mfspr r10,SPRN_DEAR + SPECIAL_EXC_STORE(r10,DEAR) + mfspr r10,SPRN_ESR + SPECIAL_EXC_STORE(r10,ESR) + + lbz r10,PACASOFTIRQEN(r13) + SPECIAL_EXC_STORE(r10,SOFTE) + ld r10,_NIP(r1) + SPECIAL_EXC_STORE(r10,CSRR0) + ld r10,_MSR(r1) + SPECIAL_EXC_STORE(r10,CSRR1) + + blr + +ret_from_level_except: + ld r3,_MSR(r1) + andi. r3,r3,MSR_PR + beq 1f + b ret_from_except +1: + + LOAD_REG_ADDR(r11,extlb_level_exc) + lwz r12,0(r11) + mfspr r10,SPRN_SPRG_TLB_EXFRAME + sub r10,r10,r12 + mtspr SPRN_SPRG_TLB_EXFRAME,r10 + + /* + * It's possible that the special level exception interrupted a + * TLB miss handler, and inserted the same entry that the + * interrupted handler was about to insert. On CPUs without TLB + * write conditional, this can result in a duplicate TLB entry. + * Wipe all non-bolted entries to be safe. + * + * Note that this doesn't protect against any TLB misses + * we may take accessing the stack from here to the end of + * the special level exception. It's not clear how we can + * reasonably protect against that, but only CPUs with + * neither TLB write conditional nor bolted kernel memory + * are affected. Do any such CPUs even exist? + */ + PPC_TLBILX_ALL(0,R0) + + REST_NVGPRS(r1) + + SPECIAL_EXC_LOAD(r10,SRR0) + mtspr SPRN_SRR0,r10 + SPECIAL_EXC_LOAD(r10,SRR1) + mtspr SPRN_SRR1,r10 + SPECIAL_EXC_LOAD(r10,SPRG_GEN) + mtspr SPRN_SPRG_GEN_SCRATCH,r10 + SPECIAL_EXC_LOAD(r10,SPRG_TLB) + mtspr SPRN_SPRG_TLB_SCRATCH,r10 + SPECIAL_EXC_LOAD(r10,MAS0) + mtspr SPRN_MAS0,r10 + SPECIAL_EXC_LOAD(r10,MAS1) + mtspr SPRN_MAS1,r10 + SPECIAL_EXC_LOAD(r10,MAS2) + mtspr SPRN_MAS2,r10 + SPECIAL_EXC_LOAD(r10,MAS3) + mtspr SPRN_MAS3,r10 + SPECIAL_EXC_LOAD(r10,MAS6) + mtspr SPRN_MAS6,r10 + SPECIAL_EXC_LOAD(r10,MAS7) + mtspr SPRN_MAS7,r10 +BEGIN_FTR_SECTION + SPECIAL_EXC_LOAD(r10,MAS5) + mtspr SPRN_MAS5,r10 + SPECIAL_EXC_LOAD(r10,MAS8) + mtspr SPRN_MAS8,r10 +END_FTR_SECTION_IFSET(CPU_FTR_EMB_HV) + + lbz r6,PACASOFTIRQEN(r13) + ld r5,SOFTE(r1) + + /* Interrupts had better not already be enabled... */ + twnei r6,0 + + cmpwi cr0,r5,0 + beq 1f + TRACE_ENABLE_INTS + stb r5,PACASOFTIRQEN(r13) +1: + /* + * Restore PACAIRQHAPPENED rather than setting it based on + * the return MSR[EE], since we could have interrupted + * __check_irq_replay() or other inconsistent transitory + * states that must remain that way. + */ + SPECIAL_EXC_LOAD(r10,IRQHAPPENED) + stb r10,PACAIRQHAPPENED(r13) + + SPECIAL_EXC_LOAD(r10,DEAR) + mtspr SPRN_DEAR,r10 + SPECIAL_EXC_LOAD(r10,ESR) + mtspr SPRN_ESR,r10 + + stdcx. r0,0,r1 /* to clear the reservation */ + + REST_4GPRS(2, r1) + REST_4GPRS(6, r1) + + ld r10,_CTR(r1) + ld r11,_XER(r1) + mtctr r10 + mtxer r11 + + blr + +.macro ret_from_level srr0 srr1 paca_ex scratch + bl ret_from_level_except + + ld r10,_LINK(r1) + ld r11,_CCR(r1) + ld r0,GPR13(r1) + mtlr r10 + mtcr r11 + + ld r10,GPR10(r1) + ld r11,GPR11(r1) + ld r12,GPR12(r1) + mtspr \scratch,r0 + + std r10,\paca_ex+EX_R10(r13); + std r11,\paca_ex+EX_R11(r13); + ld r10,_NIP(r1) + ld r11,_MSR(r1) + ld r0,GPR0(r1) + ld r1,GPR1(r1) + mtspr \srr0,r10 + mtspr \srr1,r11 + ld r10,\paca_ex+EX_R10(r13) + ld r11,\paca_ex+EX_R11(r13) + mfspr r13,\scratch +.endm + +ret_from_crit_except: + ret_from_level SPRN_CSRR0 SPRN_CSRR1 PACA_EXCRIT SPRN_SPRG_CRIT_SCRATCH + rfci + +ret_from_mc_except: + ret_from_level SPRN_MCSRR0 SPRN_MCSRR1 PACA_EXMC SPRN_SPRG_MC_SCRATCH + rfmci /* Exception prolog code for all exceptions */ #define EXCEPTION_PROLOG(n, intnum, type, addition) \ @@ -81,22 +311,19 @@ #define CRIT_SET_KSTACK \ ld r1,PACA_CRIT_STACK(r13); \ - subi r1,r1,SPECIAL_EXC_FRAME_SIZE; \ - EXC_LEVEL_EXCEPTION_PROLOG(CRIT); + subi r1,r1,SPECIAL_EXC_FRAME_SIZE #define SPRN_CRIT_SRR0 SPRN_CSRR0 #define SPRN_CRIT_SRR1 SPRN_CSRR1 #define DBG_SET_KSTACK \ ld r1,PACA_DBG_STACK(r13); \ - subi r1,r1,SPECIAL_EXC_FRAME_SIZE; \ - EXC_LEVEL_EXCEPTION_PROLOG(DBG); + subi r1,r1,SPECIAL_EXC_FRAME_SIZE #define SPRN_DBG_SRR0 SPRN_DSRR0 #define SPRN_DBG_SRR1 SPRN_DSRR1 #define MC_SET_KSTACK \ ld r1,PACA_MC_STACK(r13); \ - subi r1,r1,SPECIAL_EXC_FRAME_SIZE; \ - EXC_LEVEL_EXCEPTION_PROLOG(MC); + subi r1,r1,SPECIAL_EXC_FRAME_SIZE #define SPRN_MC_SRR0 SPRN_MCSRR0 #define SPRN_MC_SRR1 SPRN_MCSRR1 @@ -322,27 +549,25 @@ interrupt_end_book3e: START_EXCEPTION(critical_input); CRIT_EXCEPTION_PROLOG(0x100, BOOKE_INTERRUPT_CRITICAL, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON_CRIT(0x100) -// INTS_DISABLE -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .critical_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x100) + bl .save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b ret_from_crit_except /* Machine Check Interrupt */ START_EXCEPTION(machine_check); MC_EXCEPTION_PROLOG(0x000, BOOKE_INTERRUPT_MACHINE_CHECK, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON_MC(0x000) -// INTS_DISABLE -// bl special_reg_save_mc -// addi r3,r1,STACK_FRAME_OVERHEAD -// CHECK_NAPPING(); -// bl .machine_check_exception -// b ret_from_mc_except - b . + EXCEPTION_COMMON_MC(0x000) + bl .save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl .machine_check_exception + b ret_from_mc_except /* Data Storage Interrupt */ START_EXCEPTION(data_storage) @@ -461,14 +686,17 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) START_EXCEPTION(watchdog); CRIT_EXCEPTION_PROLOG(0x9f0, BOOKE_INTERRUPT_WATCHDOG, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON_CRIT(0x9f0) -// INTS_DISABLE -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .unknown_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x9f0) + bl .save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD +#ifdef CONFIG_BOOKE_WDT + bl .WatchdogException +#else + bl .unknown_exception +#endif + b ret_from_crit_except /* System Call Interrupt */ START_EXCEPTION(system_call) @@ -541,7 +769,6 @@ END_FTR_SECTION_IFSET(CPU_FTR_ALTIVEC) */ mfspr r14,SPRN_DBSR EXCEPTION_COMMON_CRIT(0xd00) - INTS_DISABLE std r14,_DSISR(r1) addi r3,r1,STACK_FRAME_OVERHEAD mr r4,r14 @@ -634,14 +861,13 @@ kernel_dbg_exc: START_EXCEPTION(doorbell_crit); CRIT_EXCEPTION_PROLOG(0x2a0, BOOKE_INTERRUPT_DOORBELL_CRITICAL, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON_CRIT(0x2a0) -// INTS_DISABLE -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .doorbell_critical_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x2a0) + bl .save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b ret_from_crit_except /* * Guest doorbell interrupt @@ -661,14 +887,13 @@ kernel_dbg_exc: START_EXCEPTION(guest_doorbell_crit); CRIT_EXCEPTION_PROLOG(0x2e0, BOOKE_INTERRUPT_GUEST_DBELL_CRIT, PROLOG_ADDITION_NONE) -// EXCEPTION_COMMON_CRIT(0x2e0) -// INTS_DISABLE -// bl special_reg_save_crit -// CHECK_NAPPING(); -// addi r3,r1,STACK_FRAME_OVERHEAD -// bl .guest_doorbell_critical_exception -// b ret_from_crit_except - b . + EXCEPTION_COMMON_CRIT(0x2e0) + bl .save_nvgprs + bl special_reg_save + CHECK_NAPPING(); + addi r3,r1,STACK_FRAME_OVERHEAD + bl .unknown_exception + b ret_from_crit_except /* Hypervisor call */ START_EXCEPTION(hypercall); diff --git a/arch/powerpc/mm/tlb_nohash.c b/arch/powerpc/mm/tlb_nohash.c index b37a58e1c92d..ae3d5b799b90 100644 --- a/arch/powerpc/mm/tlb_nohash.c +++ b/arch/powerpc/mm/tlb_nohash.c @@ -144,6 +144,15 @@ int mmu_vmemmap_psize; /* Page size used for the virtual mem map */ int book3e_htw_mode; /* HW tablewalk? Value is PPC_HTW_* */ unsigned long linear_map_top; /* Top of linear mapping */ + +/* + * Number of bytes to add to SPRN_SPRG_TLB_EXFRAME on crit/mcheck/debug + * exceptions. This is used for bolted and e6500 TLB miss handlers which + * do not modify this SPRG in the TLB miss code; for other TLB miss handlers, + * this is set to zero. + */ +int extlb_level_exc; + #endif /* CONFIG_PPC64 */ #ifdef CONFIG_PPC_FSL_BOOK3E @@ -559,6 +568,7 @@ static void setup_mmu_htw(void) break; #ifdef CONFIG_PPC_FSL_BOOK3E case PPC_HTW_E6500: + extlb_level_exc = EX_TLB_SIZE; patch_exception(0x1c0, exc_data_tlb_miss_e6500_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_e6500_book3e); break; @@ -652,6 +662,7 @@ static void __early_init_mmu(int boot_cpu) memblock_enforce_memory_limit(linear_map_top); if (book3e_htw_mode == PPC_HTW_NONE) { + extlb_level_exc = EX_TLB_SIZE; patch_exception(0x1c0, exc_data_tlb_miss_bolted_book3e); patch_exception(0x1e0, exc_instruction_tlb_miss_bolted_book3e); -- cgit v1.2.3 From 8d724823220862ce289be3b50119235e03537597 Mon Sep 17 00:00:00 2001 From: Neelesh Gupta Date: Fri, 7 Mar 2014 11:00:24 +0530 Subject: powerpc/powernv: Infrastructure to support OPAL async completion This patch adds support for notifying the clients of their request completion. Clients request for the token before making OPAL call and then wait for the response. This patch uses messaging infrastructure to pull the data to linux by registering itself for the message type OPAL_MSG_ASYNC_COMP. Signed-off-by: Neelesh Gupta Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 12 +- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-async.c | 203 ++++++++++++++++++++++++++++ 3 files changed, 215 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-async.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 2636acfcd340..1e186ff55f3e 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -83,6 +83,8 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_INTERNAL_ERROR -11 #define OPAL_BUSY_EVENT -12 #define OPAL_HARDWARE_FROZEN -13 +#define OPAL_WRONG_STATE -14 +#define OPAL_ASYNC_COMPLETION -15 /* API Tokens (in r0) */ #define OPAL_CONSOLE_WRITE 1 @@ -253,7 +255,9 @@ enum OpalPendingState { }; enum OpalMessageType { - OPAL_MSG_ASYNC_COMP = 0, + OPAL_MSG_ASYNC_COMP = 0, /* params[0] = token, params[1] = rc, + * additional params function-specific + */ OPAL_MSG_MEM_ERR, OPAL_MSG_EPOW, OPAL_MSG_SHUTDOWN, @@ -880,6 +884,12 @@ extern void opal_notifier_update_evt(uint64_t evt_mask, uint64_t evt_val); extern int opal_get_chars(uint32_t vtermno, char *buf, int count); extern int opal_put_chars(uint32_t vtermno, const char *buf, int total_len); +extern int __opal_async_get_token(void); +extern int opal_async_get_token_interruptible(void); +extern int __opal_async_release_token(int token); +extern int opal_async_release_token(int token); +extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg); + extern void hvc_opal_init_early(void); struct rtc_time; diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 5125caeb40f4..3f9309c4218c 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,4 +1,4 @@ -obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o +obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o obj-y += rng.o opal-elog.o opal-dump.o diff --git a/arch/powerpc/platforms/powernv/opal-async.c b/arch/powerpc/platforms/powernv/opal-async.c new file mode 100644 index 000000000000..cd0c1354d404 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-async.c @@ -0,0 +1,203 @@ +/* + * PowerNV OPAL asynchronous completion interfaces + * + * Copyright 2013 IBM Corp. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#undef DEBUG + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define N_ASYNC_COMPLETIONS 64 + +static DECLARE_BITMAP(opal_async_complete_map, N_ASYNC_COMPLETIONS) = {~0UL}; +static DECLARE_BITMAP(opal_async_token_map, N_ASYNC_COMPLETIONS); +static DECLARE_WAIT_QUEUE_HEAD(opal_async_wait); +static DEFINE_SPINLOCK(opal_async_comp_lock); +static struct semaphore opal_async_sem; +static struct opal_msg *opal_async_responses; +static unsigned int opal_max_async_tokens; + +int __opal_async_get_token(void) +{ + unsigned long flags; + int token; + + spin_lock_irqsave(&opal_async_comp_lock, flags); + token = find_first_bit(opal_async_complete_map, opal_max_async_tokens); + if (token >= opal_max_async_tokens) { + token = -EBUSY; + goto out; + } + + if (__test_and_set_bit(token, opal_async_token_map)) { + token = -EBUSY; + goto out; + } + + __clear_bit(token, opal_async_complete_map); + +out: + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + return token; +} + +int opal_async_get_token_interruptible(void) +{ + int token; + + /* Wait until a token is available */ + if (down_interruptible(&opal_async_sem)) + return -ERESTARTSYS; + + token = __opal_async_get_token(); + if (token < 0) + up(&opal_async_sem); + + return token; +} + +int __opal_async_release_token(int token) +{ + unsigned long flags; + + if (token < 0 || token >= opal_max_async_tokens) { + pr_err("%s: Passed token is out of range, token %d\n", + __func__, token); + return -EINVAL; + } + + spin_lock_irqsave(&opal_async_comp_lock, flags); + __set_bit(token, opal_async_complete_map); + __clear_bit(token, opal_async_token_map); + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + + return 0; +} + +int opal_async_release_token(int token) +{ + int ret; + + ret = __opal_async_release_token(token); + if (ret) + return ret; + + up(&opal_async_sem); + + return 0; +} + +int opal_async_wait_response(uint64_t token, struct opal_msg *msg) +{ + if (token >= opal_max_async_tokens) { + pr_err("%s: Invalid token passed\n", __func__); + return -EINVAL; + } + + if (!msg) { + pr_err("%s: Invalid message pointer passed\n", __func__); + return -EINVAL; + } + + wait_event(opal_async_wait, test_bit(token, opal_async_complete_map)); + memcpy(msg, &opal_async_responses[token], sizeof(*msg)); + + return 0; +} + +static int opal_async_comp_event(struct notifier_block *nb, + unsigned long msg_type, void *msg) +{ + struct opal_msg *comp_msg = msg; + unsigned long flags; + + if (msg_type != OPAL_MSG_ASYNC_COMP) + return 0; + + memcpy(&opal_async_responses[comp_msg->params[0]], comp_msg, + sizeof(*comp_msg)); + spin_lock_irqsave(&opal_async_comp_lock, flags); + __set_bit(comp_msg->params[0], opal_async_complete_map); + spin_unlock_irqrestore(&opal_async_comp_lock, flags); + + wake_up(&opal_async_wait); + + return 0; +} + +static struct notifier_block opal_async_comp_nb = { + .notifier_call = opal_async_comp_event, + .next = NULL, + .priority = 0, +}; + +static int __init opal_async_comp_init(void) +{ + struct device_node *opal_node; + const __be32 *async; + int err; + + opal_node = of_find_node_by_path("/ibm,opal"); + if (!opal_node) { + pr_err("%s: Opal node not found\n", __func__); + err = -ENOENT; + goto out; + } + + async = of_get_property(opal_node, "opal-msg-async-num", NULL); + if (!async) { + pr_err("%s: %s has no opal-msg-async-num\n", + __func__, opal_node->full_name); + err = -ENOENT; + goto out_opal_node; + } + + opal_max_async_tokens = be32_to_cpup(async); + if (opal_max_async_tokens > N_ASYNC_COMPLETIONS) + opal_max_async_tokens = N_ASYNC_COMPLETIONS; + + err = opal_message_notifier_register(OPAL_MSG_ASYNC_COMP, + &opal_async_comp_nb); + if (err) { + pr_err("%s: Can't register OPAL event notifier (%d)\n", + __func__, err); + goto out_opal_node; + } + + opal_async_responses = kzalloc( + sizeof(*opal_async_responses) * opal_max_async_tokens, + GFP_KERNEL); + if (!opal_async_responses) { + pr_err("%s: Out of memory, failed to do asynchronous " + "completion init\n", __func__); + err = -ENOMEM; + goto out_opal_node; + } + + /* Initialize to 1 less than the maximum tokens available, as we may + * require to pop one during emergency through synchronous call to + * __opal_async_get_token() + */ + sema_init(&opal_async_sem, opal_max_async_tokens - 1); + +out_opal_node: + of_node_put(opal_node); +out: + return err; +} +subsys_initcall(opal_async_comp_init); -- cgit v1.2.3 From 4029cd66545f0a45258eda5313b7559bfeaaaae4 Mon Sep 17 00:00:00 2001 From: Neelesh Gupta Date: Fri, 7 Mar 2014 11:02:09 +0530 Subject: powerpc/powernv: Enable reading and updating of system parameters This patch enables reading and updating of system parameters through OPAL call. Signed-off-by: Neelesh Gupta Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 16 +- arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-sysparam.c | 290 +++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 2 + arch/powerpc/platforms/powernv/opal.c | 2 + 5 files changed, 310 insertions(+), 2 deletions(-) create mode 100644 arch/powerpc/platforms/powernv/opal-sysparam.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index 1e186ff55f3e..d239223279aa 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -167,8 +167,10 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_DUMP_ACK 84 #define OPAL_GET_MSG 85 #define OPAL_CHECK_ASYNC_COMPLETION 86 -#define OPAL_DUMP_RESEND 91 #define OPAL_SYNC_HOST_REBOOT 87 +#define OPAL_GET_PARAM 89 +#define OPAL_SET_PARAM 90 +#define OPAL_DUMP_RESEND 91 #define OPAL_DUMP_INFO2 94 #ifndef __ASSEMBLY__ @@ -410,6 +412,13 @@ enum OpalLPCAddressType { OPAL_LPC_FW = 2, }; +/* System parameter permission */ +enum OpalSysparamPerm { + OPAL_SYSPARAM_READ = 0x1, + OPAL_SYSPARAM_WRITE = 0x2, + OPAL_SYSPARAM_RW = (OPAL_SYSPARAM_READ | OPAL_SYSPARAM_WRITE), +}; + struct opal_msg { uint32_t msg_type; uint32_t reserved; @@ -859,6 +868,10 @@ int64_t opal_dump_resend_notification(void); int64_t opal_get_msg(uint64_t buffer, size_t size); int64_t opal_check_completion(uint64_t buffer, size_t size, uint64_t token); int64_t opal_sync_host_reboot(void); +int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, + size_t length); +int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, + size_t length); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -900,6 +913,7 @@ extern void opal_nvram_init(void); extern void opal_flash_init(void); extern int opal_elog_init(void); extern void opal_platform_dump_init(void); +extern void opal_sys_param_init(void); extern int opal_machine_check(struct pt_regs *regs); extern bool opal_mce_check_early_recovery(struct pt_regs *regs); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 3f9309c4218c..760b49948f76 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,6 +1,6 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o -obj-y += rng.o opal-elog.o opal-dump.o +obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-sysparam.c b/arch/powerpc/platforms/powernv/opal-sysparam.c new file mode 100644 index 000000000000..0bd249a26f30 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sysparam.c @@ -0,0 +1,290 @@ +/* + * PowerNV system parameter code + * + * Copyright (C) 2013 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include +#include +#include +#include +#include + +#define MAX_PARAM_DATA_LEN 64 + +static DEFINE_MUTEX(opal_sysparam_mutex); +static struct kobject *sysparam_kobj; +static void *param_data_buf; + +struct param_attr { + struct list_head list; + u32 param_id; + u32 param_size; + struct kobj_attribute kobj_attr; +}; + +static int opal_get_sys_param(u32 param_id, u32 length, void *buffer) +{ + struct opal_msg msg; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("%s: Couldn't get the token, returning\n", + __func__); + ret = token; + goto out; + } + + ret = opal_get_param(token, param_id, (u64)buffer, length); + if (ret != OPAL_ASYNC_COMPLETION) + goto out_token; + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = msg.params[1]; + +out_token: + opal_async_release_token(token); +out: + return ret; +} + +static int opal_set_sys_param(u32 param_id, u32 length, void *buffer) +{ + struct opal_msg msg; + int ret, token; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("%s: Couldn't get the token, returning\n", + __func__); + ret = token; + goto out; + } + + ret = opal_set_param(token, param_id, (u64)buffer, length); + + if (ret != OPAL_ASYNC_COMPLETION) + goto out_token; + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = msg.params[1]; + +out_token: + opal_async_release_token(token); +out: + return ret; +} + +static ssize_t sys_param_show(struct kobject *kobj, + struct kobj_attribute *kobj_attr, char *buf) +{ + struct param_attr *attr = container_of(kobj_attr, struct param_attr, + kobj_attr); + int ret; + + mutex_lock(&opal_sysparam_mutex); + ret = opal_get_sys_param(attr->param_id, attr->param_size, + param_data_buf); + if (ret) + goto out; + + memcpy(buf, param_data_buf, attr->param_size); + +out: + mutex_unlock(&opal_sysparam_mutex); + return ret ? ret : attr->param_size; +} + +static ssize_t sys_param_store(struct kobject *kobj, + struct kobj_attribute *kobj_attr, const char *buf, size_t count) +{ + struct param_attr *attr = container_of(kobj_attr, struct param_attr, + kobj_attr); + int ret; + + mutex_lock(&opal_sysparam_mutex); + memcpy(param_data_buf, buf, count); + ret = opal_set_sys_param(attr->param_id, attr->param_size, + param_data_buf); + mutex_unlock(&opal_sysparam_mutex); + return ret ? ret : count; +} + +void __init opal_sys_param_init(void) +{ + struct device_node *sysparam; + struct param_attr *attr; + u32 *id, *size; + int count, i; + u8 *perm; + + if (!opal_kobj) { + pr_warn("SYSPARAM: opal kobject is not available\n"); + goto out; + } + + sysparam_kobj = kobject_create_and_add("sysparams", opal_kobj); + if (!sysparam_kobj) { + pr_err("SYSPARAM: Failed to create sysparam kobject\n"); + goto out; + } + + /* Allocate big enough buffer for any get/set transactions */ + param_data_buf = kzalloc(MAX_PARAM_DATA_LEN, GFP_KERNEL); + if (!param_data_buf) { + pr_err("SYSPARAM: Failed to allocate memory for param data " + "buf\n"); + goto out_kobj_put; + } + + sysparam = of_find_node_by_path("/ibm,opal/sysparams"); + if (!sysparam) { + pr_err("SYSPARAM: Opal sysparam node not found\n"); + goto out_param_buf; + } + + if (!of_device_is_compatible(sysparam, "ibm,opal-sysparams")) { + pr_err("SYSPARAM: Opal sysparam node not compatible\n"); + goto out_node_put; + } + + /* Number of parameters exposed through DT */ + count = of_property_count_strings(sysparam, "param-name"); + if (count < 0) { + pr_err("SYSPARAM: No string found of property param-name in " + "the node %s\n", sysparam->name); + goto out_node_put; + } + + id = kzalloc(sizeof(*id) * count, GFP_KERNEL); + if (!id) { + pr_err("SYSPARAM: Failed to allocate memory to read parameter " + "id\n"); + goto out_node_put; + } + + size = kzalloc(sizeof(*size) * count, GFP_KERNEL); + if (!size) { + pr_err("SYSPARAM: Failed to allocate memory to read parameter " + "size\n"); + goto out_free_id; + } + + perm = kzalloc(sizeof(*perm) * count, GFP_KERNEL); + if (!perm) { + pr_err("SYSPARAM: Failed to allocate memory to read supported " + "action on the parameter"); + goto out_free_size; + } + + if (of_property_read_u32_array(sysparam, "param-id", id, count)) { + pr_err("SYSPARAM: Missing property param-id in the DT\n"); + goto out_free_perm; + } + + if (of_property_read_u32_array(sysparam, "param-len", size, count)) { + pr_err("SYSPARAM: Missing propery param-len in the DT\n"); + goto out_free_perm; + } + + + if (of_property_read_u8_array(sysparam, "param-perm", perm, count)) { + pr_err("SYSPARAM: Missing propery param-perm in the DT\n"); + goto out_free_perm; + } + + attr = kzalloc(sizeof(*attr) * count, GFP_KERNEL); + if (!attr) { + pr_err("SYSPARAM: Failed to allocate memory for parameter " + "attributes\n"); + goto out_free_perm; + } + + /* For each of the parameters, populate the parameter attributes */ + for (i = 0; i < count; i++) { + sysfs_attr_init(&attr[i].kobj_attr.attr); + attr[i].param_id = id[i]; + attr[i].param_size = size[i]; + if (of_property_read_string_index(sysparam, "param-name", i, + &attr[i].kobj_attr.attr.name)) + continue; + + /* If the parameter is read-only or read-write */ + switch (perm[i] & 3) { + case OPAL_SYSPARAM_READ: + attr[i].kobj_attr.attr.mode = S_IRUGO; + break; + case OPAL_SYSPARAM_WRITE: + attr[i].kobj_attr.attr.mode = S_IWUGO; + break; + case OPAL_SYSPARAM_RW: + attr[i].kobj_attr.attr.mode = S_IRUGO | S_IWUGO; + break; + default: + break; + } + + attr[i].kobj_attr.show = sys_param_show; + attr[i].kobj_attr.store = sys_param_store; + + if (sysfs_create_file(sysparam_kobj, &attr[i].kobj_attr.attr)) { + pr_err("SYSPARAM: Failed to create sysfs file %s\n", + attr[i].kobj_attr.attr.name); + goto out_free_attr; + } + } + + kfree(perm); + kfree(size); + kfree(id); + of_node_put(sysparam); + return; + +out_free_attr: + kfree(attr); +out_free_perm: + kfree(perm); +out_free_size: + kfree(size); +out_free_id: + kfree(id); +out_node_put: + of_node_put(sysparam); +out_param_buf: + kfree(param_data_buf); +out_kobj_put: + kobject_put(sysparam_kobj); +out: + return; +} diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index 47ec3f738062..b17f2d8c4073 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -140,3 +140,5 @@ OPAL_CALL(opal_get_msg, OPAL_GET_MSG); OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND); OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); +OPAL_CALL(opal_get_param, OPAL_GET_PARAM); +OPAL_CALL(opal_set_param, OPAL_SET_PARAM); diff --git a/arch/powerpc/platforms/powernv/opal.c b/arch/powerpc/platforms/powernv/opal.c index 2e269c27dd23..e92f2f67640f 100644 --- a/arch/powerpc/platforms/powernv/opal.c +++ b/arch/powerpc/platforms/powernv/opal.c @@ -572,6 +572,8 @@ static int __init opal_init(void) opal_flash_init(); /* Setup platform dump extract interface */ opal_platform_dump_init(); + /* Setup system parameters interface */ + opal_sys_param_init(); } return 0; -- cgit v1.2.3 From 7224adbbb80329d1a3ec5aa98213b50365fcd246 Mon Sep 17 00:00:00 2001 From: Neelesh Gupta Date: Fri, 7 Mar 2014 11:03:27 +0530 Subject: powerpc/powernv: Enable fetching of platform sensor data This patch enables fetching of various platform sensor data through OPAL and expects a sensor handle from the driver to pass to OPAL. Signed-off-by: Neelesh Gupta Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/opal.h | 4 ++ arch/powerpc/platforms/powernv/Makefile | 2 +- arch/powerpc/platforms/powernv/opal-sensor.c | 64 ++++++++++++++++++++++++++ arch/powerpc/platforms/powernv/opal-wrappers.S | 1 + 4 files changed, 70 insertions(+), 1 deletion(-) create mode 100644 arch/powerpc/platforms/powernv/opal-sensor.c (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/opal.h b/arch/powerpc/include/asm/opal.h index d239223279aa..ffafab037ba8 100644 --- a/arch/powerpc/include/asm/opal.h +++ b/arch/powerpc/include/asm/opal.h @@ -168,6 +168,7 @@ extern int opal_enter_rtas(struct rtas_args *args, #define OPAL_GET_MSG 85 #define OPAL_CHECK_ASYNC_COMPLETION 86 #define OPAL_SYNC_HOST_REBOOT 87 +#define OPAL_SENSOR_READ 88 #define OPAL_GET_PARAM 89 #define OPAL_SET_PARAM 90 #define OPAL_DUMP_RESEND 91 @@ -872,6 +873,8 @@ int64_t opal_get_param(uint64_t token, uint32_t param_id, uint64_t buffer, size_t length); int64_t opal_set_param(uint64_t token, uint32_t param_id, uint64_t buffer, size_t length); +int64_t opal_sensor_read(uint32_t sensor_hndl, int token, + uint32_t *sensor_data); /* Internal functions */ extern int early_init_dt_scan_opal(unsigned long node, const char *uname, int depth, void *data); @@ -902,6 +905,7 @@ extern int opal_async_get_token_interruptible(void); extern int __opal_async_release_token(int token); extern int opal_async_release_token(int token); extern int opal_async_wait_response(uint64_t token, struct opal_msg *msg); +extern int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data); extern void hvc_opal_init_early(void); diff --git a/arch/powerpc/platforms/powernv/Makefile b/arch/powerpc/platforms/powernv/Makefile index 760b49948f76..f324ea099503 100644 --- a/arch/powerpc/platforms/powernv/Makefile +++ b/arch/powerpc/platforms/powernv/Makefile @@ -1,6 +1,6 @@ obj-y += setup.o opal-takeover.o opal-wrappers.o opal.o opal-async.o obj-y += opal-rtc.o opal-nvram.o opal-lpc.o opal-flash.o -obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o +obj-y += rng.o opal-elog.o opal-dump.o opal-sysparam.o opal-sensor.o obj-$(CONFIG_SMP) += smp.o obj-$(CONFIG_PCI) += pci.o pci-p5ioc2.o pci-ioda.o diff --git a/arch/powerpc/platforms/powernv/opal-sensor.c b/arch/powerpc/platforms/powernv/opal-sensor.c new file mode 100644 index 000000000000..663cc9c65613 --- /dev/null +++ b/arch/powerpc/platforms/powernv/opal-sensor.c @@ -0,0 +1,64 @@ +/* + * PowerNV sensor code + * + * Copyright (C) 2013 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + */ + +#include +#include +#include + +static DEFINE_MUTEX(opal_sensor_mutex); + +/* + * This will return sensor information to driver based on the requested sensor + * handle. A handle is an opaque id for the powernv, read by the driver from the + * device tree.. + */ +int opal_get_sensor_data(u32 sensor_hndl, u32 *sensor_data) +{ + int ret, token; + struct opal_msg msg; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + pr_err("%s: Couldn't get the token, returning\n", __func__); + ret = token; + goto out; + } + + mutex_lock(&opal_sensor_mutex); + ret = opal_sensor_read(sensor_hndl, token, sensor_data); + if (ret != OPAL_ASYNC_COMPLETION) + goto out_token; + + ret = opal_async_wait_response(token, &msg); + if (ret) { + pr_err("%s: Failed to wait for the async response, %d\n", + __func__, ret); + goto out_token; + } + + ret = msg.params[1]; + +out_token: + mutex_unlock(&opal_sensor_mutex); + opal_async_release_token(token); +out: + return ret; +} +EXPORT_SYMBOL_GPL(opal_get_sensor_data); diff --git a/arch/powerpc/platforms/powernv/opal-wrappers.S b/arch/powerpc/platforms/powernv/opal-wrappers.S index b17f2d8c4073..75c89df8d71e 100644 --- a/arch/powerpc/platforms/powernv/opal-wrappers.S +++ b/arch/powerpc/platforms/powernv/opal-wrappers.S @@ -140,5 +140,6 @@ OPAL_CALL(opal_get_msg, OPAL_GET_MSG); OPAL_CALL(opal_check_completion, OPAL_CHECK_ASYNC_COMPLETION); OPAL_CALL(opal_dump_resend_notification, OPAL_DUMP_RESEND); OPAL_CALL(opal_sync_host_reboot, OPAL_SYNC_HOST_REBOOT); +OPAL_CALL(opal_sensor_read, OPAL_SENSOR_READ); OPAL_CALL(opal_get_param, OPAL_GET_PARAM); OPAL_CALL(opal_set_param, OPAL_SET_PARAM); -- cgit v1.2.3 From 5f6d0380c64051400961accf99ec41e70ec6d8ca Mon Sep 17 00:00:00 2001 From: Anshuman Khandual Date: Fri, 14 Mar 2014 16:00:27 +1100 Subject: powerpc/perf: Define perf_event_print_debug() to print PMU register values Currently the sysrq ShowRegs command does not print any PMU registers as we have an empty definition for perf_event_print_debug(). This patch defines perf_event_print_debug() to print various PMU registers. Example output: CPU: 0 PMU registers, ppmu = POWER7 n_counters = 6 PMC1: 00000000 PMC2: 00000000 PMC3: 00000000 PMC4: 00000000 PMC5: 00000000 PMC6: 00000000 PMC7: deadbeef PMC8: deadbeef MMCR0: 0000000080000000 MMCR1: 0000000000000000 MMCRA: 0f00000001000000 SIAR: 0000000000000000 SDAR: 0000000000000000 SIER: 0000000000000000 Signed-off-by: Anshuman Khandual [mpe: Fix 32 bit build and rework formatting for compactness] Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/perf_event_server.h | 1 + arch/powerpc/perf/core-book3s.c | 55 ++++++++++++++++++++++++++-- 2 files changed, 52 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/perf_event_server.h b/arch/powerpc/include/asm/perf_event_server.h index 3fd2f1b6f906..9ed737146dbb 100644 --- a/arch/powerpc/include/asm/perf_event_server.h +++ b/arch/powerpc/include/asm/perf_event_server.h @@ -14,6 +14,7 @@ #include #include +/* Update perf_event_print_debug() if this changes */ #define MAX_HWEVENTS 8 #define MAX_EVENT_ALTERNATIVES 8 #define MAX_LIMITED_HWCOUNTERS 2 diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 67cf22083f4c..704d1c5fefca 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -563,10 +563,6 @@ out: static void perf_event_interrupt(struct pt_regs *regs); -void perf_event_print_debug(void) -{ -} - /* * Read one performance monitor counter (PMC). */ @@ -645,6 +641,57 @@ static void write_pmc(int idx, unsigned long val) } } +/* Called from sysrq_handle_showregs() */ +void perf_event_print_debug(void) +{ + unsigned long sdar, sier, flags; + u32 pmcs[MAX_HWEVENTS]; + int i; + + if (!ppmu->n_counter) + return; + + local_irq_save(flags); + + pr_info("CPU: %d PMU registers, ppmu = %s n_counters = %d", + smp_processor_id(), ppmu->name, ppmu->n_counter); + + for (i = 0; i < ppmu->n_counter; i++) + pmcs[i] = read_pmc(i + 1); + + for (; i < MAX_HWEVENTS; i++) + pmcs[i] = 0xdeadbeef; + + pr_info("PMC1: %08x PMC2: %08x PMC3: %08x PMC4: %08x\n", + pmcs[0], pmcs[1], pmcs[2], pmcs[3]); + + if (ppmu->n_counter > 4) + pr_info("PMC5: %08x PMC6: %08x PMC7: %08x PMC8: %08x\n", + pmcs[4], pmcs[5], pmcs[6], pmcs[7]); + + pr_info("MMCR0: %016lx MMCR1: %016lx MMCRA: %016lx\n", + mfspr(SPRN_MMCR0), mfspr(SPRN_MMCR1), mfspr(SPRN_MMCRA)); + + sdar = sier = 0; +#ifdef CONFIG_PPC64 + sdar = mfspr(SPRN_SDAR); + + if (ppmu->flags & PPMU_HAS_SIER) + sier = mfspr(SPRN_SIER); + + if (ppmu->flags & PPMU_EBB) { + pr_info("MMCR2: %016lx EBBHR: %016lx\n", + mfspr(SPRN_MMCR2), mfspr(SPRN_EBBHR)); + pr_info("EBBRR: %016lx BESCR: %016lx\n", + mfspr(SPRN_EBBRR), mfspr(SPRN_BESCR)); + } +#endif + pr_info("SIAR: %016lx SDAR: %016lx SIER: %016lx\n", + mfspr(SPRN_SIAR), sdar, sier); + + local_irq_restore(flags); +} + /* * Check if a set of events can all go on the PMU at once. * If they can't, this will look at alternative codes for the events -- cgit v1.2.3 From 68f2f0d431d9ea4fbd373cd31e828b0ceaefea30 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Mar 2014 16:00:28 +1100 Subject: powerpc: Add a cpu feature CPU_FTR_PMAO_BUG Some power8 revisions have a hardware bug where we can lose a Performance Monitor (PMU) exception under certain circumstances. We will be adding a workaround for this case, see the next commit for details. The observed behaviour is that writing PMAO doesn't cause an exception as we would expect, hence the name of the feature. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/cputable.h | 6 ++++-- arch/powerpc/kernel/cputable.c | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/cputable.h b/arch/powerpc/include/asm/cputable.h index 617cc767c076..bc2347774f0a 100644 --- a/arch/powerpc/include/asm/cputable.h +++ b/arch/powerpc/include/asm/cputable.h @@ -189,6 +189,7 @@ extern const char *powerpc_base_platform; #define CPU_FTR_HAS_PPR LONG_ASM_CONST(0x0200000000000000) #define CPU_FTR_DAWR LONG_ASM_CONST(0x0400000000000000) #define CPU_FTR_DABRX LONG_ASM_CONST(0x0800000000000000) +#define CPU_FTR_PMAO_BUG LONG_ASM_CONST(0x1000000000000000) #ifndef __ASSEMBLY__ @@ -445,6 +446,7 @@ extern const char *powerpc_base_platform; CPU_FTR_ICSWX | CPU_FTR_CFAR | CPU_FTR_HVMODE | CPU_FTR_VMX_COPY | \ CPU_FTR_DBELL | CPU_FTR_HAS_PPR | CPU_FTR_DAWR | \ CPU_FTR_ARCH_207S | CPU_FTR_TM_COMP) +#define CPU_FTRS_POWER8E (CPU_FTRS_POWER8 | CPU_FTR_PMAO_BUG) #define CPU_FTRS_CELL (CPU_FTR_USE_TB | CPU_FTR_LWSYNC | \ CPU_FTR_PPCAS_ARCH_V2 | CPU_FTR_CTRL | \ CPU_FTR_ALTIVEC_COMP | CPU_FTR_MMCRA | CPU_FTR_SMT | \ @@ -466,8 +468,8 @@ extern const char *powerpc_base_platform; #define CPU_FTRS_POSSIBLE \ (CPU_FTRS_POWER3 | CPU_FTRS_RS64 | CPU_FTRS_POWER4 | \ CPU_FTRS_PPC970 | CPU_FTRS_POWER5 | CPU_FTRS_POWER6 | \ - CPU_FTRS_POWER7 | CPU_FTRS_POWER8 | CPU_FTRS_CELL | \ - CPU_FTRS_PA6T | CPU_FTR_VSX) + CPU_FTRS_POWER7 | CPU_FTRS_POWER8E | CPU_FTRS_POWER8 | \ + CPU_FTRS_CELL | CPU_FTRS_PA6T | CPU_FTR_VSX) #endif #else enum { diff --git a/arch/powerpc/kernel/cputable.c b/arch/powerpc/kernel/cputable.c index 6c8dd5da4de5..c1faade6506d 100644 --- a/arch/powerpc/kernel/cputable.c +++ b/arch/powerpc/kernel/cputable.c @@ -510,7 +510,7 @@ static struct cpu_spec __initdata cpu_specs[] = { .pvr_mask = 0xffff0000, .pvr_value = 0x004b0000, .cpu_name = "POWER8E (raw)", - .cpu_features = CPU_FTRS_POWER8, + .cpu_features = CPU_FTRS_POWER8E, .cpu_user_features = COMMON_USER_POWER8, .cpu_user_features2 = COMMON_USER2_POWER8, .mmu_features = MMU_FTRS_POWER8, -- cgit v1.2.3 From c2e37a2626a7471875f3a6452b99dfd3809972b9 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Mar 2014 16:00:29 +1100 Subject: powerpc/perf: Add lost exception workaround Some power8 revisions have a hardware bug where we can lose a PMU exception, this commit adds a workaround to detect the bad condition and rectify the situation. See the comment in the commit for a full description. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 2 + arch/powerpc/perf/core-book3s.c | 100 +++++++++++++++++++++++++++++++++++++++- arch/powerpc/perf/power8-pmu.c | 5 ++ 3 files changed, 105 insertions(+), 2 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 90c06ec6eff5..30034720589a 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -670,6 +670,7 @@ #define MMCR0_PMC1CE 0x00008000UL /* PMC1 count enable*/ #define MMCR0_PMCjCE 0x00004000UL /* PMCj count enable*/ #define MMCR0_TRIGGER 0x00002000UL /* TRIGGER enable */ +#define MMCR0_PMAO_SYNC 0x00000800UL /* PMU interrupt is synchronous */ #define MMCR0_PMAO 0x00000080UL /* performance monitor alert has occurred, set to 0 after handling exception */ #define MMCR0_SHRFC 0x00000040UL /* SHRre freeze conditions between threads */ #define MMCR0_FC56 0x00000010UL /* freeze counters 5 and 6 */ @@ -703,6 +704,7 @@ #define SPRN_EBBHR 804 /* Event based branch handler register */ #define SPRN_EBBRR 805 /* Event based branch return register */ #define SPRN_BESCR 806 /* Branch event status and control register */ +#define BESCR_GE 0x8000000000000000ULL /* Global Enable */ #define SPRN_WORT 895 /* Workload optimization register - thread */ #define SPRN_PMC1 787 diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 704d1c5fefca..53ac1b20e14d 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -120,6 +120,7 @@ static inline void power_pmu_bhrb_enable(struct perf_event *event) {} static inline void power_pmu_bhrb_disable(struct perf_event *event) {} void power_pmu_flush_branch_stack(void) {} static inline void power_pmu_bhrb_read(struct cpu_hw_events *cpuhw) {} +static void pmao_restore_workaround(bool ebb) { } #endif /* CONFIG_PPC32 */ static bool regs_use_siar(struct pt_regs *regs) @@ -545,10 +546,18 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) /* Enable EBB and read/write to all 6 PMCs for userspace */ mmcr0 |= MMCR0_EBE | MMCR0_PMCC_U6; - /* Add any bits from the user reg, FC or PMAO */ + /* + * Add any bits from the user MMCR0, FC or PMAO. This is compatible + * with pmao_restore_workaround() because we may add PMAO but we never + * clear it here. + */ mmcr0 |= current->thread.mmcr0; - /* Be careful not to set PMXE if userspace had it cleared */ + /* + * Be careful not to set PMXE if userspace had it cleared. This is also + * compatible with pmao_restore_workaround() because it has already + * cleared PMXE and we leave PMAO alone. + */ if (!(current->thread.mmcr0 & MMCR0_PMXE)) mmcr0 &= ~MMCR0_PMXE; @@ -559,6 +568,91 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) out: return mmcr0; } + +static void pmao_restore_workaround(bool ebb) +{ + unsigned pmcs[6]; + + if (!cpu_has_feature(CPU_FTR_PMAO_BUG)) + return; + + /* + * On POWER8E there is a hardware defect which affects the PMU context + * switch logic, ie. power_pmu_disable/enable(). + * + * When a counter overflows PMXE is cleared and FC/PMAO is set in MMCR0 + * by the hardware. Sometime later the actual PMU exception is + * delivered. + * + * If we context switch, or simply disable/enable, the PMU prior to the + * exception arriving, the exception will be lost when we clear PMAO. + * + * When we reenable the PMU, we will write the saved MMCR0 with PMAO + * set, and this _should_ generate an exception. However because of the + * defect no exception is generated when we write PMAO, and we get + * stuck with no counters counting but no exception delivered. + * + * The workaround is to detect this case and tweak the hardware to + * create another pending PMU exception. + * + * We do that by setting up PMC6 (cycles) for an imminent overflow and + * enabling the PMU. That causes a new exception to be generated in the + * chip, but we don't take it yet because we have interrupts hard + * disabled. We then write back the PMU state as we want it to be seen + * by the exception handler. When we reenable interrupts the exception + * handler will be called and see the correct state. + * + * The logic is the same for EBB, except that the exception is gated by + * us having interrupts hard disabled as well as the fact that we are + * not in userspace. The exception is finally delivered when we return + * to userspace. + */ + + /* Only if PMAO is set and PMAO_SYNC is clear */ + if ((current->thread.mmcr0 & (MMCR0_PMAO | MMCR0_PMAO_SYNC)) != MMCR0_PMAO) + return; + + /* If we're doing EBB, only if BESCR[GE] is set */ + if (ebb && !(current->thread.bescr & BESCR_GE)) + return; + + /* + * We are already soft-disabled in power_pmu_enable(). We need to hard + * enable to actually prevent the PMU exception from firing. + */ + hard_irq_disable(); + + /* + * This is a bit gross, but we know we're on POWER8E and have 6 PMCs. + * Using read/write_pmc() in a for loop adds 12 function calls and + * almost doubles our code size. + */ + pmcs[0] = mfspr(SPRN_PMC1); + pmcs[1] = mfspr(SPRN_PMC2); + pmcs[2] = mfspr(SPRN_PMC3); + pmcs[3] = mfspr(SPRN_PMC4); + pmcs[4] = mfspr(SPRN_PMC5); + pmcs[5] = mfspr(SPRN_PMC6); + + /* Ensure all freeze bits are unset */ + mtspr(SPRN_MMCR2, 0); + + /* Set up PMC6 to overflow in one cycle */ + mtspr(SPRN_PMC6, 0x7FFFFFFE); + + /* Enable exceptions and unfreeze PMC6 */ + mtspr(SPRN_MMCR0, MMCR0_PMXE | MMCR0_PMCjCE | MMCR0_PMAO); + + /* Now we need to refreeze and restore the PMCs */ + mtspr(SPRN_MMCR0, MMCR0_FC | MMCR0_PMAO); + + mtspr(SPRN_PMC1, pmcs[0]); + mtspr(SPRN_PMC2, pmcs[1]); + mtspr(SPRN_PMC3, pmcs[2]); + mtspr(SPRN_PMC4, pmcs[3]); + mtspr(SPRN_PMC5, pmcs[4]); + mtspr(SPRN_PMC6, pmcs[5]); +} #endif /* CONFIG_PPC64 */ static void perf_event_interrupt(struct pt_regs *regs); @@ -1191,6 +1285,8 @@ static void power_pmu_enable(struct pmu *pmu) cpuhw->mmcr[0] |= MMCR0_PMXE | MMCR0_FCECE; out_enable: + pmao_restore_workaround(ebb); + mmcr0 = ebb_switch_in(ebb, cpuhw->mmcr[0]); mb(); diff --git a/arch/powerpc/perf/power8-pmu.c b/arch/powerpc/perf/power8-pmu.c index 96cee20dcd34..64f04cfabd23 100644 --- a/arch/powerpc/perf/power8-pmu.c +++ b/arch/powerpc/perf/power8-pmu.c @@ -10,6 +10,8 @@ * 2 of the License, or (at your option) any later version. */ +#define pr_fmt(fmt) "power8-pmu: " fmt + #include #include #include @@ -774,6 +776,9 @@ static int __init init_power8_pmu(void) /* Tell userspace that EBB is supported */ cur_cpu_spec->cpu_user_features2 |= PPC_FEATURE2_EBB; + if (cpu_has_feature(CPU_FTR_PMAO_BUG)) + pr_info("PMAO restore workaround active.\n"); + return 0; } early_initcall(init_power8_pmu); -- cgit v1.2.3 From 76cb8a783a4b7403a6731ffdeec0831cd4b19936 Mon Sep 17 00:00:00 2001 From: Michael Ellerman Date: Fri, 14 Mar 2014 16:00:34 +1100 Subject: powerpc/perf: Enable BHRB access for EBB events The previous commit added constraint and register handling to allow processes using EBB (Event Based Branches) to request access to the BHRB (Branch History Rolling Buffer). With that in place we can allow processes using EBB to access the BHRB. This is achieved by setting BHRBA in MMCR0 when we enable EBB access. We must also clear BHRBA when we are disabling. Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/reg.h | 1 + arch/powerpc/perf/core-book3s.c | 10 ++++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/reg.h b/arch/powerpc/include/asm/reg.h index 30034720589a..980e8db2f261 100644 --- a/arch/powerpc/include/asm/reg.h +++ b/arch/powerpc/include/asm/reg.h @@ -664,6 +664,7 @@ #define MMCR0_PMXE 0x04000000UL /* performance monitor exception enable */ #define MMCR0_FCECE 0x02000000UL /* freeze ctrs on enabled cond or event */ #define MMCR0_TBEE 0x00400000UL /* time base exception enable */ +#define MMCR0_BHRBA 0x00200000UL /* BHRB Access allowed in userspace */ #define MMCR0_EBE 0x00100000UL /* Event based branch enable */ #define MMCR0_PMCC 0x000c0000UL /* PMC control */ #define MMCR0_PMCC_U6 0x00080000UL /* PMC1-6 are R/W by user (PR) */ diff --git a/arch/powerpc/perf/core-book3s.c b/arch/powerpc/perf/core-book3s.c index 1e095fd68b6c..4520c9356b54 100644 --- a/arch/powerpc/perf/core-book3s.c +++ b/arch/powerpc/perf/core-book3s.c @@ -78,6 +78,7 @@ static unsigned int freeze_events_kernel = MMCR0_FCS; #define MMCR0_FC56 0 #define MMCR0_PMAO 0 #define MMCR0_EBE 0 +#define MMCR0_BHRBA 0 #define MMCR0_PMCC 0 #define MMCR0_PMCC_U6 0 @@ -546,8 +547,8 @@ static unsigned long ebb_switch_in(bool ebb, unsigned long mmcr0) if (!ebb) goto out; - /* Enable EBB and read/write to all 6 PMCs for userspace */ - mmcr0 |= MMCR0_EBE | MMCR0_PMCC_U6; + /* Enable EBB and read/write to all 6 PMCs and BHRB for userspace */ + mmcr0 |= MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC_U6; /* * Add any bits from the user MMCR0, FC or PMAO. This is compatible @@ -1117,11 +1118,12 @@ static void power_pmu_disable(struct pmu *pmu) } /* - * Set the 'freeze counters' bit, clear EBE/PMCC/PMAO/FC56. + * Set the 'freeze counters' bit, clear EBE/BHRBA/PMCC/PMAO/FC56 */ val = mmcr0 = mfspr(SPRN_MMCR0); val |= MMCR0_FC; - val &= ~(MMCR0_EBE | MMCR0_PMCC | MMCR0_PMAO | MMCR0_FC56); + val &= ~(MMCR0_EBE | MMCR0_BHRBA | MMCR0_PMCC | MMCR0_PMAO | + MMCR0_FC56); /* * The barrier is to make sure the mtspr has been -- cgit v1.2.3 From 827f798ac13c27c4a122175c60a3a684b59e9103 Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 14 Mar 2014 16:00:36 +1100 Subject: powerpc: Add hvcalls for 24x7 and gpci (Get Performance Counter Info) Signed-off-by: Cody P Schafer Signed-off-by: Michael Ellerman Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/hvcall.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/hvcall.h b/arch/powerpc/include/asm/hvcall.h index d8b600b3f058..5dbbb29f5c3e 100644 --- a/arch/powerpc/include/asm/hvcall.h +++ b/arch/powerpc/include/asm/hvcall.h @@ -274,6 +274,11 @@ /* Platform specific hcalls, used by KVM */ #define H_RTAS 0xf000 +/* "Platform specific hcalls", provided by PHYP */ +#define H_GET_24X7_CATALOG_PAGE 0xF078 +#define H_GET_24X7_DATA 0xF07C +#define H_GET_PERF_COUNTER_INFO 0xF080 + #ifndef __ASSEMBLY__ /** -- cgit v1.2.3 From 422b9b9684db3c511e65c91842275c43f5910ae9 Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Thu, 6 Mar 2014 16:10:11 +1100 Subject: powerpc/compat: 32-bit little endian machine name is ppcle, not ppc I noticed this when testing setarch. No, we don't magically support a big endian userspace on a little endian kernel. Signed-off-by: Anton Blanchard Cc: stable@vger.kernel.org # v3.10+ Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/compat.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/compat.h b/arch/powerpc/include/asm/compat.h index a613d2c82fd9..b142b8e0ed9e 100644 --- a/arch/powerpc/include/asm/compat.h +++ b/arch/powerpc/include/asm/compat.h @@ -8,7 +8,11 @@ #include #define COMPAT_USER_HZ 100 +#ifdef __BIG_ENDIAN__ #define COMPAT_UTS_MACHINE "ppc\0\0" +#else +#define COMPAT_UTS_MACHINE "ppcle\0\0" +#endif typedef u32 compat_size_t; typedef s32 compat_ssize_t; -- cgit v1.2.3 From d410ae2126481a74bc5be4a6242c4a232c19a984 Mon Sep 17 00:00:00 2001 From: Mahesh Salgaonkar Date: Tue, 11 Mar 2014 10:56:18 +0530 Subject: powerpc/book3s: Fix CFAR clobbering issue in machine check handler. While checking powersaving mode in machine check handler at 0x200, we clobber CFAR register. Fix it by saving and restoring it during beq/bgt. Signed-off-by: Mahesh Salgaonkar Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/include/asm/exception-64s.h | 8 ++++++++ arch/powerpc/kernel/exceptions-64s.S | 5 +++++ 2 files changed, 13 insertions(+) (limited to 'arch/powerpc/include') diff --git a/arch/powerpc/include/asm/exception-64s.h b/arch/powerpc/include/asm/exception-64s.h index 66830618cc19..aeaa56cd9b54 100644 --- a/arch/powerpc/include/asm/exception-64s.h +++ b/arch/powerpc/include/asm/exception-64s.h @@ -146,6 +146,14 @@ BEGIN_FTR_SECTION_NESTED(943) \ mfspr ra,spr; \ END_FTR_SECTION_NESTED(ftr,ftr,943) +/* + * Set an SPR from a register if the CPU has the given feature + */ +#define OPT_SET_SPR(ra, spr, ftr) \ +BEGIN_FTR_SECTION_NESTED(943) \ + mtspr spr,ra; \ +END_FTR_SECTION_NESTED(ftr,ftr,943) + /* * Save a register to the PACA if the CPU has the given feature */ diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index 38d507306a11..4c34c3c827ad 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -164,13 +164,18 @@ BEGIN_FTR_SECTION */ mfspr r13,SPRN_SRR1 rlwinm. r13,r13,47-31,30,31 + OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) beq 9f + mfspr r13,SPRN_SRR1 + rlwinm. r13,r13,47-31,30,31 /* waking up from powersave (nap) state */ cmpwi cr1,r13,2 /* Total loss of HV state is fatal. let's just stay stuck here */ + OPT_GET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) bgt cr1,. 9: + OPT_SET_SPR(r13, SPRN_CFAR, CPU_FTR_CFAR) END_FTR_SECTION_IFSET(CPU_FTR_HVMODE | CPU_FTR_ARCH_206) #endif /* CONFIG_PPC_P7_NAP */ EXCEPTION_PROLOG_0(PACA_EXMC) -- cgit v1.2.3