From 6b0b50b0617fad5f2af3b928596a25f7de8dbf50 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Jun 2013 17:14:02 -0700 Subject: mm/THP: add pmd args to pgtable deposit and withdraw APIs This will be later used by powerpc THP support. In powerpc we want to use pgtable for storing the hash index values. So instead of adding them to mm_context list, we would like to store them in the second half of pmd Signed-off-by: Aneesh Kumar K.V Reviewed-by: Andrea Arcangeli Reviewed-by: David Gibson Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/asm-generic/pgtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index a59ff51b0166..18e27c210716 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -173,11 +173,12 @@ extern void pmdp_splitting_flush(struct vm_area_struct *vma, #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT -extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pgtable_t pgtable); +extern void pgtable_trans_huge_deposit(struct mm_struct *mm, pmd_t *pmdp, + pgtable_t pgtable); #endif #ifndef __HAVE_ARCH_PGTABLE_WITHDRAW -extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm); +extern pgtable_t pgtable_trans_huge_withdraw(struct mm_struct *mm, pmd_t *pmdp); #endif #ifndef __HAVE_ARCH_PMDP_INVALIDATE -- cgit v1.2.3 From fde52796d487b675cde55427e3347ff3e59f9a7f Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Wed, 5 Jun 2013 17:14:05 -0700 Subject: mm/THP: don't use HPAGE_SHIFT in transparent hugepage code For architectures like powerpc that support multiple explicit hugepage sizes, HPAGE_SHIFT indicate the default explicit hugepage shift. For THP to work the hugepage size should be same as PMD_SIZE. So use PMD_SHIFT directly. So move the define outside CONFIG_TRANSPARENT_HUGEPAGE #ifdef because we want to use these defines in generic code with if (pmd_trans_huge()) conditional. Signed-off-by: Aneesh Kumar K.V Cc: Andrea Arcangeli Cc: David Gibson Cc: Andrea Arcangeli Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/linux/huge_mm.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 528454c2caa9..cc276d2c3a40 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -58,12 +58,11 @@ extern pmd_t *page_check_address_pmd(struct page *page, #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1< Date: Tue, 21 May 2013 13:33:10 +1000 Subject: powerpc/vfio: Implement IOMMU driver for VFIO VFIO implements platform independent stuff such as a PCI driver, BAR access (via read/write on a file descriptor or direct mapping when possible) and IRQ signaling. The platform dependent part includes IOMMU initialization and handling. This implements an IOMMU driver for VFIO which does mapping/unmapping pages for the guest IO and provides information about DMA window (required by a POWER guest). Cc: David Gibson Signed-off-by: Alexey Kardashevskiy Signed-off-by: Paul Mackerras Acked-by: Alex Williamson Signed-off-by: Benjamin Herrenschmidt --- Documentation/vfio.txt | 63 ++++++ drivers/vfio/Kconfig | 6 + drivers/vfio/Makefile | 1 + drivers/vfio/vfio.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 377 ++++++++++++++++++++++++++++++++++++ include/uapi/linux/vfio.h | 34 ++++ 6 files changed, 482 insertions(+) create mode 100644 drivers/vfio/vfio_iommu_spapr_tce.c (limited to 'include') diff --git a/Documentation/vfio.txt b/Documentation/vfio.txt index 8eda3635a17d..c55533c0adb3 100644 --- a/Documentation/vfio.txt +++ b/Documentation/vfio.txt @@ -283,6 +283,69 @@ a direct pass through for VFIO_DEVICE_* ioctls. The read/write/mmap interfaces implement the device region access defined by the device's own VFIO_DEVICE_GET_REGION_INFO ioctl. + +PPC64 sPAPR implementation note +------------------------------------------------------------------------------- + +This implementation has some specifics: + +1) Only one IOMMU group per container is supported as an IOMMU group +represents the minimal entity which isolation can be guaranteed for and +groups are allocated statically, one per a Partitionable Endpoint (PE) +(PE is often a PCI domain but not always). + +2) The hardware supports so called DMA windows - the PCI address range +within which DMA transfer is allowed, any attempt to access address space +out of the window leads to the whole PE isolation. + +3) PPC64 guests are paravirtualized but not fully emulated. There is an API +to map/unmap pages for DMA, and it normally maps 1..32 pages per call and +currently there is no way to reduce the number of calls. In order to make things +faster, the map/unmap handling has been implemented in real mode which provides +an excellent performance which has limitations such as inability to do +locked pages accounting in real time. + +So 3 additional ioctls have been added: + + VFIO_IOMMU_SPAPR_TCE_GET_INFO - returns the size and the start + of the DMA window on the PCI bus. + + VFIO_IOMMU_ENABLE - enables the container. The locked pages accounting + is done at this point. This lets user first to know what + the DMA window is and adjust rlimit before doing any real job. + + VFIO_IOMMU_DISABLE - disables the container. + + +The code flow from the example above should be slightly changed: + + ..... + /* Add the group to the container */ + ioctl(group, VFIO_GROUP_SET_CONTAINER, &container); + + /* Enable the IOMMU model we want */ + ioctl(container, VFIO_SET_IOMMU, VFIO_SPAPR_TCE_IOMMU) + + /* Get addition sPAPR IOMMU info */ + vfio_iommu_spapr_tce_info spapr_iommu_info; + ioctl(container, VFIO_IOMMU_SPAPR_TCE_GET_INFO, &spapr_iommu_info); + + if (ioctl(container, VFIO_IOMMU_ENABLE)) + /* Cannot enable container, may be low rlimit */ + + /* Allocate some space and setup a DMA mapping */ + dma_map.vaddr = mmap(0, 1024 * 1024, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANONYMOUS, 0, 0); + + dma_map.size = 1024 * 1024; + dma_map.iova = 0; /* 1MB starting at 0x0 from device view */ + dma_map.flags = VFIO_DMA_MAP_FLAG_READ | VFIO_DMA_MAP_FLAG_WRITE; + + /* Check here is .iova/.size are within DMA window from spapr_iommu_info */ + + ioctl(container, VFIO_IOMMU_MAP_DMA, &dma_map); + ..... + ------------------------------------------------------------------------------- [1] VFIO was originally an acronym for "Virtual Function I/O" in its diff --git a/drivers/vfio/Kconfig b/drivers/vfio/Kconfig index 7cd5dec0abd1..b464687f6e14 100644 --- a/drivers/vfio/Kconfig +++ b/drivers/vfio/Kconfig @@ -3,10 +3,16 @@ config VFIO_IOMMU_TYPE1 depends on VFIO default n +config VFIO_IOMMU_SPAPR_TCE + tristate + depends on VFIO && SPAPR_TCE_IOMMU + default n + menuconfig VFIO tristate "VFIO Non-Privileged userspace driver framework" depends on IOMMU_API select VFIO_IOMMU_TYPE1 if X86 + select VFIO_IOMMU_SPAPR_TCE if PPC_POWERNV help VFIO provides a framework for secure userspace device drivers. See Documentation/vfio.txt for more details. diff --git a/drivers/vfio/Makefile b/drivers/vfio/Makefile index 2398d4a0e38b..72bfabc8629e 100644 --- a/drivers/vfio/Makefile +++ b/drivers/vfio/Makefile @@ -1,3 +1,4 @@ obj-$(CONFIG_VFIO) += vfio.o obj-$(CONFIG_VFIO_IOMMU_TYPE1) += vfio_iommu_type1.o +obj-$(CONFIG_VFIO_IOMMU_SPAPR_TCE) += vfio_iommu_spapr_tce.o obj-$(CONFIG_VFIO_PCI) += pci/ diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 6d78736563de..259ad282ae5d 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -1415,6 +1415,7 @@ static int __init vfio_init(void) * drivers. */ request_module_nowait("vfio_iommu_type1"); + request_module_nowait("vfio_iommu_spapr_tce"); return 0; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c new file mode 100644 index 000000000000..bdae7a04af75 --- /dev/null +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -0,0 +1,377 @@ +/* + * VFIO: IOMMU DMA mapping support for TCE on POWER + * + * Copyright (C) 2013 IBM Corp. All rights reserved. + * Author: Alexey Kardashevskiy + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * Derived from original vfio_iommu_type1.c: + * Copyright (C) 2012 Red Hat, Inc. All rights reserved. + * Author: Alex Williamson + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#define DRIVER_VERSION "0.1" +#define DRIVER_AUTHOR "aik@ozlabs.ru" +#define DRIVER_DESC "VFIO IOMMU SPAPR TCE" + +static void tce_iommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group); + +/* + * VFIO IOMMU fd for SPAPR_TCE IOMMU implementation + * + * This code handles mapping and unmapping of user data buffers + * into DMA'ble space using the IOMMU + */ + +/* + * The container descriptor supports only a single group per container. + * Required by the API as the container is not supplied with the IOMMU group + * at the moment of initialization. + */ +struct tce_container { + struct mutex lock; + struct iommu_table *tbl; + bool enabled; +}; + +static int tce_iommu_enable(struct tce_container *container) +{ + int ret = 0; + unsigned long locked, lock_limit, npages; + struct iommu_table *tbl = container->tbl; + + if (!container->tbl) + return -ENXIO; + + if (!current->mm) + return -ESRCH; /* process exited */ + + if (container->enabled) + return -EBUSY; + + /* + * When userspace pages are mapped into the IOMMU, they are effectively + * locked memory, so, theoretically, we need to update the accounting + * of locked pages on each map and unmap. For powerpc, the map unmap + * paths can be very hot, though, and the accounting would kill + * performance, especially since it would be difficult to impossible + * to handle the accounting in real mode only. + * + * To address that, rather than precisely accounting every page, we + * instead account for a worst case on locked memory when the iommu is + * enabled and disabled. The worst case upper bound on locked memory + * is the size of the whole iommu window, which is usually relatively + * small (compared to total memory sizes) on POWER hardware. + * + * Also we don't have a nice way to fail on H_PUT_TCE due to ulimits, + * that would effectively kill the guest at random points, much better + * enforcing the limit based on the max that the guest can map. + */ + down_write(¤t->mm->mmap_sem); + npages = (tbl->it_size << IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + locked = current->mm->locked_vm + npages; + lock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; + if (locked > lock_limit && !capable(CAP_IPC_LOCK)) { + pr_warn("RLIMIT_MEMLOCK (%ld) exceeded\n", + rlimit(RLIMIT_MEMLOCK)); + ret = -ENOMEM; + } else { + + current->mm->locked_vm += npages; + container->enabled = true; + } + up_write(¤t->mm->mmap_sem); + + return ret; +} + +static void tce_iommu_disable(struct tce_container *container) +{ + if (!container->enabled) + return; + + container->enabled = false; + + if (!container->tbl || !current->mm) + return; + + down_write(¤t->mm->mmap_sem); + current->mm->locked_vm -= (container->tbl->it_size << + IOMMU_PAGE_SHIFT) >> PAGE_SHIFT; + up_write(¤t->mm->mmap_sem); +} + +static void *tce_iommu_open(unsigned long arg) +{ + struct tce_container *container; + + if (arg != VFIO_SPAPR_TCE_IOMMU) { + pr_err("tce_vfio: Wrong IOMMU type\n"); + return ERR_PTR(-EINVAL); + } + + container = kzalloc(sizeof(*container), GFP_KERNEL); + if (!container) + return ERR_PTR(-ENOMEM); + + mutex_init(&container->lock); + + return container; +} + +static void tce_iommu_release(void *iommu_data) +{ + struct tce_container *container = iommu_data; + + WARN_ON(container->tbl && !container->tbl->it_group); + tce_iommu_disable(container); + + if (container->tbl && container->tbl->it_group) + tce_iommu_detach_group(iommu_data, container->tbl->it_group); + + mutex_destroy(&container->lock); + + kfree(container); +} + +static long tce_iommu_ioctl(void *iommu_data, + unsigned int cmd, unsigned long arg) +{ + struct tce_container *container = iommu_data; + unsigned long minsz; + long ret; + + switch (cmd) { + case VFIO_CHECK_EXTENSION: + return (arg == VFIO_SPAPR_TCE_IOMMU) ? 1 : 0; + + case VFIO_IOMMU_SPAPR_TCE_GET_INFO: { + struct vfio_iommu_spapr_tce_info info; + struct iommu_table *tbl = container->tbl; + + if (WARN_ON(!tbl)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_spapr_tce_info, + dma32_window_size); + + if (copy_from_user(&info, (void __user *)arg, minsz)) + return -EFAULT; + + if (info.argsz < minsz) + return -EINVAL; + + info.dma32_window_start = tbl->it_offset << IOMMU_PAGE_SHIFT; + info.dma32_window_size = tbl->it_size << IOMMU_PAGE_SHIFT; + info.flags = 0; + + if (copy_to_user((void __user *)arg, &info, minsz)) + return -EFAULT; + + return 0; + } + case VFIO_IOMMU_MAP_DMA: { + struct vfio_iommu_type1_dma_map param; + struct iommu_table *tbl = container->tbl; + unsigned long tce, i; + + if (!tbl) + return -ENXIO; + + BUG_ON(!tbl->it_group); + + minsz = offsetofend(struct vfio_iommu_type1_dma_map, size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + if (param.flags & ~(VFIO_DMA_MAP_FLAG_READ | + VFIO_DMA_MAP_FLAG_WRITE)) + return -EINVAL; + + if ((param.size & ~IOMMU_PAGE_MASK) || + (param.vaddr & ~IOMMU_PAGE_MASK)) + return -EINVAL; + + /* iova is checked by the IOMMU API */ + tce = param.vaddr; + if (param.flags & VFIO_DMA_MAP_FLAG_READ) + tce |= TCE_PCI_READ; + if (param.flags & VFIO_DMA_MAP_FLAG_WRITE) + tce |= TCE_PCI_WRITE; + + ret = iommu_tce_put_param_check(tbl, param.iova, tce); + if (ret) + return ret; + + for (i = 0; i < (param.size >> IOMMU_PAGE_SHIFT); ++i) { + ret = iommu_put_tce_user_mode(tbl, + (param.iova >> IOMMU_PAGE_SHIFT) + i, + tce); + if (ret) + break; + tce += IOMMU_PAGE_SIZE; + } + if (ret) + iommu_clear_tces_and_put_pages(tbl, + param.iova >> IOMMU_PAGE_SHIFT, i); + + iommu_flush_tce(tbl); + + return ret; + } + case VFIO_IOMMU_UNMAP_DMA: { + struct vfio_iommu_type1_dma_unmap param; + struct iommu_table *tbl = container->tbl; + + if (WARN_ON(!tbl)) + return -ENXIO; + + minsz = offsetofend(struct vfio_iommu_type1_dma_unmap, + size); + + if (copy_from_user(¶m, (void __user *)arg, minsz)) + return -EFAULT; + + if (param.argsz < minsz) + return -EINVAL; + + /* No flag is supported now */ + if (param.flags) + return -EINVAL; + + if (param.size & ~IOMMU_PAGE_MASK) + return -EINVAL; + + ret = iommu_tce_clear_param_check(tbl, param.iova, 0, + param.size >> IOMMU_PAGE_SHIFT); + if (ret) + return ret; + + ret = iommu_clear_tces_and_put_pages(tbl, + param.iova >> IOMMU_PAGE_SHIFT, + param.size >> IOMMU_PAGE_SHIFT); + iommu_flush_tce(tbl); + + return ret; + } + case VFIO_IOMMU_ENABLE: + mutex_lock(&container->lock); + ret = tce_iommu_enable(container); + mutex_unlock(&container->lock); + return ret; + + + case VFIO_IOMMU_DISABLE: + mutex_lock(&container->lock); + tce_iommu_disable(container); + mutex_unlock(&container->lock); + return 0; + } + + return -ENOTTY; +} + +static int tce_iommu_attach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + int ret; + struct tce_container *container = iommu_data; + struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + + BUG_ON(!tbl); + mutex_lock(&container->lock); + + /* pr_debug("tce_vfio: Attaching group #%u to iommu %p\n", + iommu_group_id(iommu_group), iommu_group); */ + if (container->tbl) { + pr_warn("tce_vfio: Only one group per IOMMU container is allowed, existing id=%d, attaching id=%d\n", + iommu_group_id(container->tbl->it_group), + iommu_group_id(iommu_group)); + ret = -EBUSY; + } else if (container->enabled) { + pr_err("tce_vfio: attaching group #%u to enabled container\n", + iommu_group_id(iommu_group)); + ret = -EBUSY; + } else { + ret = iommu_take_ownership(tbl); + if (!ret) + container->tbl = tbl; + } + + mutex_unlock(&container->lock); + + return ret; +} + +static void tce_iommu_detach_group(void *iommu_data, + struct iommu_group *iommu_group) +{ + struct tce_container *container = iommu_data; + struct iommu_table *tbl = iommu_group_get_iommudata(iommu_group); + + BUG_ON(!tbl); + mutex_lock(&container->lock); + if (tbl != container->tbl) { + pr_warn("tce_vfio: detaching group #%u, expected group is #%u\n", + iommu_group_id(iommu_group), + iommu_group_id(tbl->it_group)); + } else { + if (container->enabled) { + pr_warn("tce_vfio: detaching group #%u from enabled container, forcing disable\n", + iommu_group_id(tbl->it_group)); + tce_iommu_disable(container); + } + + /* pr_debug("tce_vfio: detaching group #%u from iommu %p\n", + iommu_group_id(iommu_group), iommu_group); */ + container->tbl = NULL; + iommu_release_ownership(tbl); + } + mutex_unlock(&container->lock); +} + +const struct vfio_iommu_driver_ops tce_iommu_driver_ops = { + .name = "iommu-vfio-powerpc", + .owner = THIS_MODULE, + .open = tce_iommu_open, + .release = tce_iommu_release, + .ioctl = tce_iommu_ioctl, + .attach_group = tce_iommu_attach_group, + .detach_group = tce_iommu_detach_group, +}; + +static int __init tce_iommu_init(void) +{ + return vfio_register_iommu_driver(&tce_iommu_driver_ops); +} + +static void __exit tce_iommu_cleanup(void) +{ + vfio_unregister_iommu_driver(&tce_iommu_driver_ops); +} + +module_init(tce_iommu_init); +module_exit(tce_iommu_cleanup); + +MODULE_VERSION(DRIVER_VERSION); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR(DRIVER_AUTHOR); +MODULE_DESCRIPTION(DRIVER_DESC); + diff --git a/include/uapi/linux/vfio.h b/include/uapi/linux/vfio.h index 284ff2436829..87ee4f4cff25 100644 --- a/include/uapi/linux/vfio.h +++ b/include/uapi/linux/vfio.h @@ -22,6 +22,7 @@ /* Extensions */ #define VFIO_TYPE1_IOMMU 1 +#define VFIO_SPAPR_TCE_IOMMU 2 /* * The IOCTL interface is designed for extensibility by embedding the @@ -375,4 +376,37 @@ struct vfio_iommu_type1_dma_unmap { #define VFIO_IOMMU_UNMAP_DMA _IO(VFIO_TYPE, VFIO_BASE + 14) +/* + * IOCTLs to enable/disable IOMMU container usage. + * No parameters are supported. + */ +#define VFIO_IOMMU_ENABLE _IO(VFIO_TYPE, VFIO_BASE + 15) +#define VFIO_IOMMU_DISABLE _IO(VFIO_TYPE, VFIO_BASE + 16) + +/* -------- Additional API for SPAPR TCE (Server POWERPC) IOMMU -------- */ + +/* + * The SPAPR TCE info struct provides the information about the PCI bus + * address ranges available for DMA, these values are programmed into + * the hardware so the guest has to know that information. + * + * The DMA 32 bit window start is an absolute PCI bus address. + * The IOVA address passed via map/unmap ioctls are absolute PCI bus + * addresses too so the window works as a filter rather than an offset + * for IOVA addresses. + * + * A flag will need to be added if other page sizes are supported, + * so as defined here, it is always 4k. + */ +struct vfio_iommu_spapr_tce_info { + __u32 argsz; + __u32 flags; /* reserved for future use */ + __u32 dma32_window_start; /* 32 bit window start (bytes) */ + __u32 dma32_window_size; /* 32 bit window size (bytes) */ +}; + +#define VFIO_IOMMU_SPAPR_TCE_GET_INFO _IO(VFIO_TYPE, VFIO_BASE + 12) + +/* ***************************************************************** */ + #endif /* _UAPIVFIO_H */ -- cgit v1.2.3 From 69020eea973d95766e905ee0ce7773e0027377a3 Mon Sep 17 00:00:00 2001 From: Aruna Balakrishnaiah Date: Thu, 6 Jun 2013 00:21:44 +0530 Subject: powerpc/pseries: Read rtas partition via pstore This patch set exploits the pstore subsystem to read details of rtas partition in NVRAM to a separate file in /dev/pstore. For instance, rtas details will be stored in a file named [rtas-nvram-4]. Signed-off-by: Aruna Balakrishnaiah Reviewed-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/nvram.c | 33 ++++++++++++++++++++++++++------- fs/pstore/inode.c | 3 +++ include/linux/pstore.h | 2 ++ 3 files changed, 31 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 9edec8ee02ef..78d72f0175ad 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -131,9 +131,11 @@ static struct z_stream_s stream; #ifdef CONFIG_PSTORE static enum pstore_type_id nvram_type_ids[] = { PSTORE_TYPE_DMESG, + PSTORE_TYPE_PPC_RTAS, -1 }; static int read_type; +static unsigned long last_rtas_event; #endif static ssize_t pSeries_nvram_read(char *buf, size_t count, loff_t *index) @@ -297,8 +299,13 @@ int nvram_write_error_log(char * buff, int length, { int rc = nvram_write_os_partition(&rtas_log_partition, buff, length, err_type, error_log_cnt); - if (!rc) + if (!rc) { last_unread_rtas_event = get_seconds(); +#ifdef CONFIG_PSTORE + last_rtas_event = get_seconds(); +#endif + } + return rc; } @@ -506,7 +513,7 @@ static int nvram_pstore_write(enum pstore_type_id type, } /* - * Reads the oops/panic report. + * Reads the oops/panic report and ibm,rtas-log partition. * Returns the length of the data we read from each partition. * Returns 0 if we've been called before. */ @@ -526,6 +533,12 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, part = &oops_log_partition; *type = PSTORE_TYPE_DMESG; break; + case PSTORE_TYPE_PPC_RTAS: + part = &rtas_log_partition; + *type = PSTORE_TYPE_PPC_RTAS; + time->tv_sec = last_rtas_event; + time->tv_nsec = 0; + break; default: return 0; } @@ -542,11 +555,17 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, *count = 0; *id = id_no; - oops_hdr = (struct oops_log_info *)buff; - *buf = buff + sizeof(*oops_hdr); - time->tv_sec = oops_hdr->timestamp; - time->tv_nsec = 0; - return oops_hdr->report_length; + + if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { + oops_hdr = (struct oops_log_info *)buff; + *buf = buff + sizeof(*oops_hdr); + time->tv_sec = oops_hdr->timestamp; + time->tv_nsec = 0; + return oops_hdr->report_length; + } + + *buf = buff; + return part->size; } static struct pstore_info nvram_pstore_info = { diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index e4bcb2cf055a..ec24f9ceb5ed 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -324,6 +324,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, case PSTORE_TYPE_MCE: sprintf(name, "mce-%s-%lld", psname, id); break; + case PSTORE_TYPE_PPC_RTAS: + sprintf(name, "rtas-%s-%lld", psname, id); + break; case PSTORE_TYPE_UNKNOWN: sprintf(name, "unknown-%s-%lld", psname, id); break; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 75d01760c911..d7a8fe938c0f 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -35,6 +35,8 @@ enum pstore_type_id { PSTORE_TYPE_MCE = 1, PSTORE_TYPE_CONSOLE = 2, PSTORE_TYPE_FTRACE = 3, + /* PPC64 partition types */ + PSTORE_TYPE_PPC_RTAS = 4, PSTORE_TYPE_UNKNOWN = 255 }; -- cgit v1.2.3 From f33f748c964f6a6ee272b1c794b52f54f4da1d04 Mon Sep 17 00:00:00 2001 From: Aruna Balakrishnaiah Date: Thu, 6 Jun 2013 00:22:10 +0530 Subject: powerpc/pseries: Read of-config partition via pstore This patch set exploits the pstore subsystem to read details of of-config partition in NVRAM to a separate file in /dev/pstore. For instance, of-config partition details will be stored in a file named [of-nvram-5]. Signed-off-by: Aruna Balakrishnaiah Reviewed-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/nvram.c | 55 ++++++++++++++++++++++++++++------ fs/pstore/inode.c | 3 ++ include/linux/pstore.h | 1 + 3 files changed, 50 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 714ed8ac7d59..f7392f6ea7b3 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -132,9 +132,16 @@ static size_t oops_data_sz; static struct z_stream_s stream; #ifdef CONFIG_PSTORE +static struct nvram_os_partition of_config_partition = { + .name = "of-config", + .index = -1, + .os_partition = false +}; + static enum pstore_type_id nvram_type_ids[] = { PSTORE_TYPE_DMESG, PSTORE_TYPE_PPC_RTAS, + PSTORE_TYPE_PPC_OF, -1 }; static int read_type; @@ -332,10 +339,15 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff, tmp_index = part->index; - rc = ppc_md.nvram_read((char *)&info, sizeof(struct err_log_info), &tmp_index); - if (rc <= 0) { - pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, rc); - return rc; + if (part->os_partition) { + rc = ppc_md.nvram_read((char *)&info, + sizeof(struct err_log_info), + &tmp_index); + if (rc <= 0) { + pr_err("%s: Failed nvram_read (%d)\n", __FUNCTION__, + rc); + return rc; + } } rc = ppc_md.nvram_read(buff, length, &tmp_index); @@ -344,8 +356,10 @@ int nvram_read_partition(struct nvram_os_partition *part, char *buff, return rc; } - *error_log_cnt = info.seq_num; - *err_type = info.error_type; + if (part->os_partition) { + *error_log_cnt = info.seq_num; + *err_type = info.error_type; + } return 0; } @@ -516,7 +530,7 @@ static int nvram_pstore_write(enum pstore_type_id type, } /* - * Reads the oops/panic report and ibm,rtas-log partition. + * Reads the oops/panic report, rtas and of-config partition. * Returns the length of the data we read from each partition. * Returns 0 if we've been called before. */ @@ -525,9 +539,11 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, struct pstore_info *psi) { struct oops_log_info *oops_hdr; - unsigned int err_type, id_no; + unsigned int err_type, id_no, size = 0; struct nvram_os_partition *part = NULL; char *buff = NULL; + int sig = 0; + loff_t p; read_type++; @@ -542,10 +558,29 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_sec = last_rtas_event; time->tv_nsec = 0; break; + case PSTORE_TYPE_PPC_OF: + sig = NVRAM_SIG_OF; + part = &of_config_partition; + *type = PSTORE_TYPE_PPC_OF; + *id = PSTORE_TYPE_PPC_OF; + time->tv_sec = 0; + time->tv_nsec = 0; + break; default: return 0; } + if (!part->os_partition) { + p = nvram_find_partition(part->name, sig, &size); + if (p <= 0) { + pr_err("nvram: Failed to find partition %s, " + "err %d\n", part->name, (int)p); + return 0; + } + part->index = p; + part->size = size; + } + buff = kmalloc(part->size, GFP_KERNEL); if (!buff) @@ -557,7 +592,9 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, } *count = 0; - *id = id_no; + + if (part->os_partition) + *id = id_no; if (nvram_type_ids[read_type] == PSTORE_TYPE_DMESG) { oops_hdr = (struct oops_log_info *)buff; diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index ec24f9ceb5ed..73148aef9e31 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -327,6 +327,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, case PSTORE_TYPE_PPC_RTAS: sprintf(name, "rtas-%s-%lld", psname, id); break; + case PSTORE_TYPE_PPC_OF: + sprintf(name, "powerpc-ofw-%s-%lld", psname, id); + break; case PSTORE_TYPE_UNKNOWN: sprintf(name, "unknown-%s-%lld", psname, id); break; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index d7a8fe938c0f..615dc18638b8 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -37,6 +37,7 @@ enum pstore_type_id { PSTORE_TYPE_FTRACE = 3, /* PPC64 partition types */ PSTORE_TYPE_PPC_RTAS = 4, + PSTORE_TYPE_PPC_OF = 5, PSTORE_TYPE_UNKNOWN = 255 }; -- cgit v1.2.3 From a5e4797b0f46819a74a7233825137ed5d2f51b51 Mon Sep 17 00:00:00 2001 From: Aruna Balakrishnaiah Date: Thu, 6 Jun 2013 00:22:20 +0530 Subject: powerpc/pseries: Read common partition via pstore This patch exploits pstore subsystem to read details of common partition in NVRAM to a separate file in /dev/pstore. For instance, common partition details will be stored in a file named [common-nvram-6]. Signed-off-by: Aruna Balakrishnaiah Reviewed-by: Jim Keniston Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/nvram.c | 17 ++++++++++++++++- fs/pstore/inode.c | 3 +++ include/linux/pstore.h | 1 + 3 files changed, 20 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index f7392f6ea7b3..14cc486709f6 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -138,10 +138,17 @@ static struct nvram_os_partition of_config_partition = { .os_partition = false }; +static struct nvram_os_partition common_partition = { + .name = "common", + .index = -1, + .os_partition = false +}; + static enum pstore_type_id nvram_type_ids[] = { PSTORE_TYPE_DMESG, PSTORE_TYPE_PPC_RTAS, PSTORE_TYPE_PPC_OF, + PSTORE_TYPE_PPC_COMMON, -1 }; static int read_type; @@ -530,7 +537,7 @@ static int nvram_pstore_write(enum pstore_type_id type, } /* - * Reads the oops/panic report, rtas and of-config partition. + * Reads the oops/panic report, rtas, of-config and common partition. * Returns the length of the data we read from each partition. * Returns 0 if we've been called before. */ @@ -566,6 +573,14 @@ static ssize_t nvram_pstore_read(u64 *id, enum pstore_type_id *type, time->tv_sec = 0; time->tv_nsec = 0; break; + case PSTORE_TYPE_PPC_COMMON: + sig = NVRAM_SIG_SYS; + part = &common_partition; + *type = PSTORE_TYPE_PPC_COMMON; + *id = PSTORE_TYPE_PPC_COMMON; + time->tv_sec = 0; + time->tv_nsec = 0; + break; default: return 0; } diff --git a/fs/pstore/inode.c b/fs/pstore/inode.c index 73148aef9e31..08c3d76b24ca 100644 --- a/fs/pstore/inode.c +++ b/fs/pstore/inode.c @@ -330,6 +330,9 @@ int pstore_mkfile(enum pstore_type_id type, char *psname, u64 id, int count, case PSTORE_TYPE_PPC_OF: sprintf(name, "powerpc-ofw-%s-%lld", psname, id); break; + case PSTORE_TYPE_PPC_COMMON: + sprintf(name, "powerpc-common-%s-%lld", psname, id); + break; case PSTORE_TYPE_UNKNOWN: sprintf(name, "unknown-%s-%lld", psname, id); break; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 615dc18638b8..656699fcc7d7 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -38,6 +38,7 @@ enum pstore_type_id { /* PPC64 partition types */ PSTORE_TYPE_PPC_RTAS = 4, PSTORE_TYPE_PPC_OF = 5, + PSTORE_TYPE_PPC_COMMON = 6, PSTORE_TYPE_UNKNOWN = 255 }; -- cgit v1.2.3 From 3766a1abc53164f058d74f950599c797cb3fe302 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Tue, 25 Jun 2013 14:15:36 -0700 Subject: mm/thp: define HPAGE_PMD_* constants as BUILD_BUG() if !THP Currently, HPAGE_PMD_* constans rely on PMD_SHIFT regardless of CONFIG_TRANSPARENT_HUGEPAGE. PMD_SHIFT is not defined everywhere (e.g. arm nommu case). It means we can't use anything like this in generic code: if (PageTransHuge(page)) zero_huge_user(page, 0, HPAGE_PMD_SIZE); else clear_highpage(page); For !THP case, PageTransHuge() is 0 and compiler can eliminate zero_huge_user() call. But it still need to be valid C expression, means HPAGE_PMD_SIZE has to expand to something compiler can understand. Previously, HPAGE_PMD_* were defined to BUILD_BUG() for !THP. Let's come back to it. Signed-off-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Benjamin Herrenschmidt --- include/linux/huge_mm.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index cc276d2c3a40..e2dbefb38e3b 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -58,11 +58,12 @@ extern pmd_t *page_check_address_pmd(struct page *page, #define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT) #define HPAGE_PMD_NR (1< Date: Thu, 27 Jun 2013 14:02:56 +0530 Subject: pstore: Pass header size in the pstore write callback Header size is needed to distinguish between header and the dump data. Incorporate the addition of new argument (hsize) in the pstore write callback. Signed-off-by: Aruna Balakrishnaiah Acked-by: Kees Cook Signed-off-by: Benjamin Herrenschmidt --- arch/powerpc/platforms/pseries/nvram.c | 4 +++- drivers/acpi/apei/erst.c | 4 ++-- drivers/firmware/efi/efi-pstore.c | 2 +- fs/pstore/platform.c | 10 ++++++---- fs/pstore/ram.c | 3 ++- include/linux/pstore.h | 8 ++++---- 6 files changed, 18 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/arch/powerpc/platforms/pseries/nvram.c b/arch/powerpc/platforms/pseries/nvram.c index 14cc486709f6..3f0e7d67d747 100644 --- a/arch/powerpc/platforms/pseries/nvram.c +++ b/arch/powerpc/platforms/pseries/nvram.c @@ -502,6 +502,7 @@ static int nvram_pstore_open(struct pstore_info *psi) * @part: pstore writes data to registered buffer in parts, * part number will indicate the same. * @count: Indicates oops count + * @hsize: Size of header added by pstore * @size: number of bytes written to the registered buffer * @psi: registered pstore_info structure * @@ -512,7 +513,8 @@ static int nvram_pstore_open(struct pstore_info *psi) static int nvram_pstore_write(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, int count, - size_t size, struct pstore_info *psi) + size_t hsize, size_t size, + struct pstore_info *psi) { int rc; struct oops_log_info *oops_hdr = (struct oops_log_info *) oops_buf; diff --git a/drivers/acpi/apei/erst.c b/drivers/acpi/apei/erst.c index 6d894bfd8b8f..a9cf96085f85 100644 --- a/drivers/acpi/apei/erst.c +++ b/drivers/acpi/apei/erst.c @@ -935,7 +935,7 @@ static ssize_t erst_reader(u64 *id, enum pstore_type_id *type, int *count, struct timespec *time, char **buf, struct pstore_info *psi); static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, + u64 *id, unsigned int part, int count, size_t hsize, size_t size, struct pstore_info *psi); static int erst_clearer(enum pstore_type_id type, u64 id, int count, struct timespec time, struct pstore_info *psi); @@ -1055,7 +1055,7 @@ out: } static int erst_writer(enum pstore_type_id type, enum kmsg_dump_reason reason, - u64 *id, unsigned int part, int count, + u64 *id, unsigned int part, int count, size_t hsize, size_t size, struct pstore_info *psi) { struct cper_pstore_record *rcd = (struct cper_pstore_record *) diff --git a/drivers/firmware/efi/efi-pstore.c b/drivers/firmware/efi/efi-pstore.c index 202d2c85ba2e..452800e005b6 100644 --- a/drivers/firmware/efi/efi-pstore.c +++ b/drivers/firmware/efi/efi-pstore.c @@ -104,7 +104,7 @@ static ssize_t efi_pstore_read(u64 *id, enum pstore_type_id *type, static int efi_pstore_write(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, - unsigned int part, int count, size_t size, + unsigned int part, int count, size_t hsize, size_t size, struct pstore_info *psi) { char name[DUMP_NAME_LEN]; diff --git a/fs/pstore/platform.c b/fs/pstore/platform.c index 86d1038b5a12..4637ec4169cd 100644 --- a/fs/pstore/platform.c +++ b/fs/pstore/platform.c @@ -159,7 +159,7 @@ static void pstore_dump(struct kmsg_dumper *dumper, break; ret = psinfo->write(PSTORE_TYPE_DMESG, reason, &id, part, - oopscount, hsize + len, psinfo); + oopscount, hsize, hsize + len, psinfo); if (ret == 0 && reason == KMSG_DUMP_OOPS && pstore_is_mounted()) pstore_new_entry = 1; @@ -196,7 +196,7 @@ static void pstore_console_write(struct console *con, const char *s, unsigned c) spin_lock_irqsave(&psinfo->buf_lock, flags); } memcpy(psinfo->buf, s, c); - psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, c, psinfo); + psinfo->write(PSTORE_TYPE_CONSOLE, 0, &id, 0, 0, 0, c, psinfo); spin_unlock_irqrestore(&psinfo->buf_lock, flags); s += c; c = e - s; @@ -221,9 +221,11 @@ static void pstore_register_console(void) {} static int pstore_write_compat(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, int count, - size_t size, struct pstore_info *psi) + size_t hsize, size_t size, + struct pstore_info *psi) { - return psi->write_buf(type, reason, id, part, psinfo->buf, size, psi); + return psi->write_buf(type, reason, id, part, psinfo->buf, hsize, + size, psi); } /* diff --git a/fs/pstore/ram.c b/fs/pstore/ram.c index 1376e5a8f0d6..c6bb77ca35b5 100644 --- a/fs/pstore/ram.c +++ b/fs/pstore/ram.c @@ -195,7 +195,8 @@ static size_t ramoops_write_kmsg_hdr(struct persistent_ram_zone *prz) static int notrace ramoops_pstore_write_buf(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, - const char *buf, size_t size, + const char *buf, + size_t hsize, size_t size, struct pstore_info *psi) { struct ramoops_context *cxt = psi->data; diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 656699fcc7d7..4aa80ba830a2 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -58,12 +58,12 @@ struct pstore_info { struct pstore_info *psi); int (*write)(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, - unsigned int part, int count, size_t size, - struct pstore_info *psi); + unsigned int part, int count, size_t hsize, + size_t size, struct pstore_info *psi); int (*write_buf)(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, - unsigned int part, const char *buf, size_t size, - struct pstore_info *psi); + unsigned int part, const char *buf, size_t hsize, + size_t size, struct pstore_info *psi); int (*erase)(enum pstore_type_id type, u64 id, int count, struct timespec time, struct pstore_info *psi); -- cgit v1.2.3