From 16b1d26e77b142546e2b9b6dc3b5aa5c44ae3b77 Mon Sep 17 00:00:00 2001 From: Neelesh Gupta Date: Tue, 14 Oct 2014 14:08:36 +0530 Subject: rtc/tpo: Driver to support rtc and wakeup on PowerNV platform The patch implements the OPAL rtc driver that binds with the rtc driver subsystem. The driver uses the platform device infrastructure to probe the rtc device and register it to rtc class framework. The 'wakeup' is supported depending upon the property 'has-tpo' present in the OF node. It provides a way to load the generic rtc driver in in the absence of an OPAL driver. The patch also moves the existing OPAL rtc get/set time interfaces to the new driver and exposes the necessary OPAL calls using EXPORT_SYMBOL_GPL. Test results: ------------- Host: [root@tul169p1 ~]# ls -l /sys/class/rtc/ total 0 lrwxrwxrwx 1 root root 0 Oct 14 03:07 rtc0 -> ../../devices/opal-rtc/rtc/rtc0 [root@tul169p1 ~]# cat /sys/devices/opal-rtc/rtc/rtc0/time 08:10:07 [root@tul169p1 ~]# echo `date '+%s' -d '+ 2 minutes'` > /sys/class/rtc/rtc0/wakealarm [root@tul169p1 ~]# cat /sys/class/rtc/rtc0/wakealarm 1413274345 [root@tul169p1 ~]# FSP: $ smgr mfgState standby $ rtim timeofday System time is valid: 2014/10/14 08:12:04.225115 $ smgr mfgState ipling $ CC: devicetree@vger.kernel.org CC: tglx@linutronix.de CC: rtc-linux@googlegroups.com CC: a.zummo@towertech.it Signed-off-by: Neelesh Gupta Signed-off-by: Michael Ellerman --- drivers/rtc/Kconfig | 11 +++ drivers/rtc/Makefile | 1 + drivers/rtc/rtc-opal.c | 261 +++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 273 insertions(+) create mode 100644 drivers/rtc/rtc-opal.c (limited to 'drivers') diff --git a/drivers/rtc/Kconfig b/drivers/rtc/Kconfig index 6dd12ddbabc6..c8f0ec7464ce 100644 --- a/drivers/rtc/Kconfig +++ b/drivers/rtc/Kconfig @@ -979,6 +979,17 @@ config RTC_DRV_NUC900 If you say yes here you get support for the RTC subsystem of the NUC910/NUC920 used in embedded systems. +config RTC_DRV_OPAL + tristate "IBM OPAL RTC driver" + depends on PPC_POWERNV + default y + help + If you say yes here you get support for the PowerNV platform RTC + driver based on OPAL interfaces. + + This driver can also be built as a module. If so, the module + will be called rtc-opal. + comment "on-CPU RTC drivers" config RTC_DRV_DAVINCI diff --git a/drivers/rtc/Makefile b/drivers/rtc/Makefile index b188323c096a..c8ef3e1e6ccd 100644 --- a/drivers/rtc/Makefile +++ b/drivers/rtc/Makefile @@ -92,6 +92,7 @@ obj-$(CONFIG_RTC_DRV_MSM6242) += rtc-msm6242.o obj-$(CONFIG_RTC_DRV_MPC5121) += rtc-mpc5121.o obj-$(CONFIG_RTC_DRV_MV) += rtc-mv.o obj-$(CONFIG_RTC_DRV_NUC900) += rtc-nuc900.o +obj-$(CONFIG_RTC_DRV_OPAL) += rtc-opal.o obj-$(CONFIG_RTC_DRV_OMAP) += rtc-omap.o obj-$(CONFIG_RTC_DRV_PALMAS) += rtc-palmas.o obj-$(CONFIG_RTC_DRV_PCAP) += rtc-pcap.o diff --git a/drivers/rtc/rtc-opal.c b/drivers/rtc/rtc-opal.c new file mode 100644 index 000000000000..95f652165fe9 --- /dev/null +++ b/drivers/rtc/rtc-opal.c @@ -0,0 +1,261 @@ +/* + * IBM OPAL RTC driver + * Copyright (C) 2014 IBM + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. + */ + +#define DRVNAME "rtc-opal" +#define pr_fmt(fmt) DRVNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +static void opal_to_tm(u32 y_m_d, u64 h_m_s_ms, struct rtc_time *tm) +{ + tm->tm_year = ((bcd2bin(y_m_d >> 24) * 100) + + bcd2bin((y_m_d >> 16) & 0xff)) - 1900; + tm->tm_mon = bcd2bin((y_m_d >> 8) & 0xff) - 1; + tm->tm_mday = bcd2bin(y_m_d & 0xff); + tm->tm_hour = bcd2bin((h_m_s_ms >> 56) & 0xff); + tm->tm_min = bcd2bin((h_m_s_ms >> 48) & 0xff); + tm->tm_sec = bcd2bin((h_m_s_ms >> 40) & 0xff); + + GregorianDay(tm); +} + +static void tm_to_opal(struct rtc_time *tm, u32 *y_m_d, u64 *h_m_s_ms) +{ + *y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) / 100)) << 24; + *y_m_d |= ((u32)bin2bcd((tm->tm_year + 1900) % 100)) << 16; + *y_m_d |= ((u32)bin2bcd((tm->tm_mon + 1))) << 8; + *y_m_d |= ((u32)bin2bcd(tm->tm_mday)); + + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_hour)) << 56; + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_min)) << 48; + *h_m_s_ms |= ((u64)bin2bcd(tm->tm_sec)) << 40; +} + +static int opal_get_rtc_time(struct device *dev, struct rtc_time *tm) +{ + long rc = OPAL_BUSY; + u32 y_m_d; + u64 h_m_s_ms; + __be32 __y_m_d; + __be64 __h_m_s_ms; + + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_read(&__y_m_d, &__h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + msleep(10); + } + + if (rc != OPAL_SUCCESS) + return -EIO; + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = be64_to_cpu(__h_m_s_ms); + opal_to_tm(y_m_d, h_m_s_ms, tm); + + return 0; +} + +static int opal_set_rtc_time(struct device *dev, struct rtc_time *tm) +{ + long rc = OPAL_BUSY; + u32 y_m_d = 0; + u64 h_m_s_ms = 0; + + tm_to_opal(tm, &y_m_d, &h_m_s_ms); + while (rc == OPAL_BUSY || rc == OPAL_BUSY_EVENT) { + rc = opal_rtc_write(y_m_d, h_m_s_ms); + if (rc == OPAL_BUSY_EVENT) + opal_poll_events(NULL); + else + msleep(10); + } + + return rc == OPAL_SUCCESS ? 0 : -EIO; +} + +/* + * TPO Timed Power-On + * + * TPO get/set OPAL calls care about the hour and min and to make it consistent + * with the rtc utility time conversion functions, we use the 'u64' to store + * its value and perform bit shift by 32 before use.. + */ +static int opal_get_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) +{ + __be32 __y_m_d, __h_m; + struct opal_msg msg; + int rc, token; + u64 h_m_s_ms; + u32 y_m_d; + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + rc = opal_tpo_read(token, &__y_m_d, &__h_m); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = -EIO; + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + rc = -EIO; + goto exit; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) { + rc = -EIO; + goto exit; + } + + y_m_d = be32_to_cpu(__y_m_d); + h_m_s_ms = ((u64)be32_to_cpu(__h_m) << 32); + opal_to_tm(y_m_d, h_m_s_ms, &alarm->time); + +exit: + opal_async_release_token(token); + return rc; +} + +/* Set Timed Power-On */ +static int opal_set_tpo_time(struct device *dev, struct rtc_wkalrm *alarm) +{ + u64 h_m_s_ms = 0, token; + struct opal_msg msg; + u32 y_m_d = 0; + int rc; + + tm_to_opal(&alarm->time, &y_m_d, &h_m_s_ms); + + token = opal_async_get_token_interruptible(); + if (token < 0) { + if (token != -ERESTARTSYS) + pr_err("Failed to get the async token\n"); + + return token; + } + + /* TPO, we care about hour and minute */ + rc = opal_tpo_write(token, y_m_d, + (u32)((h_m_s_ms >> 32) & 0xffff0000)); + if (rc != OPAL_ASYNC_COMPLETION) { + rc = -EIO; + goto exit; + } + + rc = opal_async_wait_response(token, &msg); + if (rc) { + rc = -EIO; + goto exit; + } + + rc = be64_to_cpu(msg.params[1]); + if (rc != OPAL_SUCCESS) + rc = -EIO; + +exit: + opal_async_release_token(token); + return rc; +} + +static const struct rtc_class_ops opal_rtc_ops = { + .read_time = opal_get_rtc_time, + .set_time = opal_set_rtc_time, + .read_alarm = opal_get_tpo_time, + .set_alarm = opal_set_tpo_time, +}; + +static int opal_rtc_probe(struct platform_device *pdev) +{ + struct rtc_device *rtc; + + if (pdev->dev.of_node && of_get_property(pdev->dev.of_node, "has-tpo", + NULL)) + device_set_wakeup_capable(&pdev->dev, true); + + rtc = devm_rtc_device_register(&pdev->dev, DRVNAME, &opal_rtc_ops, + THIS_MODULE); + if (IS_ERR(rtc)) + return PTR_ERR(rtc); + + rtc->uie_unsupported = 1; + + return 0; +} + +static const struct of_device_id opal_rtc_match[] = { + { + .compatible = "ibm,opal-rtc", + }, + { } +}; +MODULE_DEVICE_TABLE(of, opal_rtc_match); + +static const struct platform_device_id opal_rtc_driver_ids[] = { + { + .name = "opal-rtc", + }, + { } +}; +MODULE_DEVICE_TABLE(platform, opal_rtc_driver_ids); + +static struct platform_driver opal_rtc_driver = { + .probe = opal_rtc_probe, + .id_table = opal_rtc_driver_ids, + .driver = { + .name = DRVNAME, + .owner = THIS_MODULE, + .of_match_table = opal_rtc_match, + }, +}; + +static int __init opal_rtc_init(void) +{ + if (!firmware_has_feature(FW_FEATURE_OPAL)) + return -ENODEV; + + return platform_driver_register(&opal_rtc_driver); +} + +static void __exit opal_rtc_exit(void) +{ + platform_driver_unregister(&opal_rtc_driver); +} + +MODULE_AUTHOR("Neelesh Gupta "); +MODULE_DESCRIPTION("IBM OPAL RTC driver"); +MODULE_LICENSE("GPL"); + +module_init(opal_rtc_init); +module_exit(opal_rtc_exit); -- cgit v1.2.3 From bc78b05bb412fad135715551fc536ca511a3cff2 Mon Sep 17 00:00:00 2001 From: Ian Munsie Date: Fri, 14 Nov 2014 17:37:50 +1100 Subject: cxl: Return error to PSL if IRQ demultiplexing fails & print clearer warning If an AFU has a hardware bug that causes it to acknowledge a context terminate or remove while that context has outstanding transactions, it is possible for the kernel to receive an interrupt for that context after we have removed it from the context list. The kernel will not be able to demultiplex the interrupt (or worse - if we have already reallocated the process handle we could mis-attribute it to the new context), and printed a big scary warning. It did not acknowledge the interrupt, which would effectively halt further translation fault processing on the PSL. This patch makes the warning clearer about the likely cause of the issue (i.e. hardware bug) to make it obvious to future AFU designers of what needs to be fixed. It also prints out the process handle which can then be matched up with hardware and software traces for debugging. It also acknowledges the interrupt to the PSL with either an address error or acknowledge, so that the PSL can continue with other translations. Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 2 +- drivers/misc/cxl/irq.c | 46 +++++++++++++++++++++++++++++----------------- drivers/misc/cxl/native.c | 14 +++++++------- 3 files changed, 37 insertions(+), 25 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 3d2b8677ec8a..64a4aa3a5c5d 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -612,7 +612,7 @@ int cxl_attach_process(struct cxl_context *ctx, bool kernel, u64 wed, u64 amr); int cxl_detach_process(struct cxl_context *ctx); -int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info); +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info); int cxl_ack_irq(struct cxl_context *ctx, u64 tfc, u64 psl_reset_mask); int cxl_check_error(struct cxl_afu *afu); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 336020c8e1af..35fcb3d43dc0 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -92,20 +92,13 @@ static irqreturn_t schedule_cxl_fault(struct cxl_context *ctx, u64 dsisr, u64 da return IRQ_HANDLED; } -static irqreturn_t cxl_irq(int irq, void *data) +static irqreturn_t cxl_irq(int irq, void *data, struct cxl_irq_info *irq_info) { struct cxl_context *ctx = data; - struct cxl_irq_info irq_info; u64 dsisr, dar; - int result; - if ((result = cxl_get_irq(ctx, &irq_info))) { - WARN(1, "Unable to get CXL IRQ Info: %i\n", result); - return IRQ_HANDLED; - } - - dsisr = irq_info.dsisr; - dar = irq_info.dar; + dsisr = irq_info->dsisr; + dar = irq_info->dar; pr_devel("CXL interrupt %i for afu pe: %i DSISR: %#llx DAR: %#llx\n", irq, ctx->pe, dsisr, dar); @@ -149,9 +142,9 @@ static irqreturn_t cxl_irq(int irq, void *data) if (dsisr & CXL_PSL_DSISR_An_UR) pr_devel("CXL interrupt: AURP PTE not found\n"); if (dsisr & CXL_PSL_DSISR_An_PE) - return handle_psl_slice_error(ctx, dsisr, irq_info.errstat); + return handle_psl_slice_error(ctx, dsisr, irq_info->errstat); if (dsisr & CXL_PSL_DSISR_An_AE) { - pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info.afu_err); + pr_devel("CXL interrupt: AFU Error %.llx\n", irq_info->afu_err); if (ctx->pending_afu_err) { /* @@ -163,10 +156,10 @@ static irqreturn_t cxl_irq(int irq, void *data) */ dev_err_ratelimited(&ctx->afu->dev, "CXL AFU Error " "undelivered to pe %i: %.llx\n", - ctx->pe, irq_info.afu_err); + ctx->pe, irq_info->afu_err); } else { spin_lock(&ctx->lock); - ctx->afu_err = irq_info.afu_err; + ctx->afu_err = irq_info->afu_err; ctx->pending_afu_err = 1; spin_unlock(&ctx->lock); @@ -182,24 +175,43 @@ static irqreturn_t cxl_irq(int irq, void *data) return IRQ_HANDLED; } +static irqreturn_t fail_psl_irq(struct cxl_afu *afu, struct cxl_irq_info *irq_info) +{ + if (irq_info->dsisr & CXL_PSL_DSISR_TRANS) + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_AE); + else + cxl_p2n_write(afu, CXL_PSL_TFC_An, CXL_PSL_TFC_An_A); + + return IRQ_HANDLED; +} + static irqreturn_t cxl_irq_multiplexed(int irq, void *data) { struct cxl_afu *afu = data; struct cxl_context *ctx; + struct cxl_irq_info irq_info; int ph = cxl_p2n_read(afu, CXL_PSL_PEHandle_An) & 0xffff; int ret; + if ((ret = cxl_get_irq(afu, &irq_info))) { + WARN(1, "Unable to get CXL IRQ Info: %i\n", ret); + return fail_psl_irq(afu, &irq_info); + } + rcu_read_lock(); ctx = idr_find(&afu->contexts_idr, ph); if (ctx) { - ret = cxl_irq(irq, ctx); + ret = cxl_irq(irq, ctx, &irq_info); rcu_read_unlock(); return ret; } rcu_read_unlock(); - WARN(1, "Unable to demultiplex CXL PSL IRQ\n"); - return IRQ_HANDLED; + WARN(1, "Unable to demultiplex CXL PSL IRQ for PE %i DSISR %.16llx DAR" + " %.16llx\n(Possible AFU HW issue - was a term/remove acked" + " with outstanding transactions?)\n", ph, irq_info.dsisr, + irq_info.dar); + return fail_psl_irq(afu, &irq_info); } static irqreturn_t cxl_irq_afu(int irq, void *data) diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index d47532e8f4f1..9a5a442269a8 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -637,18 +637,18 @@ int cxl_detach_process(struct cxl_context *ctx) return detach_process_native_afu_directed(ctx); } -int cxl_get_irq(struct cxl_context *ctx, struct cxl_irq_info *info) +int cxl_get_irq(struct cxl_afu *afu, struct cxl_irq_info *info) { u64 pidtid; - info->dsisr = cxl_p2n_read(ctx->afu, CXL_PSL_DSISR_An); - info->dar = cxl_p2n_read(ctx->afu, CXL_PSL_DAR_An); - info->dsr = cxl_p2n_read(ctx->afu, CXL_PSL_DSR_An); - pidtid = cxl_p2n_read(ctx->afu, CXL_PSL_PID_TID_An); + info->dsisr = cxl_p2n_read(afu, CXL_PSL_DSISR_An); + info->dar = cxl_p2n_read(afu, CXL_PSL_DAR_An); + info->dsr = cxl_p2n_read(afu, CXL_PSL_DSR_An); + pidtid = cxl_p2n_read(afu, CXL_PSL_PID_TID_An); info->pid = pidtid >> 32; info->tid = pidtid & 0xffffffff; - info->afu_err = cxl_p2n_read(ctx->afu, CXL_AFU_ERR_An); - info->errstat = cxl_p2n_read(ctx->afu, CXL_PSL_ErrStat_An); + info->afu_err = cxl_p2n_read(afu, CXL_AFU_ERR_An); + info->errstat = cxl_p2n_read(afu, CXL_PSL_ErrStat_An); return 0; } -- cgit v1.2.3 From 80fa93fce37d3490f4bb0da8a5b239a6745bc744 Mon Sep 17 00:00:00 2001 From: Michael Neuling Date: Fri, 14 Nov 2014 18:09:28 +1100 Subject: cxl: Name interrupts in /proc/interrupt Currently all interrupts generated by cxl are named "cxl". This is not very informative as we can't distinguish between cards, AFUs, error interrupts, user contexts and user interrupts numbers. Being able to distinguish them is useful for setting affinity. This patch gives each of these names in /proc/interrupts. A two card CAPI system, with afu0.0 having 2 active contexts each with 4 user IRQs each, will now look like this: % grep cxl /proc/interrupts 444: 0 OPAL ICS 141312 Level cxl-card1-err 445: 0 OPAL ICS 141313 Level cxl-afu1.0-err 446: 0 OPAL ICS 141314 Level cxl-afu1.0 462: 0 OPAL ICS 2052 Level cxl-afu0.0-pe0-1 463: 75517 OPAL ICS 2053 Level cxl-afu0.0-pe0-2 468: 0 OPAL ICS 2054 Level cxl-afu0.0-pe0-3 469: 0 OPAL ICS 2055 Level cxl-afu0.0-pe0-4 470: 0 OPAL ICS 2056 Level cxl-afu0.0-pe1-1 471: 75506 OPAL ICS 2057 Level cxl-afu0.0-pe1-2 472: 0 OPAL ICS 2058 Level cxl-afu0.0-pe1-3 473: 0 OPAL ICS 2059 Level cxl-afu0.0-pe1-4 502: 1066 OPAL ICS 2050 Level cxl-afu0.0 514: 0 OPAL ICS 2048 Level cxl-card0-err 515: 0 OPAL ICS 2049 Level cxl-afu0.0-err Signed-off-by: Michael Neuling Signed-off-by: Ian Munsie Signed-off-by: Michael Ellerman --- drivers/misc/cxl/cxl.h | 13 +++++-- drivers/misc/cxl/irq.c | 98 ++++++++++++++++++++++++++++++++++++++++++++------ 2 files changed, 98 insertions(+), 13 deletions(-) (limited to 'drivers') diff --git a/drivers/misc/cxl/cxl.h b/drivers/misc/cxl/cxl.h index 64a4aa3a5c5d..b5b6bda44a00 100644 --- a/drivers/misc/cxl/cxl.h +++ b/drivers/misc/cxl/cxl.h @@ -336,6 +336,8 @@ struct cxl_sste { struct cxl_afu { irq_hw_number_t psl_hwirq; irq_hw_number_t serr_hwirq; + char *err_irq_name; + char *psl_irq_name; unsigned int serr_virq; void __iomem *p1n_mmio; void __iomem *p2n_mmio; @@ -379,6 +381,12 @@ struct cxl_afu { bool enabled; }; + +struct cxl_irq_name { + struct list_head list; + char *name; +}; + /* * This is a cxl context. If the PSL is in dedicated mode, there will be one * of these per AFU. If in AFU directed there can be lots of these. @@ -403,6 +411,7 @@ struct cxl_context { unsigned long *irq_bitmap; /* Accessed from IRQ context */ struct cxl_irq_ranges irqs; + struct list_head irq_names; u64 fault_addr; u64 fault_dsisr; u64 afu_err; @@ -444,6 +453,7 @@ struct cxl { struct dentry *trace; struct dentry *psl_err_chk; struct dentry *debugfs; + char *irq_name; struct bin_attribute cxl_attr; int adapter_num; int user_irqs; @@ -563,9 +573,6 @@ int _cxl_afu_deactivate_mode(struct cxl_afu *afu, int mode); int cxl_afu_deactivate_mode(struct cxl_afu *afu); int cxl_afu_select_best_mode(struct cxl_afu *afu); -unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie); -void cxl_unmap_irq(unsigned int virq, void *cookie); int cxl_register_psl_irq(struct cxl_afu *afu); void cxl_release_psl_irq(struct cxl_afu *afu); int cxl_register_psl_err_irq(struct cxl *adapter); diff --git a/drivers/misc/cxl/irq.c b/drivers/misc/cxl/irq.c index 35fcb3d43dc0..c294925f73ee 100644 --- a/drivers/misc/cxl/irq.c +++ b/drivers/misc/cxl/irq.c @@ -255,7 +255,7 @@ static irqreturn_t cxl_irq_afu(int irq, void *data) } unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, - irq_handler_t handler, void *cookie) + irq_handler_t handler, void *cookie, const char *name) { unsigned int virq; int result; @@ -271,7 +271,7 @@ unsigned int cxl_map_irq(struct cxl *adapter, irq_hw_number_t hwirq, pr_devel("hwirq %#lx mapped to virq %u\n", hwirq, virq); - result = request_irq(virq, handler, 0, "cxl", cookie); + result = request_irq(virq, handler, 0, name, cookie); if (result) { dev_warn(&adapter->dev, "cxl_map_irq: request_irq failed: %i\n", result); return 0; @@ -290,14 +290,15 @@ static int cxl_register_one_irq(struct cxl *adapter, irq_handler_t handler, void *cookie, irq_hw_number_t *dest_hwirq, - unsigned int *dest_virq) + unsigned int *dest_virq, + const char *name) { int hwirq, virq; if ((hwirq = cxl_alloc_one_irq(adapter)) < 0) return hwirq; - if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie))) + if (!(virq = cxl_map_irq(adapter, hwirq, handler, cookie, name))) goto err; *dest_hwirq = hwirq; @@ -314,10 +315,19 @@ int cxl_register_psl_err_irq(struct cxl *adapter) { int rc; + adapter->irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&adapter->dev)); + if (!adapter->irq_name) + return -ENOMEM; + if ((rc = cxl_register_one_irq(adapter, cxl_irq_err, adapter, &adapter->err_hwirq, - &adapter->err_virq))) + &adapter->err_virq, + adapter->irq_name))) { + kfree(adapter->irq_name); + adapter->irq_name = NULL; return rc; + } cxl_p1_write(adapter, CXL_PSL_ErrIVTE, adapter->err_hwirq & 0xffff); @@ -329,6 +339,7 @@ void cxl_release_psl_err_irq(struct cxl *adapter) cxl_p1_write(adapter, CXL_PSL_ErrIVTE, 0x0000000000000000); cxl_unmap_irq(adapter->err_virq, adapter); cxl_release_one_irq(adapter, adapter->err_hwirq); + kfree(adapter->irq_name); } int cxl_register_serr_irq(struct cxl_afu *afu) @@ -336,10 +347,18 @@ int cxl_register_serr_irq(struct cxl_afu *afu) u64 serr; int rc; + afu->err_irq_name = kasprintf(GFP_KERNEL, "cxl-%s-err", + dev_name(&afu->dev)); + if (!afu->err_irq_name) + return -ENOMEM; + if ((rc = cxl_register_one_irq(afu->adapter, cxl_slice_irq_err, afu, &afu->serr_hwirq, - &afu->serr_virq))) + &afu->serr_virq, afu->err_irq_name))) { + kfree(afu->err_irq_name); + afu->err_irq_name = NULL; return rc; + } serr = cxl_p1n_read(afu, CXL_PSL_SERR_An); serr = (serr & 0x00ffffffffff0000ULL) | (afu->serr_hwirq & 0xffff); @@ -353,24 +372,50 @@ void cxl_release_serr_irq(struct cxl_afu *afu) cxl_p1n_write(afu, CXL_PSL_SERR_An, 0x0000000000000000); cxl_unmap_irq(afu->serr_virq, afu); cxl_release_one_irq(afu->adapter, afu->serr_hwirq); + kfree(afu->err_irq_name); } int cxl_register_psl_irq(struct cxl_afu *afu) { - return cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, - &afu->psl_hwirq, &afu->psl_virq); + int rc; + + afu->psl_irq_name = kasprintf(GFP_KERNEL, "cxl-%s", + dev_name(&afu->dev)); + if (!afu->psl_irq_name) + return -ENOMEM; + + if ((rc = cxl_register_one_irq(afu->adapter, cxl_irq_multiplexed, afu, + &afu->psl_hwirq, &afu->psl_virq, + afu->psl_irq_name))) { + kfree(afu->psl_irq_name); + afu->psl_irq_name = NULL; + } + return rc; } void cxl_release_psl_irq(struct cxl_afu *afu) { cxl_unmap_irq(afu->psl_virq, afu); cxl_release_one_irq(afu->adapter, afu->psl_hwirq); + kfree(afu->psl_irq_name); +} + +void afu_irq_name_free(struct cxl_context *ctx) +{ + struct cxl_irq_name *irq_name, *tmp; + + list_for_each_entry_safe(irq_name, tmp, &ctx->irq_names, list) { + kfree(irq_name->name); + list_del(&irq_name->list); + kfree(irq_name); + } } int afu_register_irqs(struct cxl_context *ctx, u32 count) { irq_hw_number_t hwirq; - int rc, r, i; + int rc, r, i, j = 1; + struct cxl_irq_name *irq_name; if ((rc = cxl_alloc_irq_ranges(&ctx->irqs, ctx->afu->adapter, count))) return rc; @@ -384,15 +429,47 @@ int afu_register_irqs(struct cxl_context *ctx, u32 count) sizeof(*ctx->irq_bitmap), GFP_KERNEL); if (!ctx->irq_bitmap) return -ENOMEM; + + /* + * Allocate names first. If any fail, bail out before allocating + * actual hardware IRQs. + */ + INIT_LIST_HEAD(&ctx->irq_names); + for (r = 1; r < CXL_IRQ_RANGES; r++) { + for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { + irq_name = kmalloc(sizeof(struct cxl_irq_name), + GFP_KERNEL); + if (!irq_name) + goto out; + irq_name->name = kasprintf(GFP_KERNEL, "cxl-%s-pe%i-%i", + dev_name(&ctx->afu->dev), + ctx->pe, j); + if (!irq_name->name) { + kfree(irq_name); + goto out; + } + /* Add to tail so next look get the correct order */ + list_add_tail(&irq_name->list, &ctx->irq_names); + j++; + } + } + + /* We've allocated all memory now, so let's do the irq allocations */ + irq_name = list_first_entry(&ctx->irq_names, struct cxl_irq_name, list); for (r = 1; r < CXL_IRQ_RANGES; r++) { hwirq = ctx->irqs.offset[r]; for (i = 0; i < ctx->irqs.range[r]; hwirq++, i++) { cxl_map_irq(ctx->afu->adapter, hwirq, - cxl_irq_afu, ctx); + cxl_irq_afu, ctx, irq_name->name); + irq_name = list_next_entry(irq_name, list); } } return 0; + +out: + afu_irq_name_free(ctx); + return -ENOMEM; } void afu_release_irqs(struct cxl_context *ctx) @@ -410,5 +487,6 @@ void afu_release_irqs(struct cxl_context *ctx) } } + afu_irq_name_free(ctx); cxl_release_irq_ranges(&ctx->irqs, ctx->afu->adapter); } -- cgit v1.2.3 From aefa5688c070727b8729de1aef85cad7b9933fc7 Mon Sep 17 00:00:00 2001 From: "Aneesh Kumar K.V" Date: Thu, 4 Dec 2014 11:00:14 +0530 Subject: powerpc/mm: don't do tlbie for updatepp request with NO HPTE fault upatepp can get called for a nohpte fault when we find from the linux page table that the translation was hashed before. In that case we are sure that there is no existing translation, hence we could avoid doing tlbie. We could possibly race with a parallel fault filling the TLB. But that should be ok because updatepp is only ever relaxing permissions. We also look at linux pte permission bits when filling hash pte permission bits. We also hold the linux pte busy bits while inserting/updating a hashpte entry, hence a paralle update of linux pte is not possible. On the other hand mprotect involves ptep_modify_prot_start which cause a hpte invalidate and not updatepp. Performance number: We use randbox_access_bench written by Anton. Kernel with THP disabled and smaller hash page table size. 86.60% random_access_b [kernel.kallsyms] [k] .native_hpte_updatepp 2.10% random_access_b random_access_bench [.] doit 1.99% random_access_b [kernel.kallsyms] [k] .do_raw_spin_lock 1.85% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 1.26% random_access_b [kernel.kallsyms] [k] .native_flush_hash_range 1.18% random_access_b [kernel.kallsyms] [k] .__delay 0.69% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 0.37% random_access_b [kernel.kallsyms] [k] .clear_user_page 0.34% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 0.32% random_access_b [kernel.kallsyms] [k] fast_exception_return 0.30% random_access_b [kernel.kallsyms] [k] .hash_page_mm With Fix: 27.54% random_access_b random_access_bench [.] doit 22.90% random_access_b [kernel.kallsyms] [k] .native_hpte_insert 5.76% random_access_b [kernel.kallsyms] [k] .native_hpte_remove 5.20% random_access_b [kernel.kallsyms] [k] fast_exception_return 5.12% random_access_b [kernel.kallsyms] [k] .__hash_page_64K 4.80% random_access_b [kernel.kallsyms] [k] .hash_page_mm 3.31% random_access_b [kernel.kallsyms] [k] data_access_common 1.84% random_access_b [kernel.kallsyms] [k] .trace_hardirqs_on_caller Signed-off-by: Aneesh Kumar K.V Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/machdep.h | 2 +- arch/powerpc/include/asm/mmu-hash64.h | 22 ++++++++++------ arch/powerpc/include/asm/tlbflush.h | 4 +-- arch/powerpc/kernel/exceptions-64s.S | 2 ++ arch/powerpc/mm/hash_low_64.S | 15 ++++++----- arch/powerpc/mm/hash_native_64.c | 15 ++++++++--- arch/powerpc/mm/hash_utils_64.c | 44 ++++++++++++++++++++----------- arch/powerpc/mm/hugepage-hash64.c | 8 +++--- arch/powerpc/mm/hugetlbpage-hash64.c | 6 ++--- arch/powerpc/mm/pgtable_64.c | 7 ++--- arch/powerpc/platforms/cell/beat_htab.c | 4 +-- arch/powerpc/platforms/cell/spu_base.c | 5 ++-- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/platforms/ps3/htab.c | 2 +- arch/powerpc/platforms/pseries/lpar.c | 2 +- drivers/misc/cxl/fault.c | 8 ++++-- 16 files changed, 91 insertions(+), 57 deletions(-) (limited to 'drivers') diff --git a/arch/powerpc/include/asm/machdep.h b/arch/powerpc/include/asm/machdep.h index e5c0919acca4..c8175a3fe560 100644 --- a/arch/powerpc/include/asm/machdep.h +++ b/arch/powerpc/include/asm/machdep.h @@ -42,7 +42,7 @@ struct machdep_calls { unsigned long newpp, unsigned long vpn, int bpsize, int apsize, - int ssize, int local); + int ssize, unsigned long flags); void (*hpte_updateboltedpp)(unsigned long newpp, unsigned long ea, int psize, int ssize); diff --git a/arch/powerpc/include/asm/mmu-hash64.h b/arch/powerpc/include/asm/mmu-hash64.h index aeebc94b2bce..4f13c3ed7acf 100644 --- a/arch/powerpc/include/asm/mmu-hash64.h +++ b/arch/powerpc/include/asm/mmu-hash64.h @@ -316,27 +316,33 @@ static inline unsigned long hpt_hash(unsigned long vpn, return hash & 0x7fffffffffUL; } +#define HPTE_LOCAL_UPDATE 0x1 +#define HPTE_NOHPTE_UPDATE 0x2 + extern int __hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize, int subpage_prot); + unsigned long flags, int ssize, int subpage_prot); extern int __hash_page_64K(unsigned long ea, unsigned long access, unsigned long vsid, pte_t *ptep, unsigned long trap, - unsigned int local, int ssize); + unsigned long flags, int ssize); struct mm_struct; unsigned int hash_page_do_lazy_icache(unsigned int pp, pte_t pte, int trap); -extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap); -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); +extern int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags); +extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize); + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize); #ifdef CONFIG_TRANSPARENT_HUGEPAGE extern int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, unsigned long trap, - int local, int ssize, unsigned int psize); + unsigned long flags, int ssize, unsigned int psize); #else static inline int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, pmd_t *pmdp, - unsigned long trap, int local, + unsigned long trap, unsigned long flags, int ssize, unsigned int psize) { BUG(); diff --git a/arch/powerpc/include/asm/tlbflush.h b/arch/powerpc/include/asm/tlbflush.h index 4d3ecd8d8929..23d351ca0303 100644 --- a/arch/powerpc/include/asm/tlbflush.h +++ b/arch/powerpc/include/asm/tlbflush.h @@ -125,11 +125,11 @@ static inline void arch_leave_lazy_mmu_mode(void) extern void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, - int ssize, int local); + int ssize, unsigned long flags); extern void flush_hash_range(unsigned long number, int local); extern void flush_hash_hugepage(unsigned long vsid, unsigned long addr, pmd_t *pmdp, unsigned int psize, int ssize, - int local); + unsigned long flags); static inline void local_flush_tlb_mm(struct mm_struct *mm) { diff --git a/arch/powerpc/kernel/exceptions-64s.S b/arch/powerpc/kernel/exceptions-64s.S index ad62f4d6ce31..6213f494f40b 100644 --- a/arch/powerpc/kernel/exceptions-64s.S +++ b/arch/powerpc/kernel/exceptions-64s.S @@ -1565,9 +1565,11 @@ do_hash_page: * r3 contains the faulting address * r4 contains the required access permissions * r5 contains the trap number + * r6 contains dsisr * * at return r3 = 0 for success, 1 for page fault, negative for error */ + ld r6,_DSISR(r1) bl hash_page /* build HPTE if possible */ cmpdi r3,0 /* see if hash_page succeeded */ diff --git a/arch/powerpc/mm/hash_low_64.S b/arch/powerpc/mm/hash_low_64.S index 5094f32b706e..463174a4a647 100644 --- a/arch/powerpc/mm/hash_low_64.S +++ b/arch/powerpc/mm/hash_low_64.S @@ -46,7 +46,8 @@ /* * _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize) + * pte_t *ptep, unsigned long trap, unsigned long flags, + * int ssize) * * Adds a 4K page to the hash table in a segment of 4K pages only */ @@ -298,7 +299,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* Patched by htab_finish_init() */ @@ -338,8 +339,8 @@ htab_pte_insert_failure: *****************************************************************************/ /* _hash_page_4K(unsigned long ea, unsigned long access, unsigned long vsid, - * pte_t *ptep, unsigned long trap, int local, int ssize, - * int subpg_prot) + * pte_t *ptep, unsigned long trap, unsigned local flags, + * int ssize, int subpg_prot) */ /* @@ -594,7 +595,7 @@ htab_inval_old_hpte: li r5,0 /* PTE.hidx */ li r6,MMU_PAGE_64K /* psize */ ld r7,STK_PARAM(R9)(r1) /* ssize */ - ld r8,STK_PARAM(R8)(r1) /* local */ + ld r8,STK_PARAM(R8)(r1) /* flags */ bl flush_hash_page /* Clear out _PAGE_HPTE_SUB bits in the new linux PTE */ lis r0,_PAGE_HPTE_SUB@h @@ -666,7 +667,7 @@ htab_modify_pte: li r6,MMU_PAGE_4K /* base page size */ li r7,MMU_PAGE_4K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl htab_call_hpte_updatepp htab_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ @@ -962,7 +963,7 @@ ht64_modify_pte: li r6,MMU_PAGE_64K /* base page size */ li r7,MMU_PAGE_64K /* actual page size */ ld r8,STK_PARAM(R9)(r1) /* segment size */ - ld r9,STK_PARAM(R8)(r1) /* get "local" param */ + ld r9,STK_PARAM(R8)(r1) /* get "flags" param */ .globl ht64_call_hpte_updatepp ht64_call_hpte_updatepp: bl . /* patched by htab_finish_init() */ diff --git a/arch/powerpc/mm/hash_native_64.c b/arch/powerpc/mm/hash_native_64.c index 13700911b522..9c4880ddecd6 100644 --- a/arch/powerpc/mm/hash_native_64.c +++ b/arch/powerpc/mm/hash_native_64.c @@ -283,11 +283,11 @@ static long native_hpte_remove(unsigned long hpte_group) static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int bpsize, - int apsize, int ssize, int local) + int apsize, int ssize, unsigned long flags) { struct hash_pte *hptep = htab_address + slot; unsigned long hpte_v, want_v; - int ret = 0; + int ret = 0, local = 0; want_v = hpte_encode_avpn(vpn, bpsize, ssize); @@ -322,8 +322,15 @@ static long native_hpte_updatepp(unsigned long slot, unsigned long newpp, } native_unlock_hpte(hptep); } - /* Ensure it is out of the tlb too. */ - tlbie(vpn, bpsize, apsize, ssize, local); + + if (flags & HPTE_LOCAL_UPDATE) + local = 1; + /* + * Ensure it is out of the tlb too if it is not a nohpte fault + */ + if (!(flags & HPTE_NOHPTE_UPDATE)) + tlbie(vpn, bpsize, apsize, ssize, local); + return ret; } diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 68211d398fdb..e56a307bc676 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -989,7 +989,9 @@ static void check_paca_psize(unsigned long ea, struct mm_struct *mm, * -1 - critical hash insertion error * -2 - access not permitted by subpage protection mechanism */ -int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, unsigned long trap) +int hash_page_mm(struct mm_struct *mm, unsigned long ea, + unsigned long access, unsigned long trap, + unsigned long flags) { enum ctx_state prev_state = exception_enter(); pgd_t *pgdir; @@ -997,7 +999,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u pte_t *ptep; unsigned hugeshift; const struct cpumask *tmp; - int rc, user_region = 0, local = 0; + int rc, user_region = 0; int psize, ssize; DBG_LOW("hash_page(ea=%016lx, access=%lx, trap=%lx\n", @@ -1049,7 +1051,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u /* Check CPU locality */ tmp = cpumask_of(smp_processor_id()); if (user_region && cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; #ifndef CONFIG_PPC_64K_PAGES /* If we use 4K pages and our psize is not 4K, then we might @@ -1086,11 +1088,11 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u if (hugeshift) { if (pmd_trans_huge(*(pmd_t *)ptep)) rc = __hash_page_thp(ea, access, vsid, (pmd_t *)ptep, - trap, local, ssize, psize); + trap, flags, ssize, psize); #ifdef CONFIG_HUGETLB_PAGE else rc = __hash_page_huge(ea, access, vsid, ptep, trap, - local, ssize, hugeshift, psize); + flags, ssize, hugeshift, psize); #else else { /* @@ -1149,7 +1151,8 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u #ifdef CONFIG_PPC_HAS_HASH_64K if (psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ { @@ -1158,7 +1161,7 @@ int hash_page_mm(struct mm_struct *mm, unsigned long ea, unsigned long access, u rc = -2; else rc = __hash_page_4K(ea, access, vsid, ptep, trap, - local, ssize, spp); + flags, ssize, spp); } /* Dump some info in case of hash insertion failure, they should @@ -1181,14 +1184,19 @@ bail: } EXPORT_SYMBOL_GPL(hash_page_mm); -int hash_page(unsigned long ea, unsigned long access, unsigned long trap) +int hash_page(unsigned long ea, unsigned long access, unsigned long trap, + unsigned long dsisr) { + unsigned long flags = 0; struct mm_struct *mm = current->mm; if (REGION_ID(ea) == VMALLOC_REGION_ID) mm = &init_mm; - return hash_page_mm(mm, ea, access, trap); + if (dsisr & DSISR_NOHPTE) + flags |= HPTE_NOHPTE_UPDATE; + + return hash_page_mm(mm, ea, access, trap, flags); } EXPORT_SYMBOL_GPL(hash_page); @@ -1200,7 +1208,7 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, pgd_t *pgdir; pte_t *ptep; unsigned long flags; - int rc, ssize, local = 0; + int rc, ssize, update_flags = 0; BUG_ON(REGION_ID(ea) != USER_REGION_ID); @@ -1251,16 +1259,17 @@ void hash_preload(struct mm_struct *mm, unsigned long ea, /* Is that local to this CPU ? */ if (cpumask_equal(mm_cpumask(mm), cpumask_of(smp_processor_id()))) - local = 1; + update_flags |= HPTE_LOCAL_UPDATE; /* Hash it in */ #ifdef CONFIG_PPC_HAS_HASH_64K if (mm->context.user_psize == MMU_PAGE_64K) - rc = __hash_page_64K(ea, access, vsid, ptep, trap, local, ssize); + rc = __hash_page_64K(ea, access, vsid, ptep, trap, + update_flags, ssize); else #endif /* CONFIG_PPC_HAS_HASH_64K */ - rc = __hash_page_4K(ea, access, vsid, ptep, trap, local, ssize, - subpage_protection(mm, ea)); + rc = __hash_page_4K(ea, access, vsid, ptep, trap, update_flags, + ssize, subpage_protection(mm, ea)); /* Dump some info in case of hash insertion failure, they should * never happen so it is really useful to know if/when they do @@ -1278,9 +1287,10 @@ out_exit: * do not forget to update the assembly call site ! */ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, - int local) + unsigned long flags) { unsigned long hash, index, shift, hidx, slot; + int local = flags & HPTE_LOCAL_UPDATE; DBG_LOW("flush_hash_page(vpn=%016lx)\n", vpn); pte_iterate_hashed_subpages(pte, psize, vpn, index, shift) { @@ -1317,12 +1327,14 @@ void flush_hash_page(unsigned long vpn, real_pte_t pte, int psize, int ssize, #ifdef CONFIG_TRANSPARENT_HUGEPAGE void flush_hash_hugepage(unsigned long vsid, unsigned long addr, - pmd_t *pmdp, unsigned int psize, int ssize, int local) + pmd_t *pmdp, unsigned int psize, int ssize, + unsigned long flags) { int i, max_hpte_count, valid; unsigned long s_addr; unsigned char *hpte_slot_array; unsigned long hidx, shift, vpn, hash, slot; + int local = flags & HPTE_LOCAL_UPDATE; s_addr = addr & HPAGE_PMD_MASK; hpte_slot_array = get_hpte_slot_array(pmdp); diff --git a/arch/powerpc/mm/hugepage-hash64.c b/arch/powerpc/mm/hugepage-hash64.c index 3a648cd363ae..86686514ae13 100644 --- a/arch/powerpc/mm/hugepage-hash64.c +++ b/arch/powerpc/mm/hugepage-hash64.c @@ -19,8 +19,8 @@ #include int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, - pmd_t *pmdp, unsigned long trap, int local, int ssize, - unsigned int psize) + pmd_t *pmdp, unsigned long trap, unsigned long flags, + int ssize, unsigned int psize) { unsigned int index, valid; unsigned char *hpte_slot_array; @@ -95,7 +95,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, */ if ((old_pmd & _PAGE_HASHPTE) && !(old_pmd & _PAGE_COMBO)) flush_hash_hugepage(vsid, ea, pmdp, MMU_PAGE_64K, - ssize, local); + ssize, flags); } valid = hpte_valid(hpte_slot_array, index); @@ -108,7 +108,7 @@ int __hash_page_thp(unsigned long ea, unsigned long access, unsigned long vsid, slot += hidx & _PTEIDX_GROUP_IX; ret = ppc_md.hpte_updatepp(slot, rflags, vpn, - psize, lpsize, ssize, local); + psize, lpsize, ssize, flags); /* * We failed to update, try to insert a new entry. */ diff --git a/arch/powerpc/mm/hugetlbpage-hash64.c b/arch/powerpc/mm/hugetlbpage-hash64.c index a5bcf9301196..d94b1af53a93 100644 --- a/arch/powerpc/mm/hugetlbpage-hash64.c +++ b/arch/powerpc/mm/hugetlbpage-hash64.c @@ -19,8 +19,8 @@ extern long hpte_insert_repeating(unsigned long hash, unsigned long vpn, unsigned long vflags, int psize, int ssize); int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, - pte_t *ptep, unsigned long trap, int local, int ssize, - unsigned int shift, unsigned int mmu_psize) + pte_t *ptep, unsigned long trap, unsigned long flags, + int ssize, unsigned int shift, unsigned int mmu_psize) { unsigned long vpn; unsigned long old_pte, new_pte; @@ -81,7 +81,7 @@ int __hash_page_huge(unsigned long ea, unsigned long access, unsigned long vsid, slot += (old_pte & _PAGE_F_GIX) >> 12; if (ppc_md.hpte_updatepp(slot, rflags, vpn, mmu_psize, - mmu_psize, ssize, local) == -1) + mmu_psize, ssize, flags) == -1) old_pte &= ~_PAGE_HPTEFLAGS; } diff --git a/arch/powerpc/mm/pgtable_64.c b/arch/powerpc/mm/pgtable_64.c index eea9fa1f8ae7..4fe5f64cc179 100644 --- a/arch/powerpc/mm/pgtable_64.c +++ b/arch/powerpc/mm/pgtable_64.c @@ -739,9 +739,10 @@ void pmdp_invalidate(struct vm_area_struct *vma, unsigned long address, void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, unsigned long old_pmd) { - int ssize, local = 0; + int ssize; unsigned int psize; unsigned long vsid; + unsigned long flags = 0; const struct cpumask *tmp; /* get the base page size,vsid and segment size */ @@ -765,9 +766,9 @@ void hpte_do_hugepage_flush(struct mm_struct *mm, unsigned long addr, tmp = cpumask_of(smp_processor_id()); if (cpumask_equal(mm_cpumask(mm), tmp)) - local = 1; + flags |= HPTE_LOCAL_UPDATE; - return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, local); + return flush_hash_hugepage(vsid, addr, pmdp, psize, ssize, flags); } static pmd_t pmd_set_protbits(pmd_t pmd, pgprot_t pgprot) diff --git a/arch/powerpc/platforms/cell/beat_htab.c b/arch/powerpc/platforms/cell/beat_htab.c index d4d245c0d787..bee9232fe619 100644 --- a/arch/powerpc/platforms/cell/beat_htab.c +++ b/arch/powerpc/platforms/cell/beat_htab.c @@ -186,7 +186,7 @@ static long beat_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; u64 dummy0, dummy1; @@ -369,7 +369,7 @@ static long beat_lpar_hpte_updatepp_v3(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long flags) { unsigned long lpar_rc; unsigned long want_v; diff --git a/arch/powerpc/platforms/cell/spu_base.c b/arch/powerpc/platforms/cell/spu_base.c index ffcbd242e669..f7af74f83693 100644 --- a/arch/powerpc/platforms/cell/spu_base.c +++ b/arch/powerpc/platforms/cell/spu_base.c @@ -181,7 +181,8 @@ static int __spu_trap_data_seg(struct spu *spu, unsigned long ea) return 0; } -extern int hash_page(unsigned long ea, unsigned long access, unsigned long trap); //XXX +extern int hash_page(unsigned long ea, unsigned long access, + unsigned long trap, unsigned long dsisr); //XXX static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) { int ret; @@ -196,7 +197,7 @@ static int __spu_trap_data_map(struct spu *spu, unsigned long ea, u64 dsisr) (REGION_ID(ea) != USER_REGION_ID)) { spin_unlock(&spu->register_lock); - ret = hash_page(ea, _PAGE_PRESENT, 0x300); + ret = hash_page(ea, _PAGE_PRESENT, 0x300, dsisr); spin_lock(&spu->register_lock); if (!ret) { diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e45894a08118..d98f845ac777 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -144,7 +144,7 @@ int spufs_handle_class1(struct spu_context *ctx) access = (_PAGE_PRESENT | _PAGE_USER); access |= (dsisr & MFC_DSISR_ACCESS_PUT) ? _PAGE_RW : 0UL; local_irq_save(flags); - ret = hash_page(ea, access, 0x300); + ret = hash_page(ea, access, 0x300, dsisr); local_irq_restore(flags); /* hashing failed, so try the actual fault handler */ diff --git a/arch/powerpc/platforms/ps3/htab.c b/arch/powerpc/platforms/ps3/htab.c index 3e270e3412ae..2f95d33cf34a 100644 --- a/arch/powerpc/platforms/ps3/htab.c +++ b/arch/powerpc/platforms/ps3/htab.c @@ -110,7 +110,7 @@ static long ps3_hpte_remove(unsigned long hpte_group) static long ps3_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { int result; u64 hpte_v, want_v, hpte_rs; diff --git a/arch/powerpc/platforms/pseries/lpar.c b/arch/powerpc/platforms/pseries/lpar.c index 832f221840f2..469751d92004 100644 --- a/arch/powerpc/platforms/pseries/lpar.c +++ b/arch/powerpc/platforms/pseries/lpar.c @@ -284,7 +284,7 @@ static long pSeries_lpar_hpte_updatepp(unsigned long slot, unsigned long newpp, unsigned long vpn, int psize, int apsize, - int ssize, int local) + int ssize, unsigned long inv_flags) { unsigned long lpar_rc; unsigned long flags = (newpp & 7) | H_AVPN; diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index c99e896604ee..f8684bca2d79 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -133,7 +133,7 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, { unsigned flt = 0; int result; - unsigned long access, flags; + unsigned long access, flags, inv_flags = 0; if ((result = copro_handle_mm_fault(mm, dar, dsisr, &flt))) { pr_devel("copro_handle_mm_fault failed: %#x\n", result); @@ -149,8 +149,12 @@ static void cxl_handle_page_fault(struct cxl_context *ctx, access |= _PAGE_RW; if ((!ctx->kernel) || ~(dar & (1ULL << 63))) access |= _PAGE_USER; + + if (dsisr & DSISR_NOHPTE) + inv_flags |= HPTE_NOHPTE_UPDATE; + local_irq_save(flags); - hash_page_mm(mm, dar, access, 0x300); + hash_page_mm(mm, dar, access, 0x300, inv_flags); local_irq_restore(flags); pr_devel("Page fault successfully handled for pe: %i!\n", ctx->pe); -- cgit v1.2.3