From 9ae433bc79f97bae221d53bb1a8e21415ea58625 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 9 Dec 2016 14:33:51 +0000 Subject: crypto: chacha20 - convert generic and x86 versions to skcipher This converts the ChaCha20 code from a blkcipher to a skcipher, which is now the preferred way to implement symmetric block and stream ciphers. This ports the generic and x86 versions at the same time because the latter reuses routines of the former. Note that the skcipher_walk() API guarantees that all presented blocks except the final one are a multiple of the chunk size, so we can simplify the encrypt() routine somewhat. Signed-off-by: Ard Biesheuvel Signed-off-by: Herbert Xu --- arch/x86/crypto/chacha20_glue.c | 69 ++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 38 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index f910d1d449f0..78f75b07dc25 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -11,7 +11,7 @@ #include #include -#include +#include #include #include #include @@ -63,36 +63,34 @@ static void chacha20_dosimd(u32 *state, u8 *dst, const u8 *src, } } -static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, - struct scatterlist *src, unsigned int nbytes) +static int chacha20_simd(struct skcipher_request *req) { - u32 *state, state_buf[16 + (CHACHA20_STATE_ALIGN / sizeof(u32)) - 1]; - struct blkcipher_walk walk; + struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); + struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); + u32 state[16] __aligned(CHACHA20_STATE_ALIGN); + struct skcipher_walk walk; int err; - if (nbytes <= CHACHA20_BLOCK_SIZE || !may_use_simd()) - return crypto_chacha20_crypt(desc, dst, src, nbytes); + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) + return crypto_chacha20_crypt(req); - state = (u32 *)roundup((uintptr_t)state_buf, CHACHA20_STATE_ALIGN); + err = skcipher_walk_virt(&walk, req, true); - blkcipher_walk_init(&walk, dst, src, nbytes); - err = blkcipher_walk_virt_block(desc, &walk, CHACHA20_BLOCK_SIZE); - - crypto_chacha20_init(state, crypto_blkcipher_ctx(desc->tfm), walk.iv); + crypto_chacha20_init(state, ctx, walk.iv); kernel_fpu_begin(); while (walk.nbytes >= CHACHA20_BLOCK_SIZE) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, rounddown(walk.nbytes, CHACHA20_BLOCK_SIZE)); - err = blkcipher_walk_done(desc, &walk, - walk.nbytes % CHACHA20_BLOCK_SIZE); + err = skcipher_walk_done(&walk, + walk.nbytes % CHACHA20_BLOCK_SIZE); } if (walk.nbytes) { chacha20_dosimd(state, walk.dst.virt.addr, walk.src.virt.addr, walk.nbytes); - err = blkcipher_walk_done(desc, &walk, 0); + err = skcipher_walk_done(&walk, 0); } kernel_fpu_end(); @@ -100,27 +98,22 @@ static int chacha20_simd(struct blkcipher_desc *desc, struct scatterlist *dst, return err; } -static struct crypto_alg alg = { - .cra_name = "chacha20", - .cra_driver_name = "chacha20-simd", - .cra_priority = 300, - .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, - .cra_blocksize = 1, - .cra_type = &crypto_blkcipher_type, - .cra_ctxsize = sizeof(struct chacha20_ctx), - .cra_alignmask = sizeof(u32) - 1, - .cra_module = THIS_MODULE, - .cra_u = { - .blkcipher = { - .min_keysize = CHACHA20_KEY_SIZE, - .max_keysize = CHACHA20_KEY_SIZE, - .ivsize = CHACHA20_IV_SIZE, - .geniv = "seqiv", - .setkey = crypto_chacha20_setkey, - .encrypt = chacha20_simd, - .decrypt = chacha20_simd, - }, - }, +static struct skcipher_alg alg = { + .base.cra_name = "chacha20", + .base.cra_driver_name = "chacha20-simd", + .base.cra_priority = 300, + .base.cra_blocksize = 1, + .base.cra_ctxsize = sizeof(struct chacha20_ctx), + .base.cra_alignmask = sizeof(u32) - 1, + .base.cra_module = THIS_MODULE, + + .min_keysize = CHACHA20_KEY_SIZE, + .max_keysize = CHACHA20_KEY_SIZE, + .ivsize = CHACHA20_IV_SIZE, + .chunksize = CHACHA20_BLOCK_SIZE, + .setkey = crypto_chacha20_setkey, + .encrypt = chacha20_simd, + .decrypt = chacha20_simd, }; static int __init chacha20_simd_mod_init(void) @@ -133,12 +126,12 @@ static int __init chacha20_simd_mod_init(void) boot_cpu_has(X86_FEATURE_AVX2) && cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM, NULL); #endif - return crypto_register_alg(&alg); + return crypto_register_skcipher(&alg); } static void __exit chacha20_simd_mod_fini(void) { - crypto_unregister_alg(&alg); + crypto_unregister_skcipher(&alg); } module_init(chacha20_simd_mod_init); -- cgit v1.2.3 From 50fb57042402c819d247ac4231b80b0da86e2fd7 Mon Sep 17 00:00:00 2001 From: Ilya Lesokhin Date: Tue, 13 Dec 2016 16:32:06 +0200 Subject: crypto: aesni-intel - RFC4106 can zero copy when !PageHighMem In the common case of !PageHighMem we can do zero copy crypto even if sg crosses a pages boundary. Signed-off-by: Ilya Lesokhin Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 31c34ee131f3..36ca1502630c 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -740,9 +740,11 @@ static int helper_rfc4106_encrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); @@ -822,9 +824,11 @@ static int helper_rfc4106_decrypt(struct aead_request *req) *((__be32 *)(iv+12)) = counter; if (sg_is_last(req->src) && - req->src->offset + req->src->length <= PAGE_SIZE && + (!PageHighMem(sg_page(req->src)) || + req->src->offset + req->src->length <= PAGE_SIZE) && sg_is_last(req->dst) && - req->dst->offset + req->dst->length <= PAGE_SIZE) { + (!PageHighMem(sg_page(req->dst)) || + req->dst->offset + req->dst->length <= PAGE_SIZE)) { one_entry_in_sg = 1; scatterwalk_start(&src_sg_walk, req->src); assoc = scatterwalk_map(&src_sg_walk); -- cgit v1.2.3 From b4ed1d15b453c86b4b9362128bd7a0ecd95a105c Mon Sep 17 00:00:00 2001 From: Wei Yang Date: Sun, 25 Dec 2016 14:35:51 +0000 Subject: x86/e820: Make e820_search_gap() static and remove unused variables e820_search_gap() is just used locally now and the 'start_addr' and 'end_addr' parameters are fixed values. Also, 'gapstart' is not checked in this function anymore. So make the function static and remove those unused variables. Signed-off-by: Wei Yang Acked-by: Yinghai Lu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/1482676551-11411-1-git-send-email-richard.weiyang@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/e820.h | 2 -- arch/x86/kernel/e820.c | 16 +++++----------- 2 files changed, 5 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/e820.h b/arch/x86/include/asm/e820.h index ec23d8e1297c..67313f3a9874 100644 --- a/arch/x86/include/asm/e820.h +++ b/arch/x86/include/asm/e820.h @@ -30,8 +30,6 @@ extern u64 e820_remove_range(u64 start, u64 size, unsigned old_type, int checktype); extern void update_e820(void); extern void e820_setup_gap(void); -extern int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr); struct setup_data; extern void parse_e820_ext(u64 phys_addr, u32 data_len); diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 90e8dde3ec26..46f2afd3577a 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -580,24 +580,19 @@ static void __init update_e820_saved(void) } #define MAX_GAP_END 0x100000000ull /* - * Search for a gap in the e820 memory space from start_addr to end_addr. + * Search for a gap in the e820 memory space from 0 to MAX_GAP_END. */ -__init int e820_search_gap(unsigned long *gapstart, unsigned long *gapsize, - unsigned long start_addr, unsigned long long end_addr) +static int __init e820_search_gap(unsigned long *gapstart, + unsigned long *gapsize) { - unsigned long long last; + unsigned long long last = MAX_GAP_END; int i = e820->nr_map; int found = 0; - last = (end_addr && end_addr < MAX_GAP_END) ? end_addr : MAX_GAP_END; - while (--i >= 0) { unsigned long long start = e820->map[i].addr; unsigned long long end = start + e820->map[i].size; - if (end < start_addr) - continue; - /* * Since "last" is at most 4GB, we know we'll * fit in 32 bits if this condition is true @@ -628,9 +623,8 @@ __init void e820_setup_gap(void) unsigned long gapstart, gapsize; int found; - gapstart = 0x10000000; gapsize = 0x400000; - found = e820_search_gap(&gapstart, &gapsize, 0, MAX_GAP_END); + found = e820_search_gap(&gapstart, &gapsize); #ifdef CONFIG_X86_64 if (!found) { -- cgit v1.2.3 From c4158ff536439619fa342810cc575ae2c809f03f Mon Sep 17 00:00:00 2001 From: Daniel Bristot de Oliveira Date: Wed, 4 Jan 2017 12:20:33 +0100 Subject: x86/irq, trace: Add __irq_entry annotation to x86's platform IRQ handlers This patch adds the __irq_entry annotation to the default x86 platform IRQ handlers. ftrace's function_graph tracer uses the __irq_entry annotation to notify the entry and return of IRQ handlers. For example, before the patch: 354549.667252 | 3) d..1 | default_idle_call() { 354549.667252 | 3) d..1 | arch_cpu_idle() { 354549.667253 | 3) d..1 | default_idle() { 354549.696886 | 3) d..1 | smp_trace_reschedule_interrupt() { 354549.696886 | 3) d..1 | irq_enter() { 354549.696886 | 3) d..1 | rcu_irq_enter() { After the patch: 366416.254476 | 3) d..1 | arch_cpu_idle() { 366416.254476 | 3) d..1 | default_idle() { 366416.261566 | 3) d..1 ==========> | 366416.261566 | 3) d..1 | smp_trace_reschedule_interrupt() { 366416.261566 | 3) d..1 | irq_enter() { 366416.261566 | 3) d..1 | rcu_irq_enter() { KASAN also uses this annotation. The smp_apic_timer_interrupt() was already annotated. Signed-off-by: Daniel Bristot de Oliveira Acked-by: Steven Rostedt (VMware) Cc: Aaron Lu Cc: Andrew Morton Cc: Baoquan He Cc: Borislav Petkov Cc: Claudio Fontana Cc: Denys Vlasenko Cc: Dou Liyang Cc: Gu Zheng Cc: Hidehiro Kawai Cc: Linus Torvalds Cc: Nicolai Stange Cc: Peter Zijlstra (Intel) Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Cc: linux-edac@vger.kernel.org Link: http://lkml.kernel.org/r/059fdf437c2f0c09b13c18c8fe4e69999d3ffe69.1483528431.git.bristot@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 8 ++++---- arch/x86/kernel/apic/vector.c | 2 +- arch/x86/kernel/cpu/mcheck/mce_amd.c | 4 ++-- arch/x86/kernel/cpu/mcheck/therm_throt.c | 6 ++++-- arch/x86/kernel/cpu/mcheck/threshold.c | 4 ++-- arch/x86/kernel/irq.c | 4 ++-- arch/x86/kernel/irq_work.c | 5 +++-- arch/x86/kernel/smp.c | 15 +++++++++------ 8 files changed, 27 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..30b122987906 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1864,14 +1864,14 @@ static void __smp_spurious_interrupt(u8 vector) "should never happen.\n", vector, smp_processor_id()); } -__visible void smp_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_spurious_interrupt(struct pt_regs *regs) { entering_irq(); __smp_spurious_interrupt(~regs->orig_ax); exiting_irq(); } -__visible void smp_trace_spurious_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_spurious_interrupt(struct pt_regs *regs) { u8 vector = ~regs->orig_ax; @@ -1922,14 +1922,14 @@ static void __smp_error_interrupt(struct pt_regs *regs) } -__visible void smp_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_error_interrupt(struct pt_regs *regs) { entering_irq(); __smp_error_interrupt(regs); exiting_irq(); } -__visible void smp_trace_error_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_error_interrupt(struct pt_regs *regs) { entering_irq(); trace_error_apic_entry(ERROR_APIC_VECTOR); diff --git a/arch/x86/kernel/apic/vector.c b/arch/x86/kernel/apic/vector.c index 5d30c5e42bb1..f3557a1eb562 100644 --- a/arch/x86/kernel/apic/vector.c +++ b/arch/x86/kernel/apic/vector.c @@ -559,7 +559,7 @@ void send_cleanup_vector(struct irq_cfg *cfg) __send_cleanup_vector(data); } -asmlinkage __visible void smp_irq_move_cleanup_interrupt(void) +asmlinkage __visible void __irq_entry smp_irq_move_cleanup_interrupt(void) { unsigned vector, me; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a5fd137417a2..9e655292cf10 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -814,14 +814,14 @@ static inline void __smp_deferred_error_interrupt(void) deferred_error_int_vector(); } -asmlinkage __visible void smp_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_deferred_error_interrupt(void) { entering_irq(); __smp_deferred_error_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_deferred_error_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_deferred_error_interrupt(void) { entering_irq(); trace_deferred_error_apic_entry(DEFERRED_ERROR_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..772d9400930d 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -404,14 +404,16 @@ static inline void __smp_thermal_interrupt(void) smp_thermal_vector(); } -asmlinkage __visible void smp_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_thermal_interrupt(struct pt_regs *regs) { entering_irq(); __smp_thermal_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_thermal_interrupt(struct pt_regs *regs) +asmlinkage __visible void __irq_entry +smp_trace_thermal_interrupt(struct pt_regs *regs) { entering_irq(); trace_thermal_apic_entry(THERMAL_APIC_VECTOR); diff --git a/arch/x86/kernel/cpu/mcheck/threshold.c b/arch/x86/kernel/cpu/mcheck/threshold.c index 9beb092d68a5..bb0e75eed10a 100644 --- a/arch/x86/kernel/cpu/mcheck/threshold.c +++ b/arch/x86/kernel/cpu/mcheck/threshold.c @@ -23,14 +23,14 @@ static inline void __smp_threshold_interrupt(void) mce_threshold_vector(); } -asmlinkage __visible void smp_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_threshold_interrupt(void) { entering_irq(); __smp_threshold_interrupt(); exiting_ack_irq(); } -asmlinkage __visible void smp_trace_threshold_interrupt(void) +asmlinkage __visible void __irq_entry smp_trace_threshold_interrupt(void) { entering_irq(); trace_threshold_apic_entry(THRESHOLD_APIC_VECTOR); diff --git a/arch/x86/kernel/irq.c b/arch/x86/kernel/irq.c index 7c6e9ffe4424..4d8183b5f113 100644 --- a/arch/x86/kernel/irq.c +++ b/arch/x86/kernel/irq.c @@ -264,7 +264,7 @@ void __smp_x86_platform_ipi(void) x86_platform_ipi_callback(); } -__visible void smp_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); @@ -315,7 +315,7 @@ __visible void smp_kvm_posted_intr_wakeup_ipi(struct pt_regs *regs) } #endif -__visible void smp_trace_x86_platform_ipi(struct pt_regs *regs) +__visible void __irq_entry smp_trace_x86_platform_ipi(struct pt_regs *regs) { struct pt_regs *old_regs = set_irq_regs(regs); diff --git a/arch/x86/kernel/irq_work.c b/arch/x86/kernel/irq_work.c index 3512ba607361..275487872be2 100644 --- a/arch/x86/kernel/irq_work.c +++ b/arch/x86/kernel/irq_work.c @@ -9,6 +9,7 @@ #include #include #include +#include static inline void __smp_irq_work_interrupt(void) { @@ -16,14 +17,14 @@ static inline void __smp_irq_work_interrupt(void) irq_work_run(); } -__visible void smp_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_irq_work_interrupt(); exiting_irq(); } -__visible void smp_trace_irq_work_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_irq_work_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_irq_work_entry(IRQ_WORK_VECTOR); diff --git a/arch/x86/kernel/smp.c b/arch/x86/kernel/smp.c index 68f8cc222f25..d3c66a15bbde 100644 --- a/arch/x86/kernel/smp.c +++ b/arch/x86/kernel/smp.c @@ -259,7 +259,7 @@ static inline void __smp_reschedule_interrupt(void) scheduler_ipi(); } -__visible void smp_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_reschedule_interrupt(struct pt_regs *regs) { ack_APIC_irq(); __smp_reschedule_interrupt(); @@ -268,7 +268,7 @@ __visible void smp_reschedule_interrupt(struct pt_regs *regs) */ } -__visible void smp_trace_reschedule_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_trace_reschedule_interrupt(struct pt_regs *regs) { /* * Need to call irq_enter() before calling the trace point. @@ -292,14 +292,15 @@ static inline void __smp_call_function_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry smp_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_entry(CALL_FUNCTION_VECTOR); @@ -314,14 +315,16 @@ static inline void __smp_call_function_single_interrupt(void) inc_irq_stat(irq_call_count); } -__visible void smp_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); __smp_call_function_single_interrupt(); exiting_irq(); } -__visible void smp_trace_call_function_single_interrupt(struct pt_regs *regs) +__visible void __irq_entry +smp_trace_call_function_single_interrupt(struct pt_regs *regs) { ipi_entering_ack_irq(); trace_call_function_single_entry(CALL_FUNCTION_SINGLE_VECTOR); -- cgit v1.2.3 From 4b5b61eaf8b70838750a1e6dc80ecd044c8f4b3f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 5 Jan 2017 15:02:34 +0200 Subject: x86/platform/intel-mid: Remove Moorestown code The Moorestown support code was removed by: a8359e411eb ("x86/mid: Remove Intel Moorestown"). Remove this leftover as well. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170105130235.177792-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/Makefile | 1 - .../platform/intel-mid/device_libs/platform_ipc.c | 9 ---- .../intel-mid/device_libs/platform_pmic_gpio.c | 54 ---------------------- 3 files changed, 64 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 90e4f2a6625b..4d8c14a783d5 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -12,7 +12,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_ocd.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic_battery.o obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_msic_power_btn.o -obj-$(subst m,y,$(CONFIG_GPIO_INTEL_PMIC)) += platform_pmic_gpio.o obj-$(subst m,y,$(CONFIG_INTEL_MFLD_THERMAL)) += platform_msic_thermal.o # SPI Devices obj-$(subst m,y,$(CONFIG_SPI_SPIDEV)) += platform_mrfld_spidev.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c index a84b73d6c4a0..a428c051e7bb 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c @@ -57,12 +57,3 @@ void __init ipc_device_handler(struct sfi_device_table_entry *pentry, pdev->dev.platform_data = pdata; intel_scu_device_register(pdev); } - -static const struct devs_id pmic_audio_dev_id __initconst = { - .name = "pmic_audio", - .type = SFI_DEV_TYPE_IPC, - .delay = 1, - .device_handler = &ipc_device_handler, -}; - -sfi_device(pmic_audio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c deleted file mode 100644 index e30cb62e3300..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_pmic_gpio.c +++ /dev/null @@ -1,54 +0,0 @@ -/* - * platform_pmic_gpio.c: PMIC GPIO platform data initialization file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include -#include -#include -#include -#include -#include -#include -#include - -#include "platform_ipc.h" - -static void __init *pmic_gpio_platform_data(void *info) -{ - static struct intel_pmic_gpio_platform_data pmic_gpio_pdata; - int gpio_base = get_gpio_by_name("pmic_gpio_base"); - - if (gpio_base < 0) - gpio_base = 64; - pmic_gpio_pdata.gpio_base = gpio_base; - pmic_gpio_pdata.irq_base = gpio_base + INTEL_MID_IRQ_OFFSET; - pmic_gpio_pdata.gpiointr = 0xffffeff8; - - return &pmic_gpio_pdata; -} - -static const struct devs_id pmic_gpio_spi_dev_id __initconst = { - .name = "pmic_gpio", - .type = SFI_DEV_TYPE_SPI, - .delay = 1, - .get_platform_data = &pmic_gpio_platform_data, -}; - -static const struct devs_id pmic_gpio_ipc_dev_id __initconst = { - .name = "pmic_gpio", - .type = SFI_DEV_TYPE_IPC, - .delay = 1, - .get_platform_data = &pmic_gpio_platform_data, - .device_handler = &ipc_device_handler -}; - -sfi_device(pmic_gpio_spi_dev_id); -sfi_device(pmic_gpio_ipc_dev_id); -- cgit v1.2.3 From a01b3391b542aaaed539f9d9d6d0d4d6502ab9c6 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 5 Jan 2017 15:02:35 +0200 Subject: x86/platform/intel-mid: Get rid of duplication of IPC handler There is no other device handler than ipc_device_handler() and sfi.c already has a handler for IPC devices. Replace a pointer to custom handler by a flag. Due to this change adjust sfi_handle_ipc_dev() to handle it instead of ipc_device_handler(). Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170105130235.177792-2-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/intel-mid.h | 4 +- arch/x86/platform/intel-mid/device_libs/Makefile | 1 - .../platform/intel-mid/device_libs/platform_ipc.c | 59 ---------------------- .../platform/intel-mid/device_libs/platform_ipc.h | 18 ------- .../intel-mid/device_libs/platform_msic_audio.c | 3 +- .../intel-mid/device_libs/platform_msic_battery.c | 3 +- .../intel-mid/device_libs/platform_msic_gpio.c | 3 +- .../intel-mid/device_libs/platform_msic_ocd.c | 3 +- .../device_libs/platform_msic_power_btn.c | 3 +- .../intel-mid/device_libs/platform_msic_thermal.c | 3 +- arch/x86/platform/intel-mid/sfi.c | 55 ++++++++++++-------- 11 files changed, 40 insertions(+), 115 deletions(-) delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_ipc.c delete mode 100644 arch/x86/platform/intel-mid/device_libs/platform_ipc.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 49da9f497b90..91ead0cefa76 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -42,10 +42,8 @@ struct devs_id { char name[SFI_NAME_LEN + 1]; u8 type; u8 delay; + u8 msic; void *(*get_platform_data)(void *info); - /* Custom handler for devices */ - void (*device_handler)(struct sfi_device_table_entry *pentry, - struct devs_id *dev); }; #define sfi_device(i) \ diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index 4d8c14a783d5..d4af7785844e 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -5,7 +5,6 @@ obj-$(subst m,y,$(CONFIG_MMC_SDHCI_PCI)) += platform_mrfld_sd.o # WiFi obj-$(subst m,y,$(CONFIG_BRCMFMAC_SDIO)) += platform_bcm43xx.o # IPC Devices -obj-y += platform_ipc.o obj-$(subst m,y,$(CONFIG_MFD_INTEL_MSIC)) += platform_msic.o obj-$(subst m,y,$(CONFIG_SND_MFLD_MACHINE)) += platform_msic_audio.o obj-$(subst m,y,$(CONFIG_GPIO_MSIC)) += platform_msic_gpio.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c b/arch/x86/platform/intel-mid/device_libs/platform_ipc.c deleted file mode 100644 index a428c051e7bb..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.c +++ /dev/null @@ -1,59 +0,0 @@ -/* - * platform_ipc.c: IPC platform library file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include -#include -#include -#include -#include -#include -#include "platform_ipc.h" - -void __init ipc_device_handler(struct sfi_device_table_entry *pentry, - struct devs_id *dev) -{ - struct platform_device *pdev; - void *pdata = NULL; - static struct resource res __initdata = { - .name = "IRQ", - .flags = IORESOURCE_IRQ, - }; - - pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", - pentry->name, pentry->irq); - - /* - * We need to call platform init of IPC devices to fill misc_pdata - * structure. It will be used in msic_init for initialization. - */ - if (dev != NULL) - pdata = dev->get_platform_data(pentry); - - /* - * On Medfield the platform device creation is handled by the MSIC - * MFD driver so we don't need to do it here. - */ - if (intel_mid_has_msic()) - return; - - pdev = platform_device_alloc(pentry->name, 0); - if (pdev == NULL) { - pr_err("out of memory for SFI platform device '%s'.\n", - pentry->name); - return; - } - res.start = pentry->irq; - platform_device_add_resources(pdev, &res, 1); - - pdev->dev.platform_data = pdata; - intel_scu_device_register(pdev); -} diff --git a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h b/arch/x86/platform/intel-mid/device_libs/platform_ipc.h deleted file mode 100644 index 79bb09d4f718..000000000000 --- a/arch/x86/platform/intel-mid/device_libs/platform_ipc.h +++ /dev/null @@ -1,18 +0,0 @@ -/* - * platform_ipc.h: IPC platform library header file - * - * (C) Copyright 2013 Intel Corporation - * Author: Sathyanarayanan Kuppuswamy - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#ifndef _PLATFORM_IPC_H_ -#define _PLATFORM_IPC_H_ - -void __init -ipc_device_handler(struct sfi_device_table_entry *pentry, struct devs_id *dev); - -#endif diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c index cb3490ecb341..d4dc744dd5a5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_audio.c @@ -20,7 +20,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void *msic_audio_platform_data(void *info) { @@ -40,8 +39,8 @@ static const struct devs_id msic_audio_dev_id __initconst = { .name = "msic_audio", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_audio_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_audio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c index 4f72193939a6..5c3e9919633f 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_battery.c @@ -19,7 +19,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_battery_platform_data(void *info) { @@ -30,8 +29,8 @@ static const struct devs_id msic_battery_dev_id __initconst = { .name = "msic_battery", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_battery_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_battery_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c index 70de5b531ba0..9fdb88d460d7 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_gpio.c @@ -20,7 +20,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_gpio_platform_data(void *info) { @@ -41,8 +40,8 @@ static const struct devs_id msic_gpio_dev_id __initconst = { .name = "msic_gpio", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_gpio_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_gpio_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c index 3d7c2011b6cf..7ae37cdbf256 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_ocd.c @@ -20,7 +20,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_ocd_platform_data(void *info) { @@ -42,8 +41,8 @@ static const struct devs_id msic_ocd_dev_id __initconst = { .name = "msic_ocd", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_ocd_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_ocd_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c index 038f618fbc52..96809b98cf69 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_power_btn.c @@ -18,7 +18,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_power_btn_platform_data(void *info) { @@ -29,8 +28,8 @@ static const struct devs_id msic_power_btn_dev_id __initconst = { .name = "msic_power_btn", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_power_btn_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_power_btn_dev_id); diff --git a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c index 114a5755b1e4..3e4167d246cd 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_msic_thermal.c @@ -19,7 +19,6 @@ #include #include "platform_msic.h" -#include "platform_ipc.h" static void __init *msic_thermal_platform_data(void *info) { @@ -30,8 +29,8 @@ static const struct devs_id msic_thermal_dev_id __initconst = { .name = "msic_thermal", .type = SFI_DEV_TYPE_IPC, .delay = 1, + .msic = 1, .get_platform_data = &msic_thermal_platform_data, - .device_handler = &ipc_device_handler, }; sfi_device(msic_thermal_dev_id); diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 051d264fce2e..e8f68f652087 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -335,10 +335,22 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, pr_debug("IPC bus, name = %16.16s, irq = 0x%2x\n", pentry->name, pentry->irq); + + /* + * We need to call platform init of IPC devices to fill misc_pdata + * structure. It will be used in msic_init for initialization. + */ pdata = intel_mid_sfi_get_pdata(dev, pentry); if (IS_ERR(pdata)) return; + /* + * On Medfield the platform device creation is handled by the MSIC + * MFD driver so we don't need to do it here. + */ + if (dev->msic && intel_mid_has_msic()) + return; + pdev = platform_device_alloc(pentry->name, 0); if (pdev == NULL) { pr_err("out of memory for SFI platform device '%s'.\n", @@ -348,7 +360,10 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, install_irq_resource(pdev, pentry->irq); pdev->dev.platform_data = pdata; - platform_device_add(pdev); + if (dev->delay) + intel_scu_device_register(pdev); + else + platform_device_add(pdev); } static void __init sfi_handle_spi_dev(struct sfi_device_table_entry *pentry, @@ -503,27 +518,23 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) if (!dev) continue; - if (dev->device_handler) { - dev->device_handler(pentry, dev); - } else { - switch (pentry->type) { - case SFI_DEV_TYPE_IPC: - sfi_handle_ipc_dev(pentry, dev); - break; - case SFI_DEV_TYPE_SPI: - sfi_handle_spi_dev(pentry, dev); - break; - case SFI_DEV_TYPE_I2C: - sfi_handle_i2c_dev(pentry, dev); - break; - case SFI_DEV_TYPE_SD: - sfi_handle_sd_dev(pentry, dev); - break; - case SFI_DEV_TYPE_UART: - case SFI_DEV_TYPE_HSI: - default: - break; - } + switch (pentry->type) { + case SFI_DEV_TYPE_IPC: + sfi_handle_ipc_dev(pentry, dev); + break; + case SFI_DEV_TYPE_SPI: + sfi_handle_spi_dev(pentry, dev); + break; + case SFI_DEV_TYPE_I2C: + sfi_handle_i2c_dev(pentry, dev); + break; + case SFI_DEV_TYPE_SD: + sfi_handle_sd_dev(pentry, dev); + break; + case SFI_DEV_TYPE_UART: + case SFI_DEV_TYPE_HSI: + default: + break; } } return 0; -- cgit v1.2.3 From ecc7ea5dd1409d4e6dfba2f0ff0ee1c6ccd855bd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 5 Jan 2017 18:17:17 +0200 Subject: x86/platform/intel-mid: Enable GPIO keys on Merrifield The Merrifield firmware provides 3 descriptions of buttons connected to GPIO. Append them to the list of supported GPIO keys. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170105161717.115261-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c index 52534ec29765..74283875c7e8 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_gpio_keys.c @@ -32,6 +32,9 @@ static struct gpio_keys_button gpio_button[] = { {SW_LID, -1, 1, "lid_switch", EV_SW, 0, 20}, {KEY_VOLUMEUP, -1, 1, "vol_up", EV_KEY, 0, 20}, {KEY_VOLUMEDOWN, -1, 1, "vol_down", EV_KEY, 0, 20}, + {KEY_MUTE, -1, 1, "mute_enable", EV_KEY, 0, 20}, + {KEY_VOLUMEUP, -1, 1, "volume_up", EV_KEY, 0, 20}, + {KEY_VOLUMEDOWN, -1, 1, "volume_down", EV_KEY, 0, 20}, {KEY_CAMERA, -1, 1, "camera_full", EV_KEY, 0, 20}, {KEY_CAMERA_FOCUS, -1, 1, "camera_half", EV_KEY, 0, 20}, {SW_KEYPAD_SLIDE, -1, 1, "MagSw1", EV_SW, 0, 20}, -- cgit v1.2.3 From 1e620f9b23e598ab936ece12233e98e97930b692 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Thu, 8 Dec 2016 11:44:31 -0500 Subject: x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C The new Xen PVH entry point requires page tables to be setup by the kernel since it is entered with paging disabled. Pull the common code out of head_32.S so that mk_early_pgtbl_32() can be invoked from both the new Xen entry point and the existing startup_32() code. Convert resulting common code to C. Signed-off-by: Boris Ostrovsky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: matt@codeblueprint.co.uk Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1481215471-9639-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32.h | 32 ++++++++++ arch/x86/kernel/head32.c | 62 +++++++++++++++++++ arch/x86/kernel/head_32.S | 121 +++----------------------------------- 3 files changed, 101 insertions(+), 114 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b6c0b404898a..fbc73360aea0 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -27,6 +27,7 @@ struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; +extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } @@ -75,4 +76,35 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif +/* + * This is how much memory in addition to the memory covered up to + * and including _end we need mapped initially. + * We need: + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. + * + * KERNEL_IMAGE_SIZE should be greater than pa(_end) + * and small than max_low_pfn, otherwise will waste some page table entries + */ +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif + +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) + #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index f16c55bfc090..e5fb436a6548 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void) start_kernel(); } + +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. Note the upper half of each PMD or PTE are + * always zero at this stage. + */ +void __init mk_early_pgtbl_32(void) +{ +#ifdef __pa +#undef __pa +#endif +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + pte_t pte, *ptep; + int i; + unsigned long *ptr; + /* Enough space to fit pagetables for the low memory linear map */ + const unsigned long limit = __pa(_end) + + (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); +#ifdef CONFIG_X86_PAE + pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd); +#define SET_PL2(pl2, val) { (pl2).pmd = (val); } +#else + pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table); +#define SET_PL2(pl2, val) { (pl2).pgd = (val); } +#endif + + ptep = (pte_t *)__pa(__brk_base); + pte.pte = PTE_IDENT_ATTR; + + while ((pte.pte & PTE_PFN_MASK) < limit) { + + SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR); + *pl2p = pl2; +#ifndef CONFIG_X86_PAE + /* Kernel PDE entry */ + *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; +#endif + for (i = 0; i < PTRS_PER_PTE; i++) { + *ptep = pte; + pte.pte += PAGE_SIZE; + ptep++; + } + + pl2p++; + } + + ptr = (unsigned long *)__pa(&max_pfn_mapped); + /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */ + *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; + + ptr = (unsigned long *)__pa(&_brk_end); + *ptr = (unsigned long)ptep + PAGE_OFFSET; +} + diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 4e8577d03372..1f85ee8f9439 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -24,6 +24,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -41,43 +42,9 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries - */ - -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif #define SIZEOF_PTREGS 17*4 -/* - * Number of possible pages in the lowmem region. - * - * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a - * gas warning about overflowing shift count when gas has been compiled - * with only a host target support using a 32-bit type for internal - * representation. - */ -LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT - /* * Worst-case size of the kernel mapping we need to make: * a relocatable kernel can live anywhere in lowmem, so we need to be able @@ -160,90 +127,15 @@ ENTRY(startup_32) call load_ucode_bsp #endif -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - */ -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) + /* Create early pagetables. */ + call mk_early_pgtbl_32 /* Do early initialization of the fixmap area */ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#ifdef CONFIG_X86_PAE +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#else movl %eax,pa(initial_page_table+0xffc) #endif @@ -666,6 +558,7 @@ ENTRY(setup_once_ref) __PAGE_ALIGNED_BSS .align PAGE_SIZE #ifdef CONFIG_X86_PAE +.globl initial_pg_pmd initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -- cgit v1.2.3 From 12bf98b91f7aa8a9a526309aba645ccdcc470cab Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 5 Jan 2017 17:54:43 +0800 Subject: x86/apic: Fix typos in comments s/ID/IDs/ s/inr_logical_cpuidi/nr_logical_cpuids/ s/generic_processor_info()/__generic_processor_info()/ Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1483610083-24314-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5b7e43eff139..5c4fdcfda109 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2028,8 +2028,8 @@ void disconnect_bsp_APIC(int virt_wire_setup) /* * The number of allocated logical CPU IDs. Since logical CPU IDs are allocated * contiguously, it equals to current allocated max logical CPU ID plus 1. - * All allocated CPU ID should be in [0, nr_logical_cpuidi), so the maximum of - * nr_logical_cpuids is nr_cpu_ids. + * All allocated CPU IDs should be in the [0, nr_logical_cpuids) range, + * so the maximum of nr_logical_cpuids is nr_cpu_ids. * * NOTE: Reserve 0 for BSP. */ @@ -2094,7 +2094,7 @@ int __generic_processor_info(int apicid, int version, bool enabled) * Since fixing handling of boot_cpu_physical_apicid requires * another discussion and tests on each platform, we leave it * for now and here we use read_apic_id() directly in this - * function, generic_processor_info(). + * function, __generic_processor_info(). */ if (disabled_cpu_apicid != BAD_APICID && disabled_cpu_apicid != read_apic_id() && -- cgit v1.2.3 From 914122c389d091a02f7b5476209af715e77ccb73 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 29 Dec 2016 17:45:49 +0100 Subject: x86/apic: Implement set_state_oneshot_stopped() callback When clock_event_device::set_state_oneshot_stopped() is not implemented, hrtimer_cancel() can't stop the clock when there is no more timer in the queue. So the ghost of the freshly cancelled hrtimer haunts us back later with an extra interrupt: -0 [002] d..2 2248.557659: hrtimer_cancel: hrtimer=ffff88021fa92d80 -0 [002] d.h1 2249.303659: local_timer_entry: vector=239 So let's implement this missing callback for the lapic clock. This consist in calling its set_state_shutdown() callback. There don't seem to be a lighter way to stop the clock. Simply writing 0 to APIC_TMICT won't be enough to stop the clock and avoid the extra interrupt, as opposed to what is specified in the specs. We must also mask the timer interrupt in the device. Signed-off-by: Frederic Weisbecker Cc: Borislav Petkov Reviewed-by: Wanpeng Li Reviewed-by: Viresh Kumar Link: http://lkml.kernel.org/r/1483029949-6925-1-git-send-email-fweisbec@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/apic.c | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 5c4fdcfda109..fdb9c46227cc 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -529,18 +529,19 @@ static void lapic_timer_broadcast(const struct cpumask *mask) * The local apic timer can be used for any function which is CPU local. */ static struct clock_event_device lapic_clockevent = { - .name = "lapic", - .features = CLOCK_EVT_FEAT_PERIODIC | - CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP - | CLOCK_EVT_FEAT_DUMMY, - .shift = 32, - .set_state_shutdown = lapic_timer_shutdown, - .set_state_periodic = lapic_timer_set_periodic, - .set_state_oneshot = lapic_timer_set_oneshot, - .set_next_event = lapic_next_event, - .broadcast = lapic_timer_broadcast, - .rating = 100, - .irq = -1, + .name = "lapic", + .features = CLOCK_EVT_FEAT_PERIODIC | + CLOCK_EVT_FEAT_ONESHOT | CLOCK_EVT_FEAT_C3STOP + | CLOCK_EVT_FEAT_DUMMY, + .shift = 32, + .set_state_shutdown = lapic_timer_shutdown, + .set_state_periodic = lapic_timer_set_periodic, + .set_state_oneshot = lapic_timer_set_oneshot, + .set_state_oneshot_stopped = lapic_timer_shutdown, + .set_next_event = lapic_next_event, + .broadcast = lapic_timer_broadcast, + .rating = 100, + .irq = -1, }; static DEFINE_PER_CPU(struct clock_event_device, lapic_events); -- cgit v1.2.3 From 35e6eaa3df55822d0cb1df3bf08e6cb816737131 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 16 Dec 2016 16:10:01 +0100 Subject: KVM: x86: don't allow kernel irqchip with split irqchip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Split irqchip cannot be created after creating the kernel irqchip, but we forgot to restrict the other way. This is an API change. Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f22810a7e0c..c72a8d00a1c0 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3961,7 +3961,7 @@ long kvm_arch_vm_ioctl(struct file *filp, mutex_lock(&kvm->lock); r = -EEXIST; - if (kvm->arch.vpic) + if (irqchip_in_kernel(kvm)) goto create_irqchip_unlock; r = -EINVAL; if (kvm->created_vcpus) -- cgit v1.2.3 From 49776faf93f8074bb4990beac04781a9507d3650 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 16 Dec 2016 16:10:02 +0100 Subject: KVM: x86: decouple irqchip_in_kernel() and pic_irqchip() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit irqchip_in_kernel() tried to save a bit by reusing pic_irqchip(), but it just complicated the code. Add a separate state for the irqchip mode. Reviewed-by: David Hildenbrand [Used Paolo's version of condition in irqchip_in_kernel().] Signed-off-by: Radim Krčmář --- arch/x86/include/asm/kvm_host.h | 8 +++++++- arch/x86/kvm/irq.h | 15 ++++++++------- arch/x86/kvm/x86.c | 5 +++-- 3 files changed, 18 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index a7066dc1a7e9..fc03ab1f6110 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -716,6 +716,12 @@ struct kvm_hv { HV_REFERENCE_TSC_PAGE tsc_ref; }; +enum kvm_irqchip_mode { + KVM_IRQCHIP_NONE, + KVM_IRQCHIP_KERNEL, /* created with KVM_CREATE_IRQCHIP */ + KVM_IRQCHIP_SPLIT, /* created with KVM_CAP_SPLIT_IRQCHIP */ +}; + struct kvm_arch { unsigned int n_used_mmu_pages; unsigned int n_requested_mmu_pages; @@ -788,7 +794,7 @@ struct kvm_arch { u64 disabled_quirks; - bool irqchip_split; + enum kvm_irqchip_mode irqchip_mode; u8 nr_reserved_ioapic_pins; bool disabled_lapic_found; diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index 035731eb3897..f4965bc2613c 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -93,18 +93,19 @@ static inline int pic_in_kernel(struct kvm *kvm) static inline int irqchip_split(struct kvm *kvm) { - return kvm->arch.irqchip_split; + return kvm->arch.irqchip_mode == KVM_IRQCHIP_SPLIT; } -static inline int irqchip_in_kernel(struct kvm *kvm) +static inline int irqchip_kernel(struct kvm *kvm) { - struct kvm_pic *vpic = pic_irqchip(kvm); - bool ret; + return kvm->arch.irqchip_mode == KVM_IRQCHIP_KERNEL; +} - ret = (vpic != NULL); - ret |= irqchip_split(kvm); +static inline int irqchip_in_kernel(struct kvm *kvm) +{ + bool ret = kvm->arch.irqchip_mode != KVM_IRQCHIP_NONE; - /* Read vpic before kvm->irq_routing. */ + /* Matches with wmb after initializing kvm->irq_routing. */ smp_rmb(); return ret; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c72a8d00a1c0..0630ab438bd5 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3894,7 +3894,7 @@ static int kvm_vm_ioctl_enable_cap(struct kvm *kvm, goto split_irqchip_unlock; /* Pairs with irqchip_in_kernel. */ smp_wmb(); - kvm->arch.irqchip_split = true; + kvm->arch.irqchip_mode = KVM_IRQCHIP_SPLIT; kvm->arch.nr_reserved_ioapic_pins = cap->args[0]; r = 0; split_irqchip_unlock: @@ -3988,8 +3988,9 @@ long kvm_arch_vm_ioctl(struct file *filp, mutex_unlock(&kvm->slots_lock); goto create_irqchip_unlock; } - /* Write kvm->irq_routing before kvm->arch.vpic. */ + /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ smp_wmb(); + kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; kvm->arch.vpic = vpic; create_irqchip_unlock: mutex_unlock(&kvm->lock); -- cgit v1.2.3 From 099413664c71fcf9d0099eba4f8a4dd59653d5a3 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 16 Dec 2016 16:10:03 +0100 Subject: KVM: x86: make pic setup code look like ioapic setup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We don't treat kvm->arch.vpic specially anymore, so the setup can look like ioapic. This gets a bit more information out of return values. Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/i8259.c | 16 +++++++++++----- arch/x86/kvm/irq.h | 4 ++-- arch/x86/kvm/x86.c | 30 +++++++++++++++--------------- 3 files changed, 28 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 7cc2360f1848..73ea24d4f119 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -598,14 +598,14 @@ static const struct kvm_io_device_ops picdev_eclr_ops = { .write = picdev_eclr_write, }; -struct kvm_pic *kvm_create_pic(struct kvm *kvm) +int kvm_pic_init(struct kvm *kvm) { struct kvm_pic *s; int ret; s = kzalloc(sizeof(struct kvm_pic), GFP_KERNEL); if (!s) - return NULL; + return -ENOMEM; spin_lock_init(&s->lock); s->kvm = kvm; s->pics[0].elcr_mask = 0xf8; @@ -635,7 +635,9 @@ struct kvm_pic *kvm_create_pic(struct kvm *kvm) mutex_unlock(&kvm->slots_lock); - return s; + kvm->arch.vpic = s; + + return 0; fail_unreg_1: kvm_io_bus_unregister_dev(kvm, KVM_PIO_BUS, &s->dev_slave); @@ -648,13 +650,17 @@ fail_unlock: kfree(s); - return NULL; + return ret; } -void kvm_destroy_pic(struct kvm_pic *vpic) +void kvm_pic_destroy(struct kvm *kvm) { + struct kvm_pic *vpic = kvm->arch.vpic; + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); + + kvm->arch.vpic = NULL; kfree(vpic); } diff --git a/arch/x86/kvm/irq.h b/arch/x86/kvm/irq.h index f4965bc2613c..40d5b2cf6061 100644 --- a/arch/x86/kvm/irq.h +++ b/arch/x86/kvm/irq.h @@ -73,8 +73,8 @@ struct kvm_pic { unsigned long irq_states[PIC_NUM_PINS]; }; -struct kvm_pic *kvm_create_pic(struct kvm *kvm); -void kvm_destroy_pic(struct kvm_pic *vpic); +int kvm_pic_init(struct kvm *kvm); +void kvm_pic_destroy(struct kvm *kvm); int kvm_pic_read_irq(struct kvm *kvm); void kvm_pic_update_irq(struct kvm_pic *s); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0630ab438bd5..05ac71a01f99 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3957,33 +3957,34 @@ long kvm_arch_vm_ioctl(struct file *filp, r = kvm_vm_ioctl_get_nr_mmu_pages(kvm); break; case KVM_CREATE_IRQCHIP: { - struct kvm_pic *vpic; - mutex_lock(&kvm->lock); + r = -EEXIST; if (irqchip_in_kernel(kvm)) goto create_irqchip_unlock; + r = -EINVAL; if (kvm->created_vcpus) goto create_irqchip_unlock; - r = -ENOMEM; - vpic = kvm_create_pic(kvm); - if (vpic) { - r = kvm_ioapic_init(kvm); - if (r) { - mutex_lock(&kvm->slots_lock); - kvm_destroy_pic(vpic); - mutex_unlock(&kvm->slots_lock); - goto create_irqchip_unlock; - } - } else + + r = kvm_pic_init(kvm); + if (r) goto create_irqchip_unlock; + + r = kvm_ioapic_init(kvm); + if (r) { + mutex_lock(&kvm->slots_lock); + kvm_pic_destroy(kvm); + mutex_unlock(&kvm->slots_lock); + goto create_irqchip_unlock; + } + r = kvm_setup_default_irq_routing(kvm); if (r) { mutex_lock(&kvm->slots_lock); mutex_lock(&kvm->irq_lock); kvm_ioapic_destroy(kvm); - kvm_destroy_pic(vpic); + kvm_pic_destroy(kvm); mutex_unlock(&kvm->irq_lock); mutex_unlock(&kvm->slots_lock); goto create_irqchip_unlock; @@ -3991,7 +3992,6 @@ long kvm_arch_vm_ioctl(struct file *filp, /* Write kvm->irq_routing before enabling irqchip_in_kernel. */ smp_wmb(); kvm->arch.irqchip_mode = KVM_IRQCHIP_KERNEL; - kvm->arch.vpic = vpic; create_irqchip_unlock: mutex_unlock(&kvm->lock); break; -- cgit v1.2.3 From e5dc48777dcc898210e2f16d80d44718db38cdc3 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 16 Dec 2016 16:10:04 +0100 Subject: KVM: x86: refactor pic setup in kvm_set_routing_entry MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 6c0191615f23..1dfeb185a1e3 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -297,14 +297,12 @@ int kvm_set_routing_entry(struct kvm *kvm, case KVM_IRQ_ROUTING_IRQCHIP: delta = 0; switch (ue->u.irqchip.irqchip) { - case KVM_IRQCHIP_PIC_MASTER: - e->set = kvm_set_pic_irq; - max_pin = PIC_NUM_PINS; - break; case KVM_IRQCHIP_PIC_SLAVE: + delta = 8; + /* fall through */ + case KVM_IRQCHIP_PIC_MASTER: e->set = kvm_set_pic_irq; max_pin = PIC_NUM_PINS; - delta = 8; break; case KVM_IRQCHIP_IOAPIC: max_pin = KVM_IOAPIC_NUM_PINS; -- cgit v1.2.3 From 8231f50d9853274ed104aac86b6b6263ca666c4d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 16 Dec 2016 16:10:05 +0100 Subject: KVM: x86: prevent setup of invalid routes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The check in kvm_set_pic_irq() and kvm_set_ioapic_irq() was just a temporary measure until the code improved enough for us to do this. This changes APIC in a case when KVM_SET_GSI_ROUTING is called to set up pic and ioapic routes before KVM_CREATE_IRQCHIP. Those rules would get overwritten by KVM_CREATE_IRQCHIP at best, so it is pointless to allow it. Userspaces hopefully noticed that things don't work if they do that and don't do that. Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 19 ++++++------------- 1 file changed, 6 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 1dfeb185a1e3..2639b8d3dce2 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -41,15 +41,6 @@ static int kvm_set_pic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_pic *pic = pic_irqchip(kvm); - - /* - * XXX: rejecting pic routes when pic isn't in use would be better, - * but the default routing table is installed while kvm->arch.vpic is - * NULL and KVM_CREATE_IRQCHIP can race with KVM_IRQ_LINE. - */ - if (!pic) - return -1; - return kvm_pic_set_irq(pic, e->irqchip.pin, irq_source_id, level); } @@ -58,10 +49,6 @@ static int kvm_set_ioapic_irq(struct kvm_kernel_irq_routing_entry *e, bool line_status) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; - - if (!ioapic) - return -1; - return kvm_ioapic_set_irq(ioapic, e->irqchip.pin, irq_source_id, level, line_status); } @@ -301,10 +288,16 @@ int kvm_set_routing_entry(struct kvm *kvm, delta = 8; /* fall through */ case KVM_IRQCHIP_PIC_MASTER: + if (!pic_in_kernel(kvm)) + goto out; + e->set = kvm_set_pic_irq; max_pin = PIC_NUM_PINS; break; case KVM_IRQCHIP_IOAPIC: + if (!ioapic_in_kernel(kvm)) + goto out; + max_pin = KVM_IOAPIC_NUM_PINS; e->set = kvm_set_ioapic_irq; break; -- cgit v1.2.3 From 826da32140dada1467f4216410525511393317e8 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 16 Dec 2016 16:10:06 +0100 Subject: KVM: x86: simplify conditions with split/kernel irqchip MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Paolo Bonzini Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/irq_comm.c | 2 +- arch/x86/kvm/x86.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index 2639b8d3dce2..b96d3893f121 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -400,7 +400,7 @@ int kvm_setup_empty_irq_routing(struct kvm *kvm) void kvm_arch_post_irq_routing_update(struct kvm *kvm) { - if (ioapic_in_kernel(kvm) || !irqchip_in_kernel(kvm)) + if (!irqchip_split(kvm)) return; kvm_make_scan_ioapic_request(kvm); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 05ac71a01f99..a356d8e12c2f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4027,7 +4027,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) + if (!irqchip_kernel(kvm)) goto get_irqchip_out; r = kvm_vm_ioctl_get_irqchip(kvm, chip); if (r) @@ -4051,7 +4051,7 @@ long kvm_arch_vm_ioctl(struct file *filp, } r = -ENXIO; - if (!irqchip_in_kernel(kvm) || irqchip_split(kvm)) + if (!irqchip_kernel(kvm)) goto set_irqchip_out; r = kvm_vm_ioctl_set_irqchip(kvm, chip); if (r) -- cgit v1.2.3 From f3414bc77419463c0d81eaa2cea7ee4ccb447c7d Mon Sep 17 00:00:00 2001 From: David Matlack Date: Tue, 20 Dec 2016 15:25:57 -0800 Subject: kvm: x86: export maximum number of mmu_page_hash collisions Report the maximum number of mmu_page_hash collisions as a per-VM stat. This will make it easy to identify problems with the mmu_page_hash in the future. Signed-off-by: David Matlack Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/mmu.c | 25 +++++++++++++++++-------- arch/x86/kvm/x86.c | 2 ++ 3 files changed, 20 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index fc03ab1f6110..1bb1ffc0024c 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -821,6 +821,7 @@ struct kvm_vm_stat { ulong mmu_unsync; ulong remote_tlb_flush; ulong lpages; + ulong max_mmu_page_hash_collisions; }; struct kvm_vcpu_stat { diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 7012de4a1fed..45ee7ae88239 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -1904,17 +1904,17 @@ static void kvm_mmu_commit_zap_page(struct kvm *kvm, * since it has been deleted from active_mmu_pages but still can be found * at hast list. * - * for_each_gfn_valid_sp() has skipped that kind of pages. + * for_each_valid_sp() has skipped that kind of pages. */ -#define for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ +#define for_each_valid_sp(_kvm, _sp, _gfn) \ hlist_for_each_entry(_sp, \ &(_kvm)->arch.mmu_page_hash[kvm_page_table_hashfn(_gfn)], hash_link) \ - if ((_sp)->gfn != (_gfn) || is_obsolete_sp((_kvm), (_sp)) \ - || (_sp)->role.invalid) {} else + if (is_obsolete_sp((_kvm), (_sp)) || (_sp)->role.invalid) { \ + } else #define for_each_gfn_indirect_valid_sp(_kvm, _sp, _gfn) \ - for_each_gfn_valid_sp(_kvm, _sp, _gfn) \ - if ((_sp)->role.direct) {} else + for_each_valid_sp(_kvm, _sp, _gfn) \ + if ((_sp)->gfn != (_gfn) || (_sp)->role.direct) {} else /* @sp->gfn should be write-protected at the call site */ static bool __kvm_sync_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, @@ -2116,6 +2116,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp; bool need_sync = false; bool flush = false; + int collisions = 0; LIST_HEAD(invalid_list); role = vcpu->arch.mmu.base_role; @@ -2130,7 +2131,12 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, quadrant &= (1 << ((PT32_PT_BITS - PT64_PT_BITS) * level)) - 1; role.quadrant = quadrant; } - for_each_gfn_valid_sp(vcpu->kvm, sp, gfn) { + for_each_valid_sp(vcpu->kvm, sp, gfn) { + if (sp->gfn != gfn) { + collisions++; + continue; + } + if (!need_sync && sp->unsync) need_sync = true; @@ -2153,7 +2159,7 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, __clear_sp_write_flooding_count(sp); trace_kvm_mmu_get_page(sp, false); - return sp; + goto out; } ++vcpu->kvm->stat.mmu_cache_miss; @@ -2183,6 +2189,9 @@ static struct kvm_mmu_page *kvm_mmu_get_page(struct kvm_vcpu *vcpu, trace_kvm_mmu_get_page(sp, true); kvm_mmu_flush_or_zap(vcpu, &invalid_list, false, flush); +out: + if (collisions > vcpu->kvm->stat.max_mmu_page_hash_collisions) + vcpu->kvm->stat.max_mmu_page_hash_collisions = collisions; return sp; } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index a356d8e12c2f..4aece8b0a4aa 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -190,6 +190,8 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "mmu_unsync", VM_STAT(mmu_unsync) }, { "remote_tlb_flush", VM_STAT(remote_tlb_flush) }, { "largepages", VM_STAT(lpages) }, + { "max_mmu_page_hash_collisions", + VM_STAT(max_mmu_page_hash_collisions) }, { NULL } }; -- cgit v1.2.3 From 114df303a7eeae8b50ebf68229b7e647714a9bea Mon Sep 17 00:00:00 2001 From: David Matlack Date: Mon, 19 Dec 2016 13:58:25 -0800 Subject: kvm: x86: reduce collisions in mmu_page_hash When using two-dimensional paging, the mmu_page_hash (which provides lookups for existing kvm_mmu_page structs), becomes imbalanced; with too many collisions in buckets 0 and 512. This has been seen to cause mmu_lock to be held for multiple milliseconds in kvm_mmu_get_page on VMs with a large amount of RAM mapped with 4K pages. The current hash function uses the lower 10 bits of gfn to index into mmu_page_hash. When doing shadow paging, gfn is the address of the guest page table being shadow. These tables are 4K-aligned, which makes the low bits of gfn a good hash. However, with two-dimensional paging, no guest page tables are being shadowed, so gfn is the base address that is mapped by the table. Thus page tables (level=1) have a 2MB aligned gfn, page directories (level=2) have a 1GB aligned gfn, etc. This means hashes will only differ in their 10th bit. hash_64() provides a better hash. For example, on a VM with ~200G (99458 direct=1 kvm_mmu_page structs): hash max_mmu_page_hash_collisions -------------------------------------------- low 10 bits 49847 hash_64 105 perfect 97 While we're changing the hash, increase the table size by 4x to better support large VMs (further reduces number of collisions in 200G VM to 29). Note that hash_64() does not provide a good distribution prior to commit ef703f49a6c5 ("Eliminate bad hash multipliers from hash_32() and hash_64()"). Signed-off-by: David Matlack Change-Id: I5aa6b13c834722813c6cca46b8b1ed6f53368ade Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/mmu.c | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 1bb1ffc0024c..7e594a325158 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -115,7 +115,7 @@ static inline gfn_t gfn_to_index(gfn_t gfn, gfn_t base_gfn, int level) #define KVM_PERMILLE_MMU_PAGES 20 #define KVM_MIN_ALLOC_MMU_PAGES 64 -#define KVM_MMU_HASH_SHIFT 10 +#define KVM_MMU_HASH_SHIFT 12 #define KVM_NUM_MMU_PAGES (1 << KVM_MMU_HASH_SHIFT) #define KVM_MIN_FREE_MMU_PAGES 5 #define KVM_REFILL_PAGES 25 diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 45ee7ae88239..3f9fa39f1469 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include @@ -1713,7 +1714,7 @@ static void kvm_mmu_free_page(struct kvm_mmu_page *sp) static unsigned kvm_page_table_hashfn(gfn_t gfn) { - return gfn & ((1 << KVM_MMU_HASH_SHIFT) - 1); + return hash_64(gfn, KVM_MMU_HASH_SHIFT); } static void mmu_page_add_parent_pte(struct kvm_vcpu *vcpu, -- cgit v1.2.3 From 27959a4415a5a00881a7b9353ab9b1274da2ca47 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:10 -0800 Subject: kvm: x86: mmu: Use symbolic constants for EPT Violation Exit Qualifications This change adds some symbolic constants for VM Exit Qualifications related to EPT Violations and updates handle_ept_violation() to use these constants instead of hard-coded numbers. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 16 ++++++++++++++++ arch/x86/kvm/vmx.c | 22 ++++++++++++++-------- 2 files changed, 30 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 2b5b2d4b924e..25a482fb5241 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -499,6 +499,22 @@ struct vmx_msr_entry { #define ENTRY_FAIL_NMI 3 #define ENTRY_FAIL_VMCS_LINK_PTR 4 +/* + * Exit Qualifications for EPT Violations + */ +#define EPT_VIOLATION_READ_BIT 0 +#define EPT_VIOLATION_WRITE_BIT 1 +#define EPT_VIOLATION_INSTR_BIT 2 +#define EPT_VIOLATION_READABLE_BIT 3 +#define EPT_VIOLATION_WRITABLE_BIT 4 +#define EPT_VIOLATION_EXECUTABLE_BIT 5 +#define EPT_VIOLATION_READ (1 << EPT_VIOLATION_READ_BIT) +#define EPT_VIOLATION_WRITE (1 << EPT_VIOLATION_WRITE_BIT) +#define EPT_VIOLATION_INSTR (1 << EPT_VIOLATION_INSTR_BIT) +#define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) +#define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) +#define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) + /* * VM-instruction error numbers */ diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index a236decb81e4..81159a3878f4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6374,14 +6374,20 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) gpa = vmcs_read64(GUEST_PHYSICAL_ADDRESS); trace_kvm_page_fault(gpa, exit_qualification); - /* it is a read fault? */ - error_code = (exit_qualification << 2) & PFERR_USER_MASK; - /* it is a write fault? */ - error_code |= exit_qualification & PFERR_WRITE_MASK; - /* It is a fetch fault? */ - error_code |= (exit_qualification << 2) & PFERR_FETCH_MASK; - /* ept page table is present? */ - error_code |= (exit_qualification & 0x38) != 0; + /* Is it a read fault? */ + error_code = (exit_qualification & EPT_VIOLATION_READ) + ? PFERR_USER_MASK : 0; + /* Is it a write fault? */ + error_code |= (exit_qualification & EPT_VIOLATION_WRITE) + ? PFERR_WRITE_MASK : 0; + /* Is it a fetch fault? */ + error_code |= (exit_qualification & EPT_VIOLATION_INSTR) + ? PFERR_FETCH_MASK : 0; + /* ept page table entry is present? */ + error_code |= (exit_qualification & + (EPT_VIOLATION_READABLE | EPT_VIOLATION_WRITABLE | + EPT_VIOLATION_EXECUTABLE)) + ? PFERR_PRESENT_MASK : 0; vcpu->arch.exit_qualification = exit_qualification; -- cgit v1.2.3 From ea4114bcd3a8c84f0eb0b52e56d348c27ddede2e Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:11 -0800 Subject: kvm: x86: mmu: Rename spte_is_locklessly_modifiable() This change renames spte_is_locklessly_modifiable() to spte_can_locklessly_be_made_writable() to distinguish it from other forms of lockless modifications. The full set of lockless modifications is covered by spte_has_volatile_bits(). Signed-off-by: Junaid Shahid Reviewed-by: Paolo Bonzini Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 3f9fa39f1469..e923f393ac26 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -474,7 +474,7 @@ retry: } #endif -static bool spte_is_locklessly_modifiable(u64 spte) +static bool spte_can_locklessly_be_made_writable(u64 spte) { return (spte & (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE)) == (SPTE_HOST_WRITEABLE | SPTE_MMU_WRITEABLE); @@ -488,7 +488,7 @@ static bool spte_has_volatile_bits(u64 spte) * also, it can help us to get a stable is_writable_pte() * to ensure tlb flush is not missed. */ - if (spte_is_locklessly_modifiable(spte)) + if (spte_can_locklessly_be_made_writable(spte)) return true; if (!shadow_accessed_mask) @@ -557,7 +557,7 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) * we always atomically update it, see the comments in * spte_has_volatile_bits(). */ - if (spte_is_locklessly_modifiable(old_spte) && + if (spte_can_locklessly_be_made_writable(old_spte) && !is_writable_pte(new_spte)) ret = true; @@ -1213,7 +1213,7 @@ static bool spte_write_protect(u64 *sptep, bool pt_protect) u64 spte = *sptep; if (!is_writable_pte(spte) && - !(pt_protect && spte_is_locklessly_modifiable(spte))) + !(pt_protect && spte_can_locklessly_be_made_writable(spte))) return false; rmap_printk("rmap_write_protect: spte %p %llx\n", sptep, *sptep); @@ -2975,7 +2975,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * Currently, to simplify the code, only the spte write-protected * by dirty-log can be fast fixed. */ - if (!spte_is_locklessly_modifiable(spte)) + if (!spte_can_locklessly_be_made_writable(spte)) goto exit; /* -- cgit v1.2.3 From 97dceba29a6acbb28d16c8c5757ae9f4e1e482ea Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:12 -0800 Subject: kvm: x86: mmu: Fast Page Fault path retries This change adds retries into the Fast Page Fault path. Without the retries, the code still works, but if a retry does end up being needed, then it will result in a second page fault for the same memory access, which will cause much more overhead compared to just retrying within the original fault. This would be especially useful with the upcoming fast access tracking change, as that would make it more likely for retries to be needed (e.g. due to read and write faults happening on different CPUs at the same time). Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 124 +++++++++++++++++++++++++++++++---------------------- 1 file changed, 73 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e923f393ac26..f6d3505c8d18 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -2891,6 +2891,10 @@ static bool page_fault_can_be_fast(u32 error_code) return true; } +/* + * Returns true if the SPTE was fixed successfully. Otherwise, + * someone else modified the SPTE from its original value. + */ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, u64 *sptep, u64 spte) @@ -2917,8 +2921,10 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * * Compare with set_spte where instead shadow_dirty_mask is set. */ - if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) == spte) - kvm_vcpu_mark_page_dirty(vcpu, gfn); + if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) != spte) + return false; + + kvm_vcpu_mark_page_dirty(vcpu, gfn); return true; } @@ -2933,8 +2939,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, { struct kvm_shadow_walk_iterator iterator; struct kvm_mmu_page *sp; - bool ret = false; + bool fault_handled = false; u64 spte = 0ull; + uint retry_count = 0; if (!VALID_PAGE(vcpu->arch.mmu.root_hpa)) return false; @@ -2947,62 +2954,77 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, if (!is_shadow_present_pte(spte) || iterator.level < level) break; - /* - * If the mapping has been changed, let the vcpu fault on the - * same address again. - */ - if (!is_shadow_present_pte(spte)) { - ret = true; - goto exit; - } + do { + /* + * If the mapping has been changed, let the vcpu fault on the + * same address again. + */ + if (!is_shadow_present_pte(spte)) { + fault_handled = true; + break; + } - sp = page_header(__pa(iterator.sptep)); - if (!is_last_spte(spte, sp->role.level)) - goto exit; + sp = page_header(__pa(iterator.sptep)); + if (!is_last_spte(spte, sp->role.level)) + break; - /* - * Check if it is a spurious fault caused by TLB lazily flushed. - * - * Need not check the access of upper level table entries since - * they are always ACC_ALL. - */ - if (is_writable_pte(spte)) { - ret = true; - goto exit; - } + /* + * Check if it is a spurious fault caused by TLB lazily flushed. + * + * Need not check the access of upper level table entries since + * they are always ACC_ALL. + */ + if (is_writable_pte(spte)) { + fault_handled = true; + break; + } - /* - * Currently, to simplify the code, only the spte write-protected - * by dirty-log can be fast fixed. - */ - if (!spte_can_locklessly_be_made_writable(spte)) - goto exit; + /* + * Currently, to simplify the code, only the spte + * write-protected by dirty-log can be fast fixed. + */ + if (!spte_can_locklessly_be_made_writable(spte)) + break; - /* - * Do not fix write-permission on the large spte since we only dirty - * the first page into the dirty-bitmap in fast_pf_fix_direct_spte() - * that means other pages are missed if its slot is dirty-logged. - * - * Instead, we let the slow page fault path create a normal spte to - * fix the access. - * - * See the comments in kvm_arch_commit_memory_region(). - */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) - goto exit; + /* + * Do not fix write-permission on the large spte since we only + * dirty the first page into the dirty-bitmap in + * fast_pf_fix_direct_spte() that means other pages are missed + * if its slot is dirty-logged. + * + * Instead, we let the slow page fault path create a normal spte + * to fix the access. + * + * See the comments in kvm_arch_commit_memory_region(). + */ + if (sp->role.level > PT_PAGE_TABLE_LEVEL) + break; + + /* + * Currently, fast page fault only works for direct mapping + * since the gfn is not stable for indirect shadow page. See + * Documentation/virtual/kvm/locking.txt to get more detail. + */ + fault_handled = fast_pf_fix_direct_spte(vcpu, sp, + iterator.sptep, spte); + if (fault_handled) + break; + + if (++retry_count > 4) { + printk_once(KERN_WARNING + "kvm: Fast #PF retrying more than 4 times.\n"); + break; + } + + spte = mmu_spte_get_lockless(iterator.sptep); + + } while (true); - /* - * Currently, fast page fault only works for direct mapping since - * the gfn is not stable for indirect shadow page. - * See Documentation/virtual/kvm/locking.txt to get more detail. - */ - ret = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte); -exit: trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, - spte, ret); + spte, fault_handled); walk_shadow_page_lockless_end(vcpu); - return ret; + return fault_handled; } static bool try_async_pf(struct kvm_vcpu *vcpu, bool prefault, gfn_t gfn, -- cgit v1.2.3 From 83ef6c8155c0ecb4c1a7e6bfbe425c85f7cb676d Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:13 -0800 Subject: kvm: x86: mmu: Refactor accessed/dirty checks in mmu_spte_update/clear This simplifies mmu_spte_update() a little bit. The checks for clearing of accessed and dirty bits are refactored into separate functions, which are used inside both mmu_spte_update() and mmu_spte_clear_track_bits(), as well as kvm_test_age_rmapp(). The new helper functions handle both the case when A/D bits are supported in hardware and the case when they are not. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 66 +++++++++++++++++++++++++----------------------------- 1 file changed, 31 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index f6d3505c8d18..cfef95969335 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -504,14 +504,16 @@ static bool spte_has_volatile_bits(u64 spte) return true; } -static bool spte_is_bit_cleared(u64 old_spte, u64 new_spte, u64 bit_mask) +static bool is_accessed_spte(u64 spte) { - return (old_spte & bit_mask) && !(new_spte & bit_mask); + return shadow_accessed_mask ? spte & shadow_accessed_mask + : true; } -static bool spte_is_bit_changed(u64 old_spte, u64 new_spte, u64 bit_mask) +static bool is_dirty_spte(u64 spte) { - return (old_spte & bit_mask) != (new_spte & bit_mask); + return shadow_dirty_mask ? spte & shadow_dirty_mask + : spte & PT_WRITABLE_MASK; } /* Rules for using mmu_spte_set: @@ -534,17 +536,19 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) * will find a read-only spte, even though the writable spte * might be cached on a CPU's TLB, the return value indicates this * case. + * + * Returns true if the TLB needs to be flushed */ static bool mmu_spte_update(u64 *sptep, u64 new_spte) { u64 old_spte = *sptep; - bool ret = false; + bool flush = false; WARN_ON(!is_shadow_present_pte(new_spte)); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); - return ret; + return flush; } if (!spte_has_volatile_bits(old_spte)) @@ -552,6 +556,8 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) else old_spte = __update_clear_spte_slow(sptep, new_spte); + WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); + /* * For the spte updated out of mmu-lock is safe, since * we always atomically update it, see the comments in @@ -559,38 +565,31 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) */ if (spte_can_locklessly_be_made_writable(old_spte) && !is_writable_pte(new_spte)) - ret = true; - - if (!shadow_accessed_mask) { - /* - * We don't set page dirty when dropping non-writable spte. - * So do it now if the new spte is becoming non-writable. - */ - if (ret) - kvm_set_pfn_dirty(spte_to_pfn(old_spte)); - return ret; - } + flush = true; /* - * Flush TLB when accessed/dirty bits are changed in the page tables, + * Flush TLB when accessed/dirty states are changed in the page tables, * to guarantee consistency between TLB and page tables. */ - if (spte_is_bit_changed(old_spte, new_spte, - shadow_accessed_mask | shadow_dirty_mask)) - ret = true; - if (spte_is_bit_cleared(old_spte, new_spte, shadow_accessed_mask)) + if (is_accessed_spte(old_spte) && !is_accessed_spte(new_spte)) { + flush = true; kvm_set_pfn_accessed(spte_to_pfn(old_spte)); - if (spte_is_bit_cleared(old_spte, new_spte, shadow_dirty_mask)) + } + + if (is_dirty_spte(old_spte) && !is_dirty_spte(new_spte)) { + flush = true; kvm_set_pfn_dirty(spte_to_pfn(old_spte)); + } - return ret; + return flush; } /* * Rules for using mmu_spte_clear_track_bits: * It sets the sptep from present to nonpresent, and track the * state bits, it is used to clear the last level sptep. + * Returns non-zero if the PTE was previously valid. */ static int mmu_spte_clear_track_bits(u64 *sptep) { @@ -614,11 +613,12 @@ static int mmu_spte_clear_track_bits(u64 *sptep) */ WARN_ON(!kvm_is_reserved_pfn(pfn) && !page_count(pfn_to_page(pfn))); - if (!shadow_accessed_mask || old_spte & shadow_accessed_mask) + if (is_accessed_spte(old_spte)) kvm_set_pfn_accessed(pfn); - if (old_spte & (shadow_dirty_mask ? shadow_dirty_mask : - PT_WRITABLE_MASK)) + + if (is_dirty_spte(old_spte)) kvm_set_pfn_dirty(pfn); + return 1; } @@ -1616,7 +1616,6 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, { u64 *sptep; struct rmap_iterator iter; - int young = 0; /* * If there's no access bit in the secondary pte set by the @@ -1626,14 +1625,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, if (!shadow_accessed_mask) goto out; - for_each_rmap_spte(rmap_head, &iter, sptep) { - if (*sptep & shadow_accessed_mask) { - young = 1; - break; - } - } + for_each_rmap_spte(rmap_head, &iter, sptep) + if (is_accessed_spte(*sptep)) + return 1; out: - return young; + return 0; } #define RMAP_RECYCLE_THRESHOLD 1000 -- cgit v1.2.3 From f39a058d0ea2f58b9c69cfcf7c93184f33302c98 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:14 -0800 Subject: kvm: x86: mmu: Introduce a no-tracking version of mmu_spte_update mmu_spte_update() tracks changes in the accessed/dirty state of the SPTE being updated and calls kvm_set_pfn_accessed/dirty appropriately. However, in some cases (e.g. when aging the SPTE), this shouldn't be done. mmu_spte_update_no_track() is introduced for use in such cases. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 40 +++++++++++++++++++++++++++------------- 1 file changed, 27 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index cfef95969335..b8b5259c8ebb 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -528,27 +528,19 @@ static void mmu_spte_set(u64 *sptep, u64 new_spte) __set_spte(sptep, new_spte); } -/* Rules for using mmu_spte_update: - * Update the state bits, it means the mapped pfn is not changed. - * - * Whenever we overwrite a writable spte with a read-only one we - * should flush remote TLBs. Otherwise rmap_write_protect - * will find a read-only spte, even though the writable spte - * might be cached on a CPU's TLB, the return value indicates this - * case. - * - * Returns true if the TLB needs to be flushed +/* + * Update the SPTE (excluding the PFN), but do not track changes in its + * accessed/dirty status. */ -static bool mmu_spte_update(u64 *sptep, u64 new_spte) +static u64 mmu_spte_update_no_track(u64 *sptep, u64 new_spte) { u64 old_spte = *sptep; - bool flush = false; WARN_ON(!is_shadow_present_pte(new_spte)); if (!is_shadow_present_pte(old_spte)) { mmu_spte_set(sptep, new_spte); - return flush; + return old_spte; } if (!spte_has_volatile_bits(old_spte)) @@ -558,6 +550,28 @@ static bool mmu_spte_update(u64 *sptep, u64 new_spte) WARN_ON(spte_to_pfn(old_spte) != spte_to_pfn(new_spte)); + return old_spte; +} + +/* Rules for using mmu_spte_update: + * Update the state bits, it means the mapped pfn is not changed. + * + * Whenever we overwrite a writable spte with a read-only one we + * should flush remote TLBs. Otherwise rmap_write_protect + * will find a read-only spte, even though the writable spte + * might be cached on a CPU's TLB, the return value indicates this + * case. + * + * Returns true if the TLB needs to be flushed + */ +static bool mmu_spte_update(u64 *sptep, u64 new_spte) +{ + bool flush = false; + u64 old_spte = mmu_spte_update_no_track(sptep, new_spte); + + if (!is_shadow_present_pte(old_spte)) + return false; + /* * For the spte updated out of mmu-lock is safe, since * we always atomically update it, see the comments in -- cgit v1.2.3 From 37f0e8fe6b10ee2ab52576caa721ee1282de74a6 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:15 -0800 Subject: kvm: x86: mmu: Do not use bit 63 for tracking special SPTEs MMIO SPTEs currently set both bits 62 and 63 to distinguish them as special PTEs. However, bit 63 is used as the SVE bit in Intel EPT PTEs. The SVE bit is ignored for misconfigured PTEs but not necessarily for not-Present PTEs. Since MMIO SPTEs use an EPT misconfiguration, so using bit 63 for them is acceptable. However, the upcoming fast access tracking feature adds another type of special tracking PTE, which uses not-Present PTEs and hence should not set bit 63. In order to use common bits to distinguish both type of special PTEs, we now use only bit 62 as the special bit. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 7 +++++++ arch/x86/include/asm/vmx.h | 9 +++++++-- arch/x86/kvm/vmx.c | 6 +++--- 3 files changed, 17 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 7e594a325158..3272a5e4aaad 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -208,6 +208,13 @@ enum { PFERR_WRITE_MASK | \ PFERR_PRESENT_MASK) +/* + * The mask used to denote special SPTEs, which can be either MMIO SPTEs or + * Access Tracking SPTEs. We use bit 62 instead of bit 63 to avoid conflicting + * with the SVE bit in EPT PTEs. + */ +#define SPTE_SPECIAL_MASK (1ULL << 62) + /* apic attention bits */ #define KVM_APIC_CHECK_VAPIC 0 /* diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index 25a482fb5241..fc061cbb46e0 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -467,8 +467,13 @@ enum vmcs_field { #define VMX_EPT_WRITABLE_MASK 0x2ull #define VMX_EPT_EXECUTABLE_MASK 0x4ull #define VMX_EPT_IPAT_BIT (1ull << 6) -#define VMX_EPT_ACCESS_BIT (1ull << 8) -#define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_ACCESS_BIT (1ull << 8) +#define VMX_EPT_DIRTY_BIT (1ull << 9) + +/* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ +#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ + VMX_EPT_EXECUTABLE_MASK) + #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 81159a3878f4..6f53dedd9b96 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5236,10 +5236,10 @@ static void ept_set_mmio_spte_mask(void) /* * EPT Misconfigurations can be generated if the value of bits 2:0 * of an EPT paging-structure entry is 110b (write/execute). - * Also, magic bits (0x3ull << 62) is set to quickly identify mmio - * spte. + * Also, special bit (62) is set to quickly identify mmio spte. */ - kvm_mmu_set_mmio_spte_mask((0x3ull << 62) | 0x6ull); + kvm_mmu_set_mmio_spte_mask(SPTE_SPECIAL_MASK | + VMX_EPT_MISCONFIG_WX_VALUE); } #define VMX_XSS_EXIT_BITMAP 0 -- cgit v1.2.3 From f160c7b7bb322bf079a5bb4dd34c58f17553f193 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Tue, 6 Dec 2016 16:46:16 -0800 Subject: kvm: x86: mmu: Lockless access tracking for Intel CPUs without EPT A bits. This change implements lockless access tracking for Intel CPUs without EPT A bits. This is achieved by marking the PTEs as not-present (but not completely clearing them) when clear_flush_young() is called after marking the pages as accessed. When an EPT Violation is generated as a result of the VM accessing those pages, the PTEs are restored to their original values. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 +- arch/x86/include/asm/vmx.h | 9 +- arch/x86/kvm/mmu.c | 279 ++++++++++++++++++++++++++++++---------- arch/x86/kvm/vmx.c | 26 ++-- arch/x86/kvm/x86.c | 2 +- 5 files changed, 239 insertions(+), 80 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 3272a5e4aaad..99a71d90b6ae 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -1064,7 +1064,8 @@ void kvm_mmu_setup(struct kvm_vcpu *vcpu); void kvm_mmu_init_vm(struct kvm *kvm); void kvm_mmu_uninit_vm(struct kvm *kvm); void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask); + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, + u64 acc_track_mask); void kvm_mmu_reset_context(struct kvm_vcpu *vcpu); void kvm_mmu_slot_remove_write_access(struct kvm *kvm, diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index fc061cbb46e0..a22a4790f1ac 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -469,11 +469,14 @@ enum vmcs_field { #define VMX_EPT_IPAT_BIT (1ull << 6) #define VMX_EPT_ACCESS_BIT (1ull << 8) #define VMX_EPT_DIRTY_BIT (1ull << 9) +#define VMX_EPT_RWX_MASK (VMX_EPT_READABLE_MASK | \ + VMX_EPT_WRITABLE_MASK | \ + VMX_EPT_EXECUTABLE_MASK) +#define VMX_EPT_MT_MASK (7ull << VMX_EPT_MT_EPTE_SHIFT) /* The mask to use to trigger an EPT Misconfiguration in order to track MMIO */ -#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ - VMX_EPT_EXECUTABLE_MASK) - +#define VMX_EPT_MISCONFIG_WX_VALUE (VMX_EPT_WRITABLE_MASK | \ + VMX_EPT_EXECUTABLE_MASK) #define VMX_EPT_IDENTITY_PAGETABLE_ADDR 0xfffbc000ul diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index b8b5259c8ebb..64821ca3a7c3 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -130,6 +131,10 @@ module_param(dbg, bool, 0644); #define ACC_USER_MASK PT_USER_MASK #define ACC_ALL (ACC_EXEC_MASK | ACC_WRITE_MASK | ACC_USER_MASK) +/* The mask for the R/X bits in EPT PTEs */ +#define PT64_EPT_READABLE_MASK 0x1ull +#define PT64_EPT_EXECUTABLE_MASK 0x4ull + #include #define CREATE_TRACE_POINTS @@ -179,6 +184,25 @@ static u64 __read_mostly shadow_dirty_mask; static u64 __read_mostly shadow_mmio_mask; static u64 __read_mostly shadow_present_mask; +/* + * The mask/value to distinguish a PTE that has been marked not-present for + * access tracking purposes. + * The mask would be either 0 if access tracking is disabled, or + * SPTE_SPECIAL_MASK|VMX_EPT_RWX_MASK if access tracking is enabled. + */ +static u64 __read_mostly shadow_acc_track_mask; +static const u64 shadow_acc_track_value = SPTE_SPECIAL_MASK; + +/* + * The mask/shift to use for saving the original R/X bits when marking the PTE + * as not-present for access tracking purposes. We do not save the W bit as the + * PTEs being access tracked also need to be dirty tracked, so the W bit will be + * restored only when a write is attempted to the page. + */ +static const u64 shadow_acc_track_saved_bits_mask = PT64_EPT_READABLE_MASK | + PT64_EPT_EXECUTABLE_MASK; +static const u64 shadow_acc_track_saved_bits_shift = PT64_SECOND_AVAIL_BITS_SHIFT; + static void mmu_spte_set(u64 *sptep, u64 spte); static void mmu_free_roots(struct kvm_vcpu *vcpu); @@ -188,6 +212,12 @@ void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); +static inline bool is_access_track_spte(u64 spte) +{ + /* Always false if shadow_acc_track_mask is zero. */ + return (spte & shadow_acc_track_mask) == shadow_acc_track_value; +} + /* * the low bit of the generation number is always presumed to be zero. * This disables mmio caching during memslot updates. The concept is @@ -285,7 +315,8 @@ static bool check_mmio_spte(struct kvm_vcpu *vcpu, u64 spte) } void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, - u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask) + u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, + u64 acc_track_mask) { shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; @@ -293,9 +324,23 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, shadow_nx_mask = nx_mask; shadow_x_mask = x_mask; shadow_present_mask = p_mask; + shadow_acc_track_mask = acc_track_mask; + WARN_ON(shadow_accessed_mask != 0 && shadow_acc_track_mask != 0); } EXPORT_SYMBOL_GPL(kvm_mmu_set_mask_ptes); +void kvm_mmu_clear_all_pte_masks(void) +{ + shadow_user_mask = 0; + shadow_accessed_mask = 0; + shadow_dirty_mask = 0; + shadow_nx_mask = 0; + shadow_x_mask = 0; + shadow_mmio_mask = 0; + shadow_present_mask = 0; + shadow_acc_track_mask = 0; +} + static int is_cpuid_PSE36(void) { return 1; @@ -308,7 +353,7 @@ static int is_nx(struct kvm_vcpu *vcpu) static int is_shadow_present_pte(u64 pte) { - return (pte & 0xFFFFFFFFull) && !is_mmio_spte(pte); + return (pte != 0) && !is_mmio_spte(pte); } static int is_large_pte(u64 pte) @@ -482,32 +527,32 @@ static bool spte_can_locklessly_be_made_writable(u64 spte) static bool spte_has_volatile_bits(u64 spte) { + if (!is_shadow_present_pte(spte)) + return false; + /* * Always atomically update spte if it can be updated * out of mmu-lock, it can ensure dirty bit is not lost, * also, it can help us to get a stable is_writable_pte() * to ensure tlb flush is not missed. */ - if (spte_can_locklessly_be_made_writable(spte)) + if (spte_can_locklessly_be_made_writable(spte) || + is_access_track_spte(spte)) return true; - if (!shadow_accessed_mask) - return false; - - if (!is_shadow_present_pte(spte)) - return false; - - if ((spte & shadow_accessed_mask) && - (!is_writable_pte(spte) || (spte & shadow_dirty_mask))) - return false; + if (shadow_accessed_mask) { + if ((spte & shadow_accessed_mask) == 0 || + (is_writable_pte(spte) && (spte & shadow_dirty_mask) == 0)) + return true; + } - return true; + return false; } static bool is_accessed_spte(u64 spte) { return shadow_accessed_mask ? spte & shadow_accessed_mask - : true; + : !is_access_track_spte(spte); } static bool is_dirty_spte(u64 spte) @@ -651,6 +696,61 @@ static u64 mmu_spte_get_lockless(u64 *sptep) return __get_spte_lockless(sptep); } +static u64 mark_spte_for_access_track(u64 spte) +{ + if (shadow_accessed_mask != 0) + return spte & ~shadow_accessed_mask; + + if (shadow_acc_track_mask == 0 || is_access_track_spte(spte)) + return spte; + + /* + * Verify that the write-protection that we do below will be fixable + * via the fast page fault path. Currently, that is always the case, at + * least when using EPT (which is when access tracking would be used). + */ + WARN_ONCE((spte & PT_WRITABLE_MASK) && + !spte_can_locklessly_be_made_writable(spte), + "kvm: Writable SPTE is not locklessly dirty-trackable\n"); + + WARN_ONCE(spte & (shadow_acc_track_saved_bits_mask << + shadow_acc_track_saved_bits_shift), + "kvm: Access Tracking saved bit locations are not zero\n"); + + spte |= (spte & shadow_acc_track_saved_bits_mask) << + shadow_acc_track_saved_bits_shift; + spte &= ~shadow_acc_track_mask; + spte |= shadow_acc_track_value; + + return spte; +} + +/* Returns the Accessed status of the PTE and resets it at the same time. */ +static bool mmu_spte_age(u64 *sptep) +{ + u64 spte = mmu_spte_get_lockless(sptep); + + if (!is_accessed_spte(spte)) + return false; + + if (shadow_accessed_mask) { + clear_bit((ffs(shadow_accessed_mask) - 1), + (unsigned long *)sptep); + } else { + /* + * Capture the dirty status of the page, so that it doesn't get + * lost when the SPTE is marked for access tracking. + */ + if (is_writable_pte(spte)) + kvm_set_pfn_dirty(spte_to_pfn(spte)); + + spte = mark_spte_for_access_track(spte); + mmu_spte_update_no_track(sptep, spte); + } + + return true; +} + static void walk_shadow_page_lockless_begin(struct kvm_vcpu *vcpu) { /* @@ -1435,7 +1535,7 @@ static int kvm_set_pte_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, restart: for_each_rmap_spte(rmap_head, &iter, sptep) { rmap_printk("kvm_set_pte_rmapp: spte %p %llx gfn %llx (%d)\n", - sptep, *sptep, gfn, level); + sptep, *sptep, gfn, level); need_flush = 1; @@ -1448,7 +1548,8 @@ restart: new_spte &= ~PT_WRITABLE_MASK; new_spte &= ~SPTE_HOST_WRITEABLE; - new_spte &= ~shadow_accessed_mask; + + new_spte = mark_spte_for_access_track(new_spte); mmu_spte_clear_track_bits(sptep); mmu_spte_set(sptep, new_spte); @@ -1610,15 +1711,8 @@ static int kvm_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct rmap_iterator uninitialized_var(iter); int young = 0; - BUG_ON(!shadow_accessed_mask); - - for_each_rmap_spte(rmap_head, &iter, sptep) { - if (*sptep & shadow_accessed_mask) { - young = 1; - clear_bit((ffs(shadow_accessed_mask) - 1), - (unsigned long *)sptep); - } - } + for_each_rmap_spte(rmap_head, &iter, sptep) + young |= mmu_spte_age(sptep); trace_kvm_age_page(gfn, level, slot, young); return young; @@ -1632,11 +1726,11 @@ static int kvm_test_age_rmapp(struct kvm *kvm, struct kvm_rmap_head *rmap_head, struct rmap_iterator iter; /* - * If there's no access bit in the secondary pte set by the - * hardware it's up to gup-fast/gup to set the access bit in - * the primary pte or in the page structure. + * If there's no access bit in the secondary pte set by the hardware and + * fast access tracking is also not enabled, it's up to gup-fast/gup to + * set the access bit in the primary pte or in the page structure. */ - if (!shadow_accessed_mask) + if (!shadow_accessed_mask && !shadow_acc_track_mask) goto out; for_each_rmap_spte(rmap_head, &iter, sptep) @@ -1671,7 +1765,7 @@ int kvm_age_hva(struct kvm *kvm, unsigned long start, unsigned long end) * This has some overhead, but not as much as the cost of swapping * out actively used pages or breaking up actively used hugepages. */ - if (!shadow_accessed_mask) + if (!shadow_accessed_mask && !shadow_acc_track_mask) return kvm_handle_hva_range(kvm, start, end, 0, kvm_unmap_rmapp); @@ -2603,6 +2697,9 @@ static int set_spte(struct kvm_vcpu *vcpu, u64 *sptep, spte |= shadow_dirty_mask; } + if (speculative) + spte = mark_spte_for_access_track(spte); + set_pte: if (mmu_spte_update(sptep, spte)) kvm_flush_remote_tlbs(vcpu->kvm); @@ -2656,7 +2753,7 @@ static bool mmu_set_spte(struct kvm_vcpu *vcpu, u64 *sptep, unsigned pte_access, pgprintk("%s: setting spte %llx\n", __func__, *sptep); pgprintk("instantiating %s PTE (%s) at %llx (%llx) addr %p\n", is_large_pte(*sptep)? "2MB" : "4kB", - *sptep & PT_PRESENT_MASK ?"RW":"R", gfn, + *sptep & PT_WRITABLE_MASK ? "RW" : "R", gfn, *sptep, sptep); if (!was_rmapped && is_large_pte(*sptep)) ++vcpu->kvm->stat.lpages; @@ -2889,16 +2986,28 @@ static bool page_fault_can_be_fast(u32 error_code) if (unlikely(error_code & PFERR_RSVD_MASK)) return false; + /* See if the page fault is due to an NX violation */ + if (unlikely(((error_code & (PFERR_FETCH_MASK | PFERR_PRESENT_MASK)) + == (PFERR_FETCH_MASK | PFERR_PRESENT_MASK)))) + return false; + /* - * #PF can be fast only if the shadow page table is present and it - * is caused by write-protect, that means we just need change the - * W bit of the spte which can be done out of mmu-lock. + * #PF can be fast if: + * 1. The shadow page table entry is not present, which could mean that + * the fault is potentially caused by access tracking (if enabled). + * 2. The shadow page table entry is present and the fault + * is caused by write-protect, that means we just need change the W + * bit of the spte which can be done out of mmu-lock. + * + * However, if access tracking is disabled we know that a non-present + * page must be a genuine page fault where we have to create a new SPTE. + * So, if access tracking is disabled, we return true only for write + * accesses to a present page. */ - if (!(error_code & PFERR_PRESENT_MASK) || - !(error_code & PFERR_WRITE_MASK)) - return false; - return true; + return shadow_acc_track_mask != 0 || + ((error_code & (PFERR_WRITE_MASK | PFERR_PRESENT_MASK)) + == (PFERR_WRITE_MASK | PFERR_PRESENT_MASK)); } /* @@ -2907,17 +3016,26 @@ static bool page_fault_can_be_fast(u32 error_code) */ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *sptep, u64 spte) + u64 *sptep, u64 old_spte, + bool remove_write_prot, bool remove_acc_track) { gfn_t gfn; + u64 new_spte = old_spte; WARN_ON(!sp->role.direct); - /* - * The gfn of direct spte is stable since it is calculated - * by sp->gfn. - */ - gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); + if (remove_acc_track) { + u64 saved_bits = (old_spte >> shadow_acc_track_saved_bits_shift) + & shadow_acc_track_saved_bits_mask; + + new_spte &= ~shadow_acc_track_mask; + new_spte &= ~(shadow_acc_track_saved_bits_mask << + shadow_acc_track_saved_bits_shift); + new_spte |= saved_bits; + } + + if (remove_write_prot) + new_spte |= PT_WRITABLE_MASK; /* * Theoretically we could also set dirty bit (and flush TLB) here in @@ -2931,10 +3049,17 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, * * Compare with set_spte where instead shadow_dirty_mask is set. */ - if (cmpxchg64(sptep, spte, spte | PT_WRITABLE_MASK) != spte) + if (cmpxchg64(sptep, old_spte, new_spte) != old_spte) return false; - kvm_vcpu_mark_page_dirty(vcpu, gfn); + if (remove_write_prot) { + /* + * The gfn of direct spte is stable since it is + * calculated by sp->gfn. + */ + gfn = kvm_mmu_page_get_gfn(sp, sptep - sp->spt); + kvm_vcpu_mark_page_dirty(vcpu, gfn); + } return true; } @@ -2965,35 +3090,55 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, break; do { - /* - * If the mapping has been changed, let the vcpu fault on the - * same address again. - */ - if (!is_shadow_present_pte(spte)) { - fault_handled = true; - break; - } + bool remove_write_prot = false; + bool remove_acc_track; sp = page_header(__pa(iterator.sptep)); if (!is_last_spte(spte, sp->role.level)) break; /* - * Check if it is a spurious fault caused by TLB lazily flushed. + * Check whether the memory access that caused the fault would + * still cause it if it were to be performed right now. If not, + * then this is a spurious fault caused by TLB lazily flushed, + * or some other CPU has already fixed the PTE after the + * current CPU took the fault. * * Need not check the access of upper level table entries since * they are always ACC_ALL. */ - if (is_writable_pte(spte)) { - fault_handled = true; - break; + + if (error_code & PFERR_FETCH_MASK) { + if ((spte & (shadow_x_mask | shadow_nx_mask)) + == shadow_x_mask) { + fault_handled = true; + break; + } + } else if (error_code & PFERR_WRITE_MASK) { + if (is_writable_pte(spte)) { + fault_handled = true; + break; + } + + /* + * Currently, to simplify the code, write-protection can + * be removed in the fast path only if the SPTE was + * write-protected for dirty-logging. + */ + remove_write_prot = + spte_can_locklessly_be_made_writable(spte); + } else { + /* Fault was on Read access */ + if (spte & PT_PRESENT_MASK) { + fault_handled = true; + break; + } } - /* - * Currently, to simplify the code, only the spte - * write-protected by dirty-log can be fast fixed. - */ - if (!spte_can_locklessly_be_made_writable(spte)) + remove_acc_track = is_access_track_spte(spte); + + /* Verify that the fault can be handled in the fast path */ + if (!remove_acc_track && !remove_write_prot) break; /* @@ -3007,7 +3152,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * * See the comments in kvm_arch_commit_memory_region(). */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL) + if (sp->role.level > PT_PAGE_TABLE_LEVEL && remove_write_prot) break; /* @@ -3016,7 +3161,9 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * Documentation/virtual/kvm/locking.txt to get more detail. */ fault_handled = fast_pf_fix_direct_spte(vcpu, sp, - iterator.sptep, spte); + iterator.sptep, spte, + remove_write_prot, + remove_acc_track); if (fault_handled) break; @@ -5105,6 +5252,8 @@ static void mmu_destroy_caches(void) int kvm_mmu_module_init(void) { + kvm_mmu_clear_all_pte_masks(); + pte_list_desc_cache = kmem_cache_create("pte_list_desc", sizeof(struct pte_list_desc), 0, 0, NULL); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 6f53dedd9b96..d2fe3a51876c 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6578,6 +6578,19 @@ static void wakeup_handler(void) spin_unlock(&per_cpu(blocked_vcpu_on_cpu_lock, cpu)); } +void vmx_enable_tdp(void) +{ + kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, + enable_ept_ad_bits ? VMX_EPT_ACCESS_BIT : 0ull, + enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, + 0ull, VMX_EPT_EXECUTABLE_MASK, + cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, + enable_ept_ad_bits ? 0ull : SPTE_SPECIAL_MASK | VMX_EPT_RWX_MASK); + + ept_set_mmio_spte_mask(); + kvm_enable_tdp(); +} + static __init int hardware_setup(void) { int r = -ENOMEM, i, msr; @@ -6703,16 +6716,9 @@ static __init int hardware_setup(void) /* SELF-IPI */ vmx_disable_intercept_msr_x2apic(0x83f, MSR_TYPE_W, true); - if (enable_ept) { - kvm_mmu_set_mask_ptes(VMX_EPT_READABLE_MASK, - (enable_ept_ad_bits) ? VMX_EPT_ACCESS_BIT : 0ull, - (enable_ept_ad_bits) ? VMX_EPT_DIRTY_BIT : 0ull, - 0ull, VMX_EPT_EXECUTABLE_MASK, - cpu_has_vmx_ept_execute_only() ? - 0ull : VMX_EPT_READABLE_MASK); - ept_set_mmio_spte_mask(); - kvm_enable_tdp(); - } else + if (enable_ept) + vmx_enable_tdp(); + else kvm_disable_tdp(); update_ple_window_actual_max(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4aece8b0a4aa..c3ee5e29ea2a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6025,7 +6025,7 @@ int kvm_arch_init(void *opaque) kvm_mmu_set_mask_ptes(PT_USER_MASK, PT_ACCESSED_MASK, PT_DIRTY_MASK, PT64_NX_MASK, 0, - PT_PRESENT_MASK); + PT_PRESENT_MASK, 0); kvm_timer_init(); perf_register_guest_info_callbacks(&kvm_guest_cbs); -- cgit v1.2.3 From f98a3efb284a7950745d6c95be489193e6d4c657 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 15 Dec 2016 18:06:45 +0100 Subject: KVM: x86: use delivery to self in hyperv synic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Interrupt to self can be sent without knowing the APIC ID. Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/hyperv.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 1572c35b4f1a..08b27e0c7b71 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -305,13 +305,13 @@ static int synic_set_irq(struct kvm_vcpu_hv_synic *synic, u32 sint) return -ENOENT; memset(&irq, 0, sizeof(irq)); - irq.dest_id = kvm_apic_id(vcpu->arch.apic); + irq.shorthand = APIC_DEST_SELF; irq.dest_mode = APIC_DEST_PHYSICAL; irq.delivery_mode = APIC_DM_FIXED; irq.vector = vector; irq.level = 1; - ret = kvm_irq_delivery_to_apic(vcpu->kvm, NULL, &irq, NULL); + ret = kvm_irq_delivery_to_apic(vcpu->kvm, vcpu->arch.apic, &irq, NULL); trace_kvm_hv_synic_set_irq(vcpu->vcpu_id, sint, irq.vector, ret); return ret; } -- cgit v1.2.3 From 6e50043912d9c9c119e3c9c5378869d019df70a9 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 15 Dec 2016 18:06:46 +0100 Subject: KVM: x86: replace kvm_apic_id with kvm_{x,x2}apic_id MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit There were three calls sites: - recalculate_apic_map and kvm_apic_match_physical_addr, where it would only complicate implementation of x2APIC hotplug; - in apic_debug, where it was still somewhat preserved, but keeping the old function just for apic_debug was not worth it Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 31 ++++++++++++++++++++++--------- arch/x86/kvm/lapic.h | 11 ----------- 2 files changed, 22 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 5fe290c1b7d8..7c142f0fe9fd 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -115,6 +115,16 @@ static inline int apic_enabled(struct kvm_lapic *apic) (LVT_MASK | APIC_MODE_MASK | APIC_INPUT_POLARITY | \ APIC_LVT_REMOTE_IRR | APIC_LVT_LEVEL_TRIGGER) +static inline u8 kvm_xapic_id(struct kvm_lapic *apic) +{ + return kvm_lapic_get_reg(apic, APIC_ID) >> 24; +} + +static inline u32 kvm_x2apic_id(struct kvm_lapic *apic) +{ + return apic->vcpu->vcpu_id; +} + static inline bool kvm_apic_map_get_logical_dest(struct kvm_apic_map *map, u32 dest_id, struct kvm_lapic ***cluster, u16 *mask) { switch (map->mode) { @@ -159,13 +169,13 @@ static void recalculate_apic_map(struct kvm *kvm) struct kvm_apic_map *new, *old = NULL; struct kvm_vcpu *vcpu; int i; - u32 max_id = 255; + u32 max_id = 255; /* enough space for any xAPIC ID */ mutex_lock(&kvm->arch.apic_map_lock); kvm_for_each_vcpu(i, vcpu, kvm) if (kvm_apic_present(vcpu)) - max_id = max(max_id, kvm_apic_id(vcpu->arch.apic)); + max_id = max(max_id, kvm_x2apic_id(vcpu->arch.apic)); new = kvm_kvzalloc(sizeof(struct kvm_apic_map) + sizeof(struct kvm_lapic *) * ((u64)max_id + 1)); @@ -184,12 +194,13 @@ static void recalculate_apic_map(struct kvm *kvm) if (!kvm_apic_present(vcpu)) continue; - aid = kvm_apic_id(apic); - ldr = kvm_lapic_get_reg(apic, APIC_LDR); - + aid = apic_x2apic_mode(apic) ? kvm_x2apic_id(apic) + : kvm_xapic_id(apic); if (aid <= new->max_apic_id) new->phys_map[aid] = apic; + ldr = kvm_lapic_get_reg(apic, APIC_LDR); + if (apic_x2apic_mode(apic)) { new->mode |= KVM_APIC_MODE_X2APIC; } else if (ldr) { @@ -250,6 +261,8 @@ static inline void kvm_apic_set_x2apic_id(struct kvm_lapic *apic, u32 id) { u32 ldr = ((id >> 4) << 16) | (1 << (id & 0xf)); + WARN_ON_ONCE(id != apic->vcpu->vcpu_id); + kvm_lapic_set_reg(apic, APIC_ID, id); kvm_lapic_set_reg(apic, APIC_LDR, ldr); recalculate_apic_map(apic->vcpu->kvm); @@ -591,9 +604,9 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) return true; if (apic_x2apic_mode(apic)) - return mda == kvm_apic_id(apic); + return mda == kvm_x2apic_id(apic); - return mda == SET_APIC_DEST_FIELD(kvm_apic_id(apic)); + return mda == SET_APIC_DEST_FIELD(kvm_xapic_id(apic)); } static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) @@ -1907,9 +1920,9 @@ void kvm_lapic_reset(struct kvm_vcpu *vcpu, bool init_event) vcpu->arch.apic_arb_prio = 0; vcpu->arch.apic_attention = 0; - apic_debug("%s: vcpu=%p, id=%d, base_msr=" + apic_debug("%s: vcpu=%p, id=0x%x, base_msr=" "0x%016" PRIx64 ", base_address=0x%0lx.\n", __func__, - vcpu, kvm_apic_id(apic), + vcpu, kvm_lapic_get_reg(apic, APIC_ID), vcpu->arch.apic_base, apic->base_address); } diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index e0c80233b3e1..cb16e6fd2330 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -202,17 +202,6 @@ static inline int kvm_lapic_latched_init(struct kvm_vcpu *vcpu) return lapic_in_kernel(vcpu) && test_bit(KVM_APIC_INIT, &vcpu->arch.apic->pending_events); } -static inline u32 kvm_apic_id(struct kvm_lapic *apic) -{ - /* To avoid a race between apic_base and following APIC_ID update when - * switching to x2apic_mode, the x2apic mode returns initial x2apic id. - */ - if (apic_x2apic_mode(apic)) - return apic->vcpu->vcpu_id; - - return kvm_lapic_get_reg(apic, APIC_ID) >> 24; -} - bool kvm_apic_pending_eoi(struct kvm_vcpu *vcpu, int vector); void wait_lapic_expire(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From b4535b58ae0df8b7cf0fe92a0c23aa3cf862e3cc Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 15 Dec 2016 18:06:47 +0100 Subject: KVM: x86: make interrupt delivery fast and slow path behave the same MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Slow path tried to prevent IPIs from x2APIC VCPUs from being delivered to xAPIC VCPUs and vice-versa. Make slow path behave like fast path, which never distinguished that. Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 20 ++++++++------------ 1 file changed, 8 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7c142f0fe9fd..3ebef53d20a0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -592,10 +592,8 @@ static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) static bool kvm_apic_broadcast(struct kvm_lapic *apic, u32 mda) { - if (apic_x2apic_mode(apic)) - return mda == X2APIC_BROADCAST; - - return GET_APIC_DEST_FIELD(mda) == APIC_BROADCAST; + return mda == (apic_x2apic_mode(apic) ? + X2APIC_BROADCAST : APIC_BROADCAST); } static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) @@ -606,7 +604,7 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) if (apic_x2apic_mode(apic)) return mda == kvm_x2apic_id(apic); - return mda == SET_APIC_DEST_FIELD(kvm_xapic_id(apic)); + return mda == kvm_xapic_id(apic); } static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) @@ -623,7 +621,6 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) && (logical_id & mda & 0xffff) != 0; logical_id = GET_APIC_LOGICAL_ID(logical_id); - mda = GET_APIC_DEST_FIELD(mda); switch (kvm_lapic_get_reg(apic, APIC_DFR)) { case APIC_DFR_FLAT: @@ -640,9 +637,9 @@ static bool kvm_apic_match_logical_addr(struct kvm_lapic *apic, u32 mda) /* The KVM local APIC implementation has two quirks: * - * - the xAPIC MDA stores the destination at bits 24-31, while this - * is not true of struct kvm_lapic_irq's dest_id field. This is - * just a quirk in the API and is not problematic. + * - Real hardware delivers interrupts destined to x2APIC ID > 0xff to LAPICs + * in xAPIC mode if the "destination & 0xff" matches its xAPIC ID. + * KVM doesn't do that aliasing. * * - in-kernel IOAPIC messages have to be delivered directly to * x2APIC, because the kernel does not support interrupt remapping. @@ -658,13 +655,12 @@ static u32 kvm_apic_mda(struct kvm_vcpu *vcpu, unsigned int dest_id, struct kvm_lapic *source, struct kvm_lapic *target) { bool ipi = source != NULL; - bool x2apic_mda = apic_x2apic_mode(ipi ? source : target); if (!vcpu->kvm->arch.x2apic_broadcast_quirk_disabled && - !ipi && dest_id == APIC_BROADCAST && x2apic_mda) + !ipi && dest_id == APIC_BROADCAST && apic_x2apic_mode(target)) return X2APIC_BROADCAST; - return x2apic_mda ? dest_id : SET_APIC_DEST_FIELD(dest_id); + return dest_id; } bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, -- cgit v1.2.3 From 5bd5db385b3e13c702365574c0b7350c6ea45e84 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Thu, 15 Dec 2016 18:06:48 +0100 Subject: KVM: x86: allow hotplug of VCPU with APIC ID over 0xff MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LAPIC after reset is in xAPIC mode, which poses a problem for hotplug of VCPUs with high APIC ID, because reset VCPU is waiting for INIT/SIPI, but there is no way to uniquely address it using xAPIC. From many possible options, we chose the one that also works on real hardware: accepting interrupts addressed to LAPIC's x2APIC ID even in xAPIC mode. KVM intentionally differs from real hardware, because real hardware (Knights Landing) does just "x2apic_id & 0xff" to decide whether to accept the interrupt in xAPIC mode and it can deliver one interrupt to more than one physical destination, e.g. 0x123 to 0x123 and 0x23. Fixes: 682f732ecf73 ("KVM: x86: bump MAX_VCPUS to 288") Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 30 +++++++++++++++++++++++++----- 1 file changed, 25 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 3ebef53d20a0..7e9ac4606279 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -189,15 +189,26 @@ static void recalculate_apic_map(struct kvm *kvm) struct kvm_lapic *apic = vcpu->arch.apic; struct kvm_lapic **cluster; u16 mask; - u32 ldr, aid; + u32 ldr; + u8 xapic_id; + u32 x2apic_id; if (!kvm_apic_present(vcpu)) continue; - aid = apic_x2apic_mode(apic) ? kvm_x2apic_id(apic) - : kvm_xapic_id(apic); - if (aid <= new->max_apic_id) - new->phys_map[aid] = apic; + xapic_id = kvm_xapic_id(apic); + x2apic_id = kvm_x2apic_id(apic); + + /* Hotplug hack: see kvm_apic_match_physical_addr(), ... */ + if ((apic_x2apic_mode(apic) || x2apic_id > 0xff) && + x2apic_id <= new->max_apic_id) + new->phys_map[x2apic_id] = apic; + /* + * ... xAPIC ID of VCPUs with APIC ID > 0xff will wrap-around, + * prevent them from masking VCPUs with APIC ID <= 0xff. + */ + if (!apic_x2apic_mode(apic) && !new->phys_map[xapic_id]) + new->phys_map[xapic_id] = apic; ldr = kvm_lapic_get_reg(apic, APIC_LDR); @@ -604,6 +615,15 @@ static bool kvm_apic_match_physical_addr(struct kvm_lapic *apic, u32 mda) if (apic_x2apic_mode(apic)) return mda == kvm_x2apic_id(apic); + /* + * Hotplug hack: Make LAPIC in xAPIC mode also accept interrupts as if + * it were in x2APIC mode. Hotplugged VCPUs start in xAPIC mode and + * this allows unique addressing of VCPUs with APIC ID over 0xff. + * The 0xff condition is needed because writeable xAPIC ID. + */ + if (kvm_x2apic_id(apic) > 0xff && mda == kvm_x2apic_id(apic)) + return true; + return mda == kvm_xapic_id(apic); } -- cgit v1.2.3 From 0f89b207b04a1a399e19d35293658e3a571da3d7 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Wed, 14 Dec 2016 14:59:23 -0500 Subject: kvm: svm: Use the hardware provided GPA instead of page walk When a guest causes a NPF which requires emulation, KVM sometimes walks the guest page tables to translate the GVA to a GPA. This is unnecessary most of the time on AMD hardware since the hardware provides the GPA in EXITINFO2. The only exception cases involve string operations involving rep or operations that use two memory locations. With rep, the GPA will only be the value of the initial NPF and with dual memory locations we won't know which memory address was translated into EXITINFO2. Signed-off-by: Tom Lendacky Reviewed-by: Borislav Petkov Signed-off-by: Brijesh Singh Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_emulate.h | 1 + arch/x86/include/asm/kvm_host.h | 3 +++ arch/x86/kvm/emulate.c | 20 +++++++++++++---- arch/x86/kvm/svm.c | 2 ++ arch/x86/kvm/x86.c | 45 +++++++++++++++++++++++++++++--------- 5 files changed, 57 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_emulate.h b/arch/x86/include/asm/kvm_emulate.h index e9cd7befcb76..3e8c287090e4 100644 --- a/arch/x86/include/asm/kvm_emulate.h +++ b/arch/x86/include/asm/kvm_emulate.h @@ -441,5 +441,6 @@ int emulator_task_switch(struct x86_emulate_ctxt *ctxt, int emulate_int_real(struct x86_emulate_ctxt *ctxt, int irq); void emulator_invalidate_register_cache(struct x86_emulate_ctxt *ctxt); void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt); +bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt); #endif /* _ASM_X86_KVM_X86_EMULATE_H */ diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 99a71d90b6ae..0419e114f27b 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -675,6 +675,9 @@ struct kvm_vcpu_arch { int pending_ioapic_eoi; int pending_external_vector; + + /* GPA available (AMD only) */ + bool gpa_available; }; struct kvm_lpage_info { diff --git a/arch/x86/kvm/emulate.c b/arch/x86/kvm/emulate.c index 56628a44668b..2b8349a2b14b 100644 --- a/arch/x86/kvm/emulate.c +++ b/arch/x86/kvm/emulate.c @@ -173,6 +173,7 @@ #define NearBranch ((u64)1 << 52) /* Near branches */ #define No16 ((u64)1 << 53) /* No 16 bit operand */ #define IncSP ((u64)1 << 54) /* SP is incremented before ModRM calc */ +#define TwoMemOp ((u64)1 << 55) /* Instruction has two memory operand */ #define DstXacc (DstAccLo | SrcAccHi | SrcWrite) @@ -4256,7 +4257,7 @@ static const struct opcode group1[] = { }; static const struct opcode group1A[] = { - I(DstMem | SrcNone | Mov | Stack | IncSP, em_pop), N, N, N, N, N, N, N, + I(DstMem | SrcNone | Mov | Stack | IncSP | TwoMemOp, em_pop), N, N, N, N, N, N, N, }; static const struct opcode group2[] = { @@ -4294,7 +4295,7 @@ static const struct opcode group5[] = { I(SrcMemFAddr | ImplicitOps, em_call_far), I(SrcMem | NearBranch, em_jmp_abs), I(SrcMemFAddr | ImplicitOps, em_jmp_far), - I(SrcMem | Stack, em_push), D(Undefined), + I(SrcMem | Stack | TwoMemOp, em_push), D(Undefined), }; static const struct opcode group6[] = { @@ -4514,8 +4515,8 @@ static const struct opcode opcode_table[256] = { /* 0xA0 - 0xA7 */ I2bv(DstAcc | SrcMem | Mov | MemAbs, em_mov), I2bv(DstMem | SrcAcc | Mov | MemAbs | PageTable, em_mov), - I2bv(SrcSI | DstDI | Mov | String, em_mov), - F2bv(SrcSI | DstDI | String | NoWrite, em_cmp_r), + I2bv(SrcSI | DstDI | Mov | String | TwoMemOp, em_mov), + F2bv(SrcSI | DstDI | String | NoWrite | TwoMemOp, em_cmp_r), /* 0xA8 - 0xAF */ F2bv(DstAcc | SrcImm | NoWrite, em_test), I2bv(SrcAcc | DstDI | Mov | String, em_mov), @@ -5629,3 +5630,14 @@ void emulator_writeback_register_cache(struct x86_emulate_ctxt *ctxt) { writeback_registers(ctxt); } + +bool emulator_can_use_gpa(struct x86_emulate_ctxt *ctxt) +{ + if (ctxt->rep_prefix && (ctxt->d & String)) + return false; + + if (ctxt->d & TwoMemOp) + return false; + + return true; +} diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 08a4d3ab3455..d0414f054bdf 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4182,6 +4182,8 @@ static int handle_exit(struct kvm_vcpu *vcpu) trace_kvm_exit(exit_code, vcpu, KVM_ISA_SVM); + vcpu->arch.gpa_available = (exit_code == SVM_EXIT_NPF); + if (!is_cr_intercept(svm, INTERCEPT_CR0_WRITE)) vcpu->arch.cr0 = svm->vmcb->save.cr0; if (npt_enabled) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index c3ee5e29ea2a..edff19d1df97 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -4462,6 +4462,21 @@ out: } EXPORT_SYMBOL_GPL(kvm_write_guest_virt_system); +static int vcpu_is_mmio_gpa(struct kvm_vcpu *vcpu, unsigned long gva, + gpa_t gpa, bool write) +{ + /* For APIC access vmexit */ + if ((gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) + return 1; + + if (vcpu_match_mmio_gpa(vcpu, gpa)) { + trace_vcpu_match_mmio(gva, gpa, write, true); + return 1; + } + + return 0; +} + static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, gpa_t *gpa, struct x86_exception *exception, bool write) @@ -4488,16 +4503,7 @@ static int vcpu_mmio_gva_to_gpa(struct kvm_vcpu *vcpu, unsigned long gva, if (*gpa == UNMAPPED_GVA) return -1; - /* For APIC access vmexit */ - if ((*gpa & PAGE_MASK) == APIC_DEFAULT_PHYS_BASE) - return 1; - - if (vcpu_match_mmio_gpa(vcpu, *gpa)) { - trace_vcpu_match_mmio(gva, *gpa, write, true); - return 1; - } - - return 0; + return vcpu_is_mmio_gpa(vcpu, gva, *gpa, write); } int emulator_write_phys(struct kvm_vcpu *vcpu, gpa_t gpa, @@ -4594,6 +4600,22 @@ static int emulator_read_write_onepage(unsigned long addr, void *val, int handled, ret; bool write = ops->write; struct kvm_mmio_fragment *frag; + struct x86_emulate_ctxt *ctxt = &vcpu->arch.emulate_ctxt; + + /* + * If the exit was due to a NPF we may already have a GPA. + * If the GPA is present, use it to avoid the GVA to GPA table walk. + * Note, this cannot be used on string operations since string + * operation using rep will only have the initial GPA from the NPF + * occurred. + */ + if (vcpu->arch.gpa_available && + emulator_can_use_gpa(ctxt) && + vcpu_is_mmio_gpa(vcpu, addr, exception->address, write) && + (addr & ~PAGE_MASK) == (exception->address & ~PAGE_MASK)) { + gpa = exception->address; + goto mmio; + } ret = vcpu_mmio_gva_to_gpa(vcpu, addr, &gpa, exception, write); @@ -5610,6 +5632,9 @@ int x86_emulate_instruction(struct kvm_vcpu *vcpu, } restart: + /* Save the faulting GPA (cr2) in the address field */ + ctxt->exception.address = cr2; + r = x86_emulate_insn(ctxt); if (r == EMULATION_INTERCEPTED) -- cgit v1.2.3 From 0f1e261ead16ce09169bf2d223d4c8803576f85e Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sat, 17 Dec 2016 16:05:19 +0100 Subject: KVM: x86: add VCPU stat for KVM_REQ_EVENT processing This statistic can be useful to estimate the cost of an IRQ injection scenario, by comparing it with irq_injections. For example the stat shows that sti;hlt triggers more KVM_REQ_EVENT than sti;nop. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 1 + arch/x86/kvm/x86.c | 2 ++ 2 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 0419e114f27b..417502cf42b6 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -861,6 +861,7 @@ struct kvm_vcpu_stat { u64 hypercalls; u64 irq_injections; u64 nmi_injections; + u64 req_event; }; struct x86_instruction_info; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index edff19d1df97..b02af6285887 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -180,6 +180,7 @@ struct kvm_stats_debugfs_item debugfs_entries[] = { { "insn_emulation_fail", VCPU_STAT(insn_emulation_fail) }, { "irq_injections", VCPU_STAT(irq_injections) }, { "nmi_injections", VCPU_STAT(nmi_injections) }, + { "req_event", VCPU_STAT(req_event) }, { "mmu_shadow_zapped", VM_STAT(mmu_shadow_zapped) }, { "mmu_pte_write", VM_STAT(mmu_pte_write) }, { "mmu_pte_updated", VM_STAT(mmu_pte_updated) }, @@ -6756,6 +6757,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { + ++vcpu->stat.req_event; kvm_apic_accept_events(vcpu); if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED) { r = 1; -- cgit v1.2.3 From eb90f3417a0cc4880e979ccc84e41890d410ea5b Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 18 Dec 2016 14:02:21 +0100 Subject: KVM: vmx: speed up TPR below threshold vmexits Since we're already in VCPU context, all we have to do here is recompute the PPR value. That will in turn generate a KVM_REQ_EVENT if necessary. Reviewed-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 6 ++++++ arch/x86/kvm/lapic.h | 1 + arch/x86/kvm/vmx.c | 2 +- 3 files changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7e9ac4606279..6b1d3a76c1d0 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -595,6 +595,12 @@ static void apic_update_ppr(struct kvm_lapic *apic) } } +void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) +{ + apic_update_ppr(vcpu->arch.apic); +} +EXPORT_SYMBOL_GPL(kvm_apic_update_ppr); + static void apic_set_tpr(struct kvm_lapic *apic, u32 tpr) { kvm_lapic_set_reg(apic, APIC_TASKPRI, tpr); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index cb16e6fd2330..5b5b1ba644cb 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -73,6 +73,7 @@ bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, void __kvm_apic_update_irr(u32 *pir, void *regs); void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); +void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, struct dest_map *dest_map); int kvm_apic_local_deliver(struct kvm_lapic *apic, int lvt_type); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d2fe3a51876c..94fda2010f5f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6152,7 +6152,7 @@ static int handle_wrmsr(struct kvm_vcpu *vcpu) static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) { - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_apic_update_ppr(vcpu); return 1; } -- cgit v1.2.3 From b3c045d33218fe291b04d30e24b6eab0431987e6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 18 Dec 2016 21:47:54 +0100 Subject: KVM: lapic: remove unnecessary KVM_REQ_EVENT on PPR update PPR needs to be updated whenever on every IRR read because we may have missed TPR writes that _increased_ PPR. However, these writes need not generate KVM_REQ_EVENT, because either KVM_REQ_EVENT has been set already in __apic_accept_irq, or we are going to process the interrupt right away. Reviewed-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 6b1d3a76c1d0..a878e33119a3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -570,7 +570,15 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) __clear_bit(KVM_APIC_PV_EOI_PENDING, &vcpu->arch.apic_attention); } -static void apic_update_ppr(struct kvm_lapic *apic) +static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) +{ + int highest_irr = apic_find_highest_irr(apic); + if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) + return -1; + return highest_irr; +} + +static bool __apic_update_ppr(struct kvm_lapic *apic, u32 *new_ppr) { u32 tpr, isrv, ppr, old_ppr; int isr; @@ -588,11 +596,19 @@ static void apic_update_ppr(struct kvm_lapic *apic) apic_debug("vlapic %p, ppr 0x%x, isr 0x%x, isrv 0x%x", apic, ppr, isr, isrv); - if (old_ppr != ppr) { + *new_ppr = ppr; + if (old_ppr != ppr) kvm_lapic_set_reg(apic, APIC_PROCPRI, ppr); - if (ppr < old_ppr) - kvm_make_request(KVM_REQ_EVENT, apic->vcpu); - } + + return ppr < old_ppr; +} + +static void apic_update_ppr(struct kvm_lapic *apic) +{ + u32 ppr; + + if (__apic_update_ppr(apic, &ppr)) + kvm_make_request(KVM_REQ_EVENT, apic->vcpu); } void kvm_apic_update_ppr(struct kvm_vcpu *vcpu) @@ -2056,17 +2072,13 @@ nomem: int kvm_apic_has_interrupt(struct kvm_vcpu *vcpu) { struct kvm_lapic *apic = vcpu->arch.apic; - int highest_irr; + u32 ppr; if (!apic_enabled(apic)) return -1; - apic_update_ppr(apic); - highest_irr = apic_find_highest_irr(apic); - if ((highest_irr == -1) || - ((highest_irr & 0xF0) <= kvm_lapic_get_reg(apic, APIC_PROCPRI))) - return -1; - return highest_irr; + __apic_update_ppr(apic, &ppr); + return apic_has_interrupt_for_ppr(apic, ppr); } int kvm_apic_accept_pic_intr(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 26fbbee5815e9352187ac18f0aa53534f62567ff Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 18 Dec 2016 13:54:58 +0100 Subject: KVM: lapic: do not set KVM_REQ_EVENT unnecessarily on PPR update On PPR update, we set KVM_REQ_EVENT unconditionally anytime PPR is lowered. But we can take into account IRR here already. Reviewed-by: Roman Kagan Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index a878e33119a3..457fb206647d 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -607,7 +607,8 @@ static void apic_update_ppr(struct kvm_lapic *apic) { u32 ppr; - if (__apic_update_ppr(apic, &ppr)) + if (__apic_update_ppr(apic, &ppr) && + apic_has_interrupt_for_ppr(apic, ppr) != -1) kvm_make_request(KVM_REQ_EVENT, apic->vcpu); } -- cgit v1.2.3 From 4d82d12b39132e820b9ac4aa058ccc733db98917 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Sun, 18 Dec 2016 21:43:41 +0100 Subject: KVM: lapic: do not scan IRR when delivering an interrupt On interrupt delivery the PPR can only grow (except for auto-EOI), so it is impossible that non-auto-EOI interrupt delivery results in KVM_REQ_EVENT. We can therefore use __apic_update_ppr. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 457fb206647d..10a745faa659 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2115,6 +2115,7 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) { int vector = kvm_apic_has_interrupt(vcpu); struct kvm_lapic *apic = vcpu->arch.apic; + u32 ppr; if (vector == -1) return -1; @@ -2126,13 +2127,23 @@ int kvm_get_apic_interrupt(struct kvm_vcpu *vcpu) * because the process would deliver it through the IDT. */ - apic_set_isr(vector, apic); - apic_update_ppr(apic); apic_clear_irr(vector, apic); - if (test_bit(vector, vcpu_to_synic(vcpu)->auto_eoi_bitmap)) { - apic_clear_isr(vector, apic); + /* + * For auto-EOI interrupts, there might be another pending + * interrupt above PPR, so check whether to raise another + * KVM_REQ_EVENT. + */ apic_update_ppr(apic); + } else { + /* + * For normal interrupts, PPR has been raised and there cannot + * be a higher-priority pending interrupt---except if there was + * a concurrent interrupt injection, but that would have + * triggered KVM_REQ_EVENT already. + */ + apic_set_isr(vector, apic); + __apic_update_ppr(apic, &ppr); } return vector; -- cgit v1.2.3 From 21e7fbe7db2a983c046a05f12419d88c554a0f5a Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 22 Dec 2016 15:49:55 -0800 Subject: kvm: nVMX: Reorder error checks for emulated VMXON MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Checks on the operand to VMXON are performed after the check for legacy mode operation and the #GP checks, according to the pseudo-code in Intel's SDM. Signed-off-by: Jim Mattson Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 94fda2010f5f..4e691035a32d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7180,9 +7180,6 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } - if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) - return 1; - if (vmx->nested.vmxon) { nested_vmx_failValid(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION); return kvm_skip_emulated_instruction(vcpu); @@ -7194,6 +7191,9 @@ static int handle_vmon(struct kvm_vcpu *vcpu) return 1; } + if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) + return 1; + if (cpu_has_vmx_msr_bitmap()) { vmx->nested.msr_bitmap = (unsigned long *)__get_free_page(GFP_KERNEL); -- cgit v1.2.3 From f1be6cdaf57ce918828b6cff6ff2b4ea87be7f62 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sat, 7 Jan 2017 14:34:57 +0200 Subject: x86/platform/intel-mid: Make intel_scu_device_register() static There is no need anymore to have intel_scu_device_register() exported. Annotate it with static keyword. While here, rename to intel_scu_ipc_device_register() to use same pattern for all SFI enumerated device register helpers. Signed-off-by: Andy Shevchenko Link: http://lkml.kernel.org/r/20170107123457.53033-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/intel-mid.h | 1 - arch/x86/platform/intel-mid/sfi.c | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/intel-mid.h b/arch/x86/include/asm/intel-mid.h index 91ead0cefa76..fe04491130ae 100644 --- a/arch/x86/include/asm/intel-mid.h +++ b/arch/x86/include/asm/intel-mid.h @@ -27,7 +27,6 @@ extern void intel_mid_pwr_power_off(void); extern int intel_mid_pwr_get_lss_id(struct pci_dev *pdev); extern int get_gpio_by_name(const char *name); -extern void intel_scu_device_register(struct platform_device *pdev); extern int __init sfi_parse_mrtc(struct sfi_table_header *table); extern int __init sfi_parse_mtmr(struct sfi_table_header *table); extern int sfi_mrtc_num; diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index e8f68f652087..ce1303830231 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -226,7 +226,7 @@ int get_gpio_by_name(const char *name) return -EINVAL; } -void __init intel_scu_device_register(struct platform_device *pdev) +static void __init intel_scu_ipc_device_register(struct platform_device *pdev) { if (ipc_next_dev == MAX_IPCDEVS) pr_err("too many SCU IPC devices"); @@ -361,7 +361,7 @@ static void __init sfi_handle_ipc_dev(struct sfi_device_table_entry *pentry, pdev->dev.platform_data = pdata; if (dev->delay) - intel_scu_device_register(pdev); + intel_scu_ipc_device_register(pdev); else platform_device_add(pdev); } -- cgit v1.2.3 From fa5b6ec9e5274aeae2326e25995506a953e5f878 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 10 Jan 2017 13:35:40 -0800 Subject: lib/Kconfig.debug: Add ARCH_HAS_DEBUG_VIRTUAL DEBUG_VIRTUAL currently depends on DEBUG_KERNEL && X86. arm64 is getting the same support. Rather than add a list of architectures, switch this to ARCH_HAS_DEBUG_VIRTUAL and let architectures select it as appropriate. Acked-by: Ingo Molnar Reviewed-by: Mark Rutland Tested-by: Mark Rutland Suggested-by: Mark Rutland Signed-off-by: Laura Abbott Signed-off-by: Will Deacon --- arch/x86/Kconfig | 1 + lib/Kconfig.debug | 5 ++++- 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..f1d4e8f2131f 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -46,6 +46,7 @@ config X86 select ARCH_CLOCKSOURCE_DATA select ARCH_DISCARD_MEMBLOCK select ARCH_HAS_ACPI_TABLE_UPGRADE if ACPI + select ARCH_HAS_DEBUG_VIRTUAL select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE select ARCH_HAS_FAST_MULTIPLIER diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b06848a104e6..2aed31608b86 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -622,9 +622,12 @@ config DEBUG_VM_PGFLAGS If unsure, say N. +config ARCH_HAS_DEBUG_VIRTUAL + bool + config DEBUG_VIRTUAL bool "Debug VM translations" - depends on DEBUG_KERNEL && X86 + depends on DEBUG_KERNEL && ARCH_HAS_DEBUG_VIRTUAL help Enable some costly sanity checks in virtual to page code. This can catch mistakes with virt_to_page() and friends. -- cgit v1.2.3 From c19a5f35e315837170ee337eed21c7087ea94192 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 11 Jan 2017 15:49:04 +0100 Subject: x86/e820/32: Fix e820_search_gap() error handling on x86-32 GCC correctly points out that on 32-bit kernels, e820_search_gap() not finding a start now leads to pci_mem_start ('gapstart') being set to an uninitialized value: arch/x86/kernel/e820.c: In function 'e820_setup_gap': arch/x86/kernel/e820.c:641:16: error: 'gapstart' may be used uninitialized in this function [-Werror=maybe-uninitialized] This restores the behavior from before this cleanup: b4ed1d15b453 ("x86/e820: Make e820_search_gap() static and remove unused variables") ... defaulting to address 0x10000000 if nothing was found. Signed-off-by: Arnd Bergmann Cc: Dan Williams Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Toshi Kani Cc: Wei Yang Fixes: b4ed1d15b453 ("x86/e820: Make e820_search_gap() static and remove unused variables") Link: http://lkml.kernel.org/r/20170111144926.695369-1-arnd@arndb.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/e820.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/e820.c b/arch/x86/kernel/e820.c index 46f2afd3577a..b2bbad6ebe4d 100644 --- a/arch/x86/kernel/e820.c +++ b/arch/x86/kernel/e820.c @@ -626,14 +626,16 @@ __init void e820_setup_gap(void) gapsize = 0x400000; found = e820_search_gap(&gapstart, &gapsize); -#ifdef CONFIG_X86_64 if (!found) { +#ifdef CONFIG_X86_64 gapstart = (max_pfn << PAGE_SHIFT) + 1024*1024; printk(KERN_ERR "e820: cannot find a gap in the 32bit address range\n" "e820: PCI devices with unassigned 32bit BARs may break!\n"); - } +#else + gapstart = 0x10000000; #endif + } /* * e820_reserve_resources_late protect stolen RAM already -- cgit v1.2.3 From 6e03f66c001790d6ca853c68a56c87460bc86467 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 10 Jan 2017 18:43:54 +0200 Subject: locking/jump_labels: Update bug_at() boot message First of all, %*ph specifier allows to dump data in hex format using the pointer to a buffer. This is suitable to use here. Besides that Thomas suggested to move it to critical level and replace __FILE__ by explicit mention of "jumplabel". Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170110164354.47372-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/jump_label.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/jump_label.c b/arch/x86/kernel/jump_label.c index fc25f698d792..c37bd0f39c70 100644 --- a/arch/x86/kernel/jump_label.c +++ b/arch/x86/kernel/jump_label.c @@ -32,8 +32,7 @@ static void bug_at(unsigned char *ip, int line) * Something went wrong. Crash the box, as something could be * corrupting the kernel. */ - pr_warning("Unexpected op at %pS [%p] (%02x %02x %02x %02x %02x) %s:%d\n", - ip, ip, ip[0], ip[1], ip[2], ip[3], ip[4], __FILE__, line); + pr_crit("jump_label: Fatal kernel bug, unexpected op at %pS [%p] (%5ph) %d\n", ip, ip, ip, line); BUG(); } -- cgit v1.2.3 From b8fbe71f7535d4dfeed0bb8d924107dc58d502e2 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 11 Jan 2017 20:28:06 +0800 Subject: crypto: x86/chacha20 - Manually align stack buffer The kernel on x86-64 cannot use gcc attribute align to align to a 16-byte boundary. This patch reverts to the old way of aligning it by hand. Fixes: 9ae433bc79f9 ("crypto: chacha20 - convert generic and...") Signed-off-by: Herbert Xu Reviewed-by: Ard Biesheuvel --- arch/x86/crypto/chacha20_glue.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/chacha20_glue.c b/arch/x86/crypto/chacha20_glue.c index 78f75b07dc25..1e6af1b35f7b 100644 --- a/arch/x86/crypto/chacha20_glue.c +++ b/arch/x86/crypto/chacha20_glue.c @@ -67,10 +67,13 @@ static int chacha20_simd(struct skcipher_request *req) { struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req); struct chacha20_ctx *ctx = crypto_skcipher_ctx(tfm); - u32 state[16] __aligned(CHACHA20_STATE_ALIGN); + u32 *state, state_buf[16 + 2] __aligned(8); struct skcipher_walk walk; int err; + BUILD_BUG_ON(CHACHA20_STATE_ALIGN != 16); + state = PTR_ALIGN(state_buf + 0, CHACHA20_STATE_ALIGN); + if (req->cryptlen <= CHACHA20_BLOCK_SIZE || !may_use_simd()) return crypto_chacha20_crypt(req); -- cgit v1.2.3 From a665ece8b471de45bc19af05d52a1eaa5bc06dca Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 12 Jan 2017 13:23:31 +0200 Subject: x86/platform/intel: Remove PMIC GPIO block support Moorestown support was removed by commit: 1a8359e411eb ("x86/mid: Remove Intel Moorestown") Remove this leftover. Signed-off-by: Andy Shevchenko Cc: Darren Hart Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: platform-driver-x86@vger.kernel.org Link: http://lkml.kernel.org/r/20170112112331.93236-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/sfi.c | 1 - drivers/platform/x86/Kconfig | 7 - drivers/platform/x86/Makefile | 1 - drivers/platform/x86/intel_pmic_gpio.c | 326 --------------------------------- include/linux/intel_pmic_gpio.h | 15 -- 5 files changed, 350 deletions(-) delete mode 100644 drivers/platform/x86/intel_pmic_gpio.c delete mode 100644 include/linux/intel_pmic_gpio.h (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index ce1303830231..19b43e3a9f0f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -15,7 +15,6 @@ #include #include #include -#include #include #include #include diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5fe8be089b8b..fe5a3da3682f 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -816,13 +816,6 @@ config INTEL_SCU_IPC_UTIL low level access for debug work and updating the firmware. Say N unless you will be doing this on an Intel MID platform. -config GPIO_INTEL_PMIC - bool "Intel PMIC GPIO support" - depends on INTEL_SCU_IPC && GPIOLIB - ---help--- - Say Y here to support GPIO via the SCU IPC interface - on Intel MID platforms. - config INTEL_MID_POWER_BUTTON tristate "power button driver for Intel MID platforms" depends on INTEL_SCU_IPC && INPUT diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index d4111f0f8a78..b2f52a7690af 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -50,7 +50,6 @@ obj-$(CONFIG_INTEL_SCU_IPC) += intel_scu_ipc.o obj-$(CONFIG_INTEL_SCU_IPC_UTIL) += intel_scu_ipcutil.o obj-$(CONFIG_INTEL_MFLD_THERMAL) += intel_mid_thermal.o obj-$(CONFIG_INTEL_IPS) += intel_ips.o -obj-$(CONFIG_GPIO_INTEL_PMIC) += intel_pmic_gpio.o obj-$(CONFIG_XO1_RFKILL) += xo1-rfkill.o obj-$(CONFIG_XO15_EBOOK) += xo15-ebook.o obj-$(CONFIG_IBM_RTL) += ibm_rtl.o diff --git a/drivers/platform/x86/intel_pmic_gpio.c b/drivers/platform/x86/intel_pmic_gpio.c deleted file mode 100644 index 91ae58510d92..000000000000 --- a/drivers/platform/x86/intel_pmic_gpio.c +++ /dev/null @@ -1,326 +0,0 @@ -/* Moorestown PMIC GPIO (access through IPC) driver - * Copyright (c) 2008 - 2009, Intel Corporation. - * - * Author: Alek Du - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. - */ - -/* Supports: - * Moorestown platform PMIC chip - */ - -#define pr_fmt(fmt) "%s: " fmt, __func__ - -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include - -#define DRIVER_NAME "pmic_gpio" - -/* register offset that IPC driver should use - * 8 GPIO + 8 GPOSW (6 controllable) + 8GPO - */ -enum pmic_gpio_register { - GPIO0 = 0xE0, - GPIO7 = 0xE7, - GPIOINT = 0xE8, - GPOSWCTL0 = 0xEC, - GPOSWCTL5 = 0xF1, - GPO = 0xF4, -}; - -/* bits definition for GPIO & GPOSW */ -#define GPIO_DRV 0x01 -#define GPIO_DIR 0x02 -#define GPIO_DIN 0x04 -#define GPIO_DOU 0x08 -#define GPIO_INTCTL 0x30 -#define GPIO_DBC 0xc0 - -#define GPOSW_DRV 0x01 -#define GPOSW_DOU 0x08 -#define GPOSW_RDRV 0x30 - -#define GPIO_UPDATE_TYPE 0x80000000 - -#define NUM_GPIO 24 - -struct pmic_gpio { - struct mutex buslock; - struct gpio_chip chip; - void *gpiointr; - int irq; - unsigned irq_base; - unsigned int update_type; - u32 trigger_type; -}; - -static void pmic_program_irqtype(int gpio, int type) -{ - if (type & IRQ_TYPE_EDGE_RISING) - intel_scu_ipc_update_register(GPIO0 + gpio, 0x20, 0x20); - else - intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x20); - - if (type & IRQ_TYPE_EDGE_FALLING) - intel_scu_ipc_update_register(GPIO0 + gpio, 0x10, 0x10); - else - intel_scu_ipc_update_register(GPIO0 + gpio, 0x00, 0x10); -}; - -static int pmic_gpio_direction_input(struct gpio_chip *chip, unsigned offset) -{ - if (offset >= 8) { - pr_err("only pin 0-7 support input\n"); - return -1;/* we only have 8 GPIO can use as input */ - } - return intel_scu_ipc_update_register(GPIO0 + offset, - GPIO_DIR, GPIO_DIR); -} - -static int pmic_gpio_direction_output(struct gpio_chip *chip, - unsigned offset, int value) -{ - int rc = 0; - - if (offset < 8)/* it is GPIO */ - rc = intel_scu_ipc_update_register(GPIO0 + offset, - GPIO_DRV | (value ? GPIO_DOU : 0), - GPIO_DRV | GPIO_DOU | GPIO_DIR); - else if (offset < 16)/* it is GPOSW */ - rc = intel_scu_ipc_update_register(GPOSWCTL0 + offset - 8, - GPOSW_DRV | (value ? GPOSW_DOU : 0), - GPOSW_DRV | GPOSW_DOU | GPOSW_RDRV); - else if (offset > 15 && offset < 24)/* it is GPO */ - rc = intel_scu_ipc_update_register(GPO, - value ? 1 << (offset - 16) : 0, - 1 << (offset - 16)); - else { - pr_err("invalid PMIC GPIO pin %d!\n", offset); - WARN_ON(1); - } - - return rc; -} - -static int pmic_gpio_get(struct gpio_chip *chip, unsigned offset) -{ - u8 r; - int ret; - - /* we only have 8 GPIO pins we can use as input */ - if (offset >= 8) - return -EOPNOTSUPP; - ret = intel_scu_ipc_ioread8(GPIO0 + offset, &r); - if (ret < 0) - return ret; - return r & GPIO_DIN; -} - -static void pmic_gpio_set(struct gpio_chip *chip, unsigned offset, int value) -{ - if (offset < 8)/* it is GPIO */ - intel_scu_ipc_update_register(GPIO0 + offset, - GPIO_DRV | (value ? GPIO_DOU : 0), - GPIO_DRV | GPIO_DOU); - else if (offset < 16)/* it is GPOSW */ - intel_scu_ipc_update_register(GPOSWCTL0 + offset - 8, - GPOSW_DRV | (value ? GPOSW_DOU : 0), - GPOSW_DRV | GPOSW_DOU | GPOSW_RDRV); - else if (offset > 15 && offset < 24) /* it is GPO */ - intel_scu_ipc_update_register(GPO, - value ? 1 << (offset - 16) : 0, - 1 << (offset - 16)); -} - -/* - * This is called from genirq with pg->buslock locked and - * irq_desc->lock held. We can not access the scu bus here, so we - * store the change and update in the bus_sync_unlock() function below - */ -static int pmic_irq_type(struct irq_data *data, unsigned type) -{ - struct pmic_gpio *pg = irq_data_get_irq_chip_data(data); - u32 gpio = data->irq - pg->irq_base; - - if (gpio >= pg->chip.ngpio) - return -EINVAL; - - pg->trigger_type = type; - pg->update_type = gpio | GPIO_UPDATE_TYPE; - return 0; -} - -static int pmic_gpio_to_irq(struct gpio_chip *chip, unsigned offset) -{ - struct pmic_gpio *pg = gpiochip_get_data(chip); - - return pg->irq_base + offset; -} - -static void pmic_bus_lock(struct irq_data *data) -{ - struct pmic_gpio *pg = irq_data_get_irq_chip_data(data); - - mutex_lock(&pg->buslock); -} - -static void pmic_bus_sync_unlock(struct irq_data *data) -{ - struct pmic_gpio *pg = irq_data_get_irq_chip_data(data); - - if (pg->update_type) { - unsigned int gpio = pg->update_type & ~GPIO_UPDATE_TYPE; - - pmic_program_irqtype(gpio, pg->trigger_type); - pg->update_type = 0; - } - mutex_unlock(&pg->buslock); -} - -/* the gpiointr register is read-clear, so just do nothing. */ -static void pmic_irq_unmask(struct irq_data *data) { } - -static void pmic_irq_mask(struct irq_data *data) { } - -static struct irq_chip pmic_irqchip = { - .name = "PMIC-GPIO", - .irq_mask = pmic_irq_mask, - .irq_unmask = pmic_irq_unmask, - .irq_set_type = pmic_irq_type, - .irq_bus_lock = pmic_bus_lock, - .irq_bus_sync_unlock = pmic_bus_sync_unlock, -}; - -static irqreturn_t pmic_irq_handler(int irq, void *data) -{ - struct pmic_gpio *pg = data; - u8 intsts = *((u8 *)pg->gpiointr + 4); - int gpio; - irqreturn_t ret = IRQ_NONE; - - for (gpio = 0; gpio < 8; gpio++) { - if (intsts & (1 << gpio)) { - pr_debug("pmic pin %d triggered\n", gpio); - generic_handle_irq(pg->irq_base + gpio); - ret = IRQ_HANDLED; - } - } - return ret; -} - -static int platform_pmic_gpio_probe(struct platform_device *pdev) -{ - struct device *dev = &pdev->dev; - int irq = platform_get_irq(pdev, 0); - struct intel_pmic_gpio_platform_data *pdata = dev->platform_data; - - struct pmic_gpio *pg; - int retval; - int i; - - if (irq < 0) { - dev_dbg(dev, "no IRQ line\n"); - return -EINVAL; - } - - if (!pdata || !pdata->gpio_base || !pdata->irq_base) { - dev_dbg(dev, "incorrect or missing platform data\n"); - return -EINVAL; - } - - pg = kzalloc(sizeof(*pg), GFP_KERNEL); - if (!pg) - return -ENOMEM; - - dev_set_drvdata(dev, pg); - - pg->irq = irq; - /* setting up SRAM mapping for GPIOINT register */ - pg->gpiointr = ioremap_nocache(pdata->gpiointr, 8); - if (!pg->gpiointr) { - pr_err("Can not map GPIOINT\n"); - retval = -EINVAL; - goto err2; - } - pg->irq_base = pdata->irq_base; - pg->chip.label = "intel_pmic"; - pg->chip.direction_input = pmic_gpio_direction_input; - pg->chip.direction_output = pmic_gpio_direction_output; - pg->chip.get = pmic_gpio_get; - pg->chip.set = pmic_gpio_set; - pg->chip.to_irq = pmic_gpio_to_irq; - pg->chip.base = pdata->gpio_base; - pg->chip.ngpio = NUM_GPIO; - pg->chip.can_sleep = 1; - pg->chip.parent = dev; - - mutex_init(&pg->buslock); - - pg->chip.parent = dev; - retval = gpiochip_add_data(&pg->chip, pg); - if (retval) { - pr_err("Can not add pmic gpio chip\n"); - goto err; - } - - retval = request_irq(pg->irq, pmic_irq_handler, 0, "pmic", pg); - if (retval) { - pr_warn("Interrupt request failed\n"); - goto fail_request_irq; - } - - for (i = 0; i < 8; i++) { - irq_set_chip_and_handler_name(i + pg->irq_base, - &pmic_irqchip, - handle_simple_irq, - "demux"); - irq_set_chip_data(i + pg->irq_base, pg); - } - return 0; - -fail_request_irq: - gpiochip_remove(&pg->chip); -err: - iounmap(pg->gpiointr); -err2: - kfree(pg); - return retval; -} - -/* at the same time, register a platform driver - * this supports the sfi 0.81 fw */ -static struct platform_driver platform_pmic_gpio_driver = { - .driver = { - .name = DRIVER_NAME, - }, - .probe = platform_pmic_gpio_probe, -}; - -static int __init platform_pmic_gpio_init(void) -{ - return platform_driver_register(&platform_pmic_gpio_driver); -} -subsys_initcall(platform_pmic_gpio_init); diff --git a/include/linux/intel_pmic_gpio.h b/include/linux/intel_pmic_gpio.h deleted file mode 100644 index 920109a29191..000000000000 --- a/include/linux/intel_pmic_gpio.h +++ /dev/null @@ -1,15 +0,0 @@ -#ifndef LINUX_INTEL_PMIC_H -#define LINUX_INTEL_PMIC_H - -struct intel_pmic_gpio_platform_data { - /* the first IRQ of the chip */ - unsigned irq_base; - /* number assigned to the first GPIO */ - unsigned gpio_base; - /* sram address for gpiointr register, the langwell chip will map - * the PMIC spi GPIO expander's GPIOINTR register in sram. - */ - unsigned gpiointr; -}; - -#endif -- cgit v1.2.3 From de1c2540aa4f7796f31acf5432597bb0eb086250 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 13 Jan 2017 18:43:55 +0200 Subject: x86/platform/intel-mid: Enable RTC on Intel Merrifield Intel Merrifield has legacy RTC in contrast to the rest on Intel MID platforms. Set legacy RTC flag explicitly in architecture initialization code and allocate interrupt for it. Signed-off-by: Andy Shevchenko Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170113164355.66161-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/mrfld.c | 1 + arch/x86/platform/intel-mid/sfi.c | 14 ++++++++++++++ 2 files changed, 15 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/mrfld.c b/arch/x86/platform/intel-mid/mrfld.c index e0607c77a1bd..ae7bdeb0e507 100644 --- a/arch/x86/platform/intel-mid/mrfld.c +++ b/arch/x86/platform/intel-mid/mrfld.c @@ -91,6 +91,7 @@ static unsigned long __init tangier_calibrate_tsc(void) static void __init tangier_arch_setup(void) { x86_platform.calibrate_tsc = tangier_calibrate_tsc; + x86_platform.legacy.rtc = 1; } /* tangier arch ops */ diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index 19b43e3a9f0f..e4d4cabbb370 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -41,6 +41,7 @@ #include #include #include +#include #define SFI_SIG_OEM0 "OEM0" #define MAX_IPCDEVS 24 @@ -539,8 +540,21 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) return 0; } +static int __init intel_mid_legacy_rtc_init(void) +{ + struct irq_alloc_info info; + + if (!x86_platform.legacy.rtc) + return -ENODEV; + + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, 0); + return mp_map_gsi_to_irq(RTC_IRQ, IOAPIC_MAP_ALLOC, &info); +} + static int __init intel_mid_platform_init(void) { + intel_mid_legacy_rtc_init(); + sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); return 0; -- cgit v1.2.3 From eee5715efd8c268724b14c956de6af5d4931f470 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 13 Jan 2017 09:21:11 -0600 Subject: x86/platform/UV: Fix panic with missing UVsystab support Fix the panic where KEXEC'd kernel does not have access to EFI runtime mappings. This may cause the extended UVsystab to not be available. The solution is to revert to non-UV mode and continue with limited capabilities. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Reviewed-by: Alex Thorlton Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170113152111.118886202@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 35690a168cf7..43930787cab9 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1172,19 +1172,25 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) index, _min_socket, _max_socket, _min_pnode, _max_pnode); } -static void __init decode_uv_systab(void) +static int __init decode_uv_systab(void) { struct uv_systab *st; int i; + if (uv_hub_info->hub_revision < UV4_HUB_REVISION_BASE) + return 0; /* No extended UVsystab required */ + st = uv_systab; - if ((!st || st->revision < UV_SYSTAB_VERSION_UV4) && !is_uv4_hub()) - return; - if (st->revision != UV_SYSTAB_VERSION_UV4_LATEST) { - pr_crit( + if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) { + int rev = st ? st->revision : 0; + + pr_err( "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", - st->revision, UV_SYSTAB_VERSION_UV4_LATEST); - BUG(); + rev, UV_SYSTAB_VERSION_UV4_LATEST); + pr_err( + "UV: Cannot support UV operations, switching to generic PC\n"); + uv_system_type = UV_NONE; + return -EINVAL; } for (i = 0; st->entry[i].type != UV_SYSTAB_TYPE_UNUSED; i++) { @@ -1205,6 +1211,7 @@ static void __init decode_uv_systab(void) break; } } + return 0; } /* @@ -1373,7 +1380,8 @@ void __init uv_system_init(void) map_low_mmrs(); uv_bios_init(); /* get uv_systab for decoding */ - decode_uv_systab(); + if (decode_uv_systab() < 0) + return; /* UVsystab problem, abort UV init */ build_socket_tables(); build_uv_gr_table(); uv_init_hub_info(&hub_info); -- cgit v1.2.3 From 81a71176740624ef3b1bff50c51e7b4aa187353d Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Fri, 13 Jan 2017 09:21:12 -0600 Subject: x86/platform/UV: Fix 2 socket config problem A UV4 chassis with only 2 sockets configured can unexpectedly target the wrong UV hub. Fix the problem by limiting the minimum size of a partition to 4 sockets even if only 2 are configured. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170113152111.313888353@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 43930787cab9..97ea712fc72f 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -56,6 +56,7 @@ static struct { unsigned int socketid_shift; /* aka pnode_shift for UV1/2/3 */ unsigned int pnode_mask; unsigned int gpa_shift; + unsigned int gnode_shift; } uv_cpuid; int uv_min_hub_revision_id; @@ -133,6 +134,7 @@ static int __init early_get_pnodeid(void) break; case UV4_HUB_PART_NUMBER: uv_min_hub_revision_id += UV4_HUB_REVISION_BASE - 1; + uv_cpuid.gnode_shift = 2; /* min partition is 4 sockets */ break; } @@ -1074,8 +1076,10 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hub_info) (1UL << uv_cpuid.gpa_shift) - 1; node_id.v = uv_read_local_mmr(UVH_NODE_ID); + uv_cpuid.gnode_shift = max_t(unsigned int, + uv_cpuid.gnode_shift, mn.n_val); hub_info->gnode_extra = - (node_id.s.node_id & ~((1 << mn.n_val) - 1)) >> 1; + (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; hub_info->gnode_upper = ((unsigned long)hub_info->gnode_extra << mn.m_val); -- cgit v1.2.3 From aef591cd3d1ddccb268f64c836d38382007373c1 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Thu, 12 Jan 2017 15:27:58 -0500 Subject: locking/spinlocks/x86, paravirt: Remove paravirt_ticketlocks_enabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is a follow-up of commit: cfd8983f03c7b2 ("x86, locking/spinlocks: Remove ticket (spin)lock implementation") The static_key structure 'paravirt_ticketlocks_enabled' is now removed as it is no longer used. As a result, the init functions kvm_spinlock_init_jump() and xen_init_spinlocks_jump() are also removed. A simple build and boot test was done to verify it. Signed-off-by: Waiman Long Reviewed-by: Boris Ostrovsky Cc: Alok Kataria Cc: Chris Wright Cc: Jeremy Fitzhardinge Cc: Juergen Gross Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Radim Krčmář Cc: Rusty Russell Cc: Thomas Gleixner Cc: kvm@vger.kernel.org Cc: linux-arch@vger.kernel.org Cc: virtualization@lists.linux-foundation.org Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1484252878-1962-1-git-send-email-longman@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/spinlock.h | 3 --- arch/x86/kernel/kvm.c | 14 -------------- arch/x86/kernel/paravirt-spinlocks.c | 3 --- arch/x86/xen/spinlock.c | 19 ------------------- 4 files changed, 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 921bea7a2708..6d391909e864 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -23,9 +23,6 @@ /* How long a lock should spin before we consider blocking */ #define SPIN_THRESHOLD (1 << 15) -extern struct static_key paravirt_ticketlocks_enabled; -static __always_inline bool static_key_false(struct static_key *key); - #include /* diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 36bc66416021..099fcba4981d 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -620,18 +620,4 @@ void __init kvm_spinlock_init(void) } } -static __init int kvm_spinlock_init_jump(void) -{ - if (!kvm_para_available()) - return 0; - if (!kvm_para_has_feature(KVM_FEATURE_PV_UNHALT)) - return 0; - - static_key_slow_inc(¶virt_ticketlocks_enabled); - printk(KERN_INFO "KVM setup paravirtual spinlock\n"); - - return 0; -} -early_initcall(kvm_spinlock_init_jump); - #endif /* CONFIG_PARAVIRT_SPINLOCKS */ diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 6d4bf812af45..6259327f3454 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -42,6 +42,3 @@ struct pv_lock_ops pv_lock_ops = { #endif /* SMP */ }; EXPORT_SYMBOL(pv_lock_ops); - -struct static_key paravirt_ticketlocks_enabled = STATIC_KEY_INIT_FALSE; -EXPORT_SYMBOL(paravirt_ticketlocks_enabled); diff --git a/arch/x86/xen/spinlock.c b/arch/x86/xen/spinlock.c index e8a9ea7d7a21..25a7c4302ce7 100644 --- a/arch/x86/xen/spinlock.c +++ b/arch/x86/xen/spinlock.c @@ -141,25 +141,6 @@ void __init xen_init_spinlocks(void) pv_lock_ops.vcpu_is_preempted = PV_CALLEE_SAVE(xen_vcpu_stolen); } -/* - * While the jump_label init code needs to happend _after_ the jump labels are - * enabled and before SMP is started. Hence we use pre-SMP initcall level - * init. We cannot do it in xen_init_spinlocks as that is done before - * jump labels are activated. - */ -static __init int xen_init_spinlocks_jump(void) -{ - if (!xen_pvspin) - return 0; - - if (!xen_domain()) - return 0; - - static_key_slow_inc(¶virt_ticketlocks_enabled); - return 0; -} -early_initcall(xen_init_spinlocks_jump); - static __init int xen_parse_nopvspin(char *arg) { xen_pvspin = false; -- cgit v1.2.3 From 12907fbb1a691807bb0420a27126e15934cb7954 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 15 Dec 2016 11:44:28 +0100 Subject: sched/clock, clocksource: Add optional cs::mark_unstable() method PeterZ reported that we'd fail to mark the TSC unstable when the clocksource watchdog finds it unsuitable. Allow a clocksource to run a custom action when its being marked unstable and hook up the TSC unstable code. Reported-by: Peter Zijlstra (Intel) Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/tsc.c | 11 +++++++++++ include/linux/clocksource.h | 3 +++ kernel/time/clocksource.c | 4 ++++ 3 files changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index be3a49ee0356..c8174c815d83 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1106,6 +1106,16 @@ static u64 read_tsc(struct clocksource *cs) return (u64)rdtsc_ordered(); } +static void tsc_cs_mark_unstable(struct clocksource *cs) +{ + if (tsc_unstable) + return; + tsc_unstable = 1; + clear_sched_clock_stable(); + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to clocksource watchdog\n"); +} + /* * .mask MUST be CLOCKSOURCE_MASK(64). See comment above read_tsc() */ @@ -1118,6 +1128,7 @@ static struct clocksource clocksource_tsc = { CLOCK_SOURCE_MUST_VERIFY, .archdata = { .vclock_mode = VCLOCK_TSC }, .resume = tsc_resume, + .mark_unstable = tsc_cs_mark_unstable, }; void mark_tsc_unstable(char *reason) diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index e315d04a2fd9..cfc75848a35d 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -62,6 +62,8 @@ struct module; * @archdata: arch-specific data * @suspend: suspend function for the clocksource, if necessary * @resume: resume function for the clocksource, if necessary + * @mark_unstable: Optional function to inform the clocksource driver that + * the watchdog marked the clocksource unstable * @owner: module reference, must be set by clocksource in modules * * Note: This struct is not used in hotpathes of the timekeeping code @@ -93,6 +95,7 @@ struct clocksource { unsigned long flags; void (*suspend)(struct clocksource *cs); void (*resume)(struct clocksource *cs); + void (*mark_unstable)(struct clocksource *cs); /* private: */ #ifdef CONFIG_CLOCKSOURCE_WATCHDOG diff --git a/kernel/time/clocksource.c b/kernel/time/clocksource.c index 665985b0a89a..93621ae718d3 100644 --- a/kernel/time/clocksource.c +++ b/kernel/time/clocksource.c @@ -141,6 +141,10 @@ static void __clocksource_unstable(struct clocksource *cs) { cs->flags &= ~(CLOCK_SOURCE_VALID_FOR_HRES | CLOCK_SOURCE_WATCHDOG); cs->flags |= CLOCK_SOURCE_UNSTABLE; + + if (cs->mark_unstable) + cs->mark_unstable(cs); + if (finished_booting) schedule_work(&watchdog_work); } -- cgit v1.2.3 From 9e3d6223d2093a8903c8f570a06284453ee59944 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 9 Dec 2016 09:30:11 +0100 Subject: math64, timers: Fix 32bit mul_u64_u32_shr() and friends It turns out that while GCC-4.4 manages to generate 32x32->64 mult instructions for the 32bit mul_u64_u32_shr() code, any GCC after that fails horribly. Fix this by providing an explicit mul_u32_u32() function which can be architcture provided. Reported-by: Chris Metcalf Signed-off-by: Peter Zijlstra (Intel) Acked-by: Chris Metcalf [for tile] Cc: Christopher S. Hall Cc: David Gibson Cc: John Stultz Cc: Laurent Vivier Cc: Liav Rehana Cc: Linus Torvalds Cc: Parit Bhargava Cc: Peter Zijlstra Cc: Richard Cochran Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20161209083011.GD15765@worktop.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/tile/include/asm/Kbuild | 1 - arch/tile/include/asm/div64.h | 14 ++++++++++++++ arch/x86/include/asm/div64.h | 11 +++++++++++ include/linux/math64.h | 26 ++++++++++++++++++-------- 4 files changed, 43 insertions(+), 9 deletions(-) create mode 100644 arch/tile/include/asm/div64.h (limited to 'arch/x86') diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 2d1f5638974c..20f2ba6d79be 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h generic-y += cputime.h -generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h generic-y += exec.h diff --git a/arch/tile/include/asm/div64.h b/arch/tile/include/asm/div64.h new file mode 100644 index 000000000000..bf6161966dfa --- /dev/null +++ b/arch/tile/include/asm/div64.h @@ -0,0 +1,14 @@ +#ifndef _ASM_TILE_DIV64_H +#define _ASM_TILE_DIV64_H + +#ifdef __tilegx__ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return __insn_mul_lu_lu(a, b); +} +#define mul_u32_u32 mul_u32_u32 +#endif + +#include + +#endif /* _ASM_TILE_DIV64_H */ diff --git a/arch/x86/include/asm/div64.h b/arch/x86/include/asm/div64.h index ced283ac79df..af95c47d5c9e 100644 --- a/arch/x86/include/asm/div64.h +++ b/arch/x86/include/asm/div64.h @@ -59,6 +59,17 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) } #define div_u64_rem div_u64_rem +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + u32 high, low; + + asm ("mull %[b]" : "=a" (low), "=d" (high) + : [a] "a" (a), [b] "rm" (b) ); + + return low | ((u64)high) << 32; +} +#define mul_u32_u32 mul_u32_u32 + #else # include #endif /* CONFIG_X86_32 */ diff --git a/include/linux/math64.h b/include/linux/math64.h index 6e8b5b270ffe..80690c96c734 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -133,6 +133,16 @@ __iter_div_u64_rem(u64 dividend, u32 divisor, u64 *remainder) return ret; } +#ifndef mul_u32_u32 +/* + * Many a GCC version messes this up and generates a 64x64 mult :-( + */ +static inline u64 mul_u32_u32(u32 a, u32 b) +{ + return (u64)a * b; +} +#endif + #if defined(CONFIG_ARCH_SUPPORTS_INT128) && defined(__SIZEOF_INT128__) #ifndef mul_u64_u32_shr @@ -160,9 +170,9 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) al = a; ah = a >> 32; - ret = ((u64)al * mul) >> shift; + ret = mul_u32_u32(al, mul) >> shift; if (ah) - ret += ((u64)ah * mul) << (32 - shift); + ret += mul_u32_u32(ah, mul) << (32 - shift); return ret; } @@ -186,10 +196,10 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) a0.ll = a; b0.ll = b; - rl.ll = (u64)a0.l.low * b0.l.low; - rm.ll = (u64)a0.l.low * b0.l.high; - rn.ll = (u64)a0.l.high * b0.l.low; - rh.ll = (u64)a0.l.high * b0.l.high; + rl.ll = mul_u32_u32(a0.l.low, b0.l.low); + rm.ll = mul_u32_u32(a0.l.low, b0.l.high); + rn.ll = mul_u32_u32(a0.l.high, b0.l.low); + rh.ll = mul_u32_u32(a0.l.high, b0.l.high); /* * Each of these lines computes a 64-bit intermediate result into "c", @@ -229,8 +239,8 @@ static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) } u, rl, rh; u.ll = a; - rl.ll = (u64)u.l.low * mul; - rh.ll = (u64)u.l.high * mul + rl.l.high; + rl.ll = mul_u32_u32(u.l.low, mul); + rh.ll = mul_u32_u32(u.l.high, mul) + rl.l.high; /* Bits 32-63 of the result will be in rh.l.low. */ rl.l.high = do_div(rh.ll, divisor); -- cgit v1.2.3 From 06b35d93af0a5904aa832f58733be84ddbfe2e04 Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Tue, 10 Jan 2017 18:34:02 +0100 Subject: x86/cpufeature: Add AVX512_VPOPCNTDQ feature MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vector population count instructions for dwords and qwords are going to be available in future Intel Xeon & Xeon Phi processors. Bit 14 of CPUID[level:0x07, ECX] indicates that the instructions are supported by a processor. The specification can be found in the Intel Software Developer Manual (SDM) and in the Instruction Set Extensions Programming Reference (ISE). Populate the feature bit and clear it when xsave is disabled. Signed-off-by: Piotr Luc Reviewed-by: Borislav Petkov Cc: Paolo Bonzini Cc: kvm@vger.kernel.org Cc: Radim Krčmář Link: http://lkml.kernel.org/r/20170110173403.6010-2-piotr.luc@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/fpu/xstate.c | 1 + tools/arch/x86/include/asm/cpufeatures.h | 1 + 3 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index eafee3161d1c..d9d7136edf05 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -288,6 +288,7 @@ #define X86_FEATURE_AVX512VBMI (16*32+ 1) /* AVX512 Vector Bit Manipulation instructions*/ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ #define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ @@ -320,5 +321,4 @@ #define X86_BUG_SWAPGS_FENCE X86_BUG(11) /* SWAPGS without input dep on GS */ #define X86_BUG_MONITOR X86_BUG(12) /* IPI required to wake up remote CPU */ #define X86_BUG_AMD_E400 X86_BUG(13) /* CPU is among the affected by Erratum 400 */ - #endif /* _ASM_X86_CPUFEATURES_H */ diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1d7770447b3e..35f7024aace5 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -78,6 +78,7 @@ void fpu__xstate_clear_all_cpu_caps(void) setup_clear_cpu_cap(X86_FEATURE_PKU); setup_clear_cpu_cap(X86_FEATURE_AVX512_4VNNIW); setup_clear_cpu_cap(X86_FEATURE_AVX512_4FMAPS); + setup_clear_cpu_cap(X86_FEATURE_AVX512_VPOPCNTDQ); } /* diff --git a/tools/arch/x86/include/asm/cpufeatures.h b/tools/arch/x86/include/asm/cpufeatures.h index cddd5d06e1cb..3603556fa0d9 100644 --- a/tools/arch/x86/include/asm/cpufeatures.h +++ b/tools/arch/x86/include/asm/cpufeatures.h @@ -280,6 +280,7 @@ /* Intel-defined CPU features, CPUID level 0x00000007:0 (ecx), word 16 */ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ +#define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -- cgit v1.2.3 From a17f32270af1e1054bbc8858b0f27226a2c859ba Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Tue, 10 Jan 2017 18:34:03 +0100 Subject: kvm: x86: Expose Intel VPOPCNTDQ feature to guest MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Vector population count instructions for dwords and qwords are to be used in future Intel Xeon & Xeon Phi processors. The bit 14 of CPUID[level:0x07, ECX] indicates that the new instructions are supported by a processor. The spec can be found in the Intel Software Developer Manual (SDM) or in the Instruction Set Extensions Programming Reference (ISE). Signed-off-by: Piotr Luc Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: x86@kernel.org Cc: kvm@vger.kernel.org Cc: linux-kernel@vger.kernel.org Signed-off-by: Radim Krčmář --- arch/x86/kvm/cpuid.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index e85f6bd7b9d5..09c2ac741567 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -383,7 +383,7 @@ static inline int __do_cpuid_ent(struct kvm_cpuid_entry2 *entry, u32 function, /* cpuid 7.0.ecx*/ const u32 kvm_cpuid_7_0_ecx_x86_features = - F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/; + F(AVX512VBMI) | F(PKU) | 0 /*OSPKE*/ | F(AVX512_VPOPCNTDQ); /* cpuid 7.0.edx*/ const u32 kvm_cpuid_7_0_edx_x86_features = -- cgit v1.2.3 From a9b4f08770b415f30f2fb0f8329a370c8f554aa3 Mon Sep 17 00:00:00 2001 From: Ruslan Ruslichenko Date: Tue, 17 Jan 2017 16:13:52 +0200 Subject: x86/ioapic: Restore IO-APIC irq_chip retrigger callback commit d32932d02e18 removed the irq_retrigger callback from the IO-APIC chip and did not add it to the new IO-APIC-IR irq chip. There is no harm because the interrupts are resent in software when the retrigger callback is NULL, but it's less efficient. So restore them. [ tglx: Massaged changelog ] Fixes: d32932d02e18 ("x86/irq: Convert IOAPIC to use hierarchical irqdomain interfaces") Signed-off-by: Ruslan Ruslichenko Cc: xe-linux-external@cisco.com Link: http://lkml.kernel.org/r/1484662432-13580-1-git-send-email-rruslich@cisco.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 945e512a112a..1e35dd06b090 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1875,6 +1875,7 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1886,6 +1887,7 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, + .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -- cgit v1.2.3 From 02cfdc95a0104fa5812d855d1e4ec687312aaa6f Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 18 Jan 2017 14:30:29 -0800 Subject: sched/x86: Remove unnecessary TBM3 check to update topology Scheduling to the max performance core is enabled by default for Turbo Boost Maxt Technology 3.0 capable platforms. Remove the useless sysctl_sched_itmt_enabled check to update sched topology for adding the prioritized core scheduling flag. Signed-off-by: Tim Chen Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Srinivas Pandruvada Cc: Thomas Gleixner Cc: bp@suse.de Cc: jolsa@redhat.com Cc: linux-acpi@vger.kernel.org Cc: linux-pm@vger.kernel.org Cc: rjw@rjwysocki.net Link: http://lkml.kernel.org/r/1484778629-4404-1-git-send-email-tim.c.chen@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/itmt.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index cb9c1ed1d391..f73f475d0573 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -132,10 +132,8 @@ int sched_set_itmt_support(void) sysctl_sched_itmt_enabled = 1; - if (sysctl_sched_itmt_enabled) { - x86_topology_update = true; - rebuild_sched_domains(); - } + x86_topology_update = true; + rebuild_sched_domains(); mutex_unlock(&itmt_update_mutex); -- cgit v1.2.3 From 3f646ed70ccd1c4e5c1263d2922247d28c8e08f0 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 18 Jan 2017 16:45:00 -0700 Subject: Drivers: hv: vmbus: Move the definition of hv_x64_msr_hypercall_contents As part of the effort to separate out architecture specific code, move the definition of hv_x64_msr_hypercall_contents to x86 specific header file. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 12 ++++++++++++ drivers/hv/hyperv_vmbus.h | 15 --------------- 2 files changed, 12 insertions(+), 15 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index aaf59b7da98a..188ddfdde2b9 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -13,6 +13,18 @@ struct ms_hyperv_info { extern struct ms_hyperv_info ms_hyperv; +/* + * Declare the MSR used to setup pages used to communicate with the hypervisor. + */ +union hv_x64_msr_hypercall_contents { + u64 as_uint64; + struct { + u64 enable:1; + u64 reserved:11; + u64 guest_physical_address:52; + }; +}; + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 83beea748c6f..a1ff03677e23 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -306,21 +306,6 @@ union hv_x64_msr_guest_os_id_contents { }; }; -/* - * Declare the MSR used to setup pages used to communicate with the hypervisor. - */ -#define HV_X64_MSR_HYPERCALL 0x40000001 - -union hv_x64_msr_hypercall_contents { - u64 as_uint64; - struct { - u64 enable:1; - u64 reserved:11; - u64 guest_physical_address:52; - }; -}; - - enum { VMBUS_MESSAGE_CONNECTION_ID = 1, VMBUS_MESSAGE_PORT_ID = 1, -- cgit v1.2.3 From 352c9624242d5836ad8a960826183011367871a4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 18 Jan 2017 16:45:01 -0700 Subject: Drivers: hv: vmbus: Move the definition of generate_guest_id() As part of the effort to separate out architecture specific code, move the definition of generate_guest_id() to x86 specific header file. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 43 +++++++++++++++++++++++++++++++++++++++++ drivers/hv/hyperv_vmbus.h | 43 ----------------------------------------- 2 files changed, 43 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 188ddfdde2b9..15a0c275c82e 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -25,6 +25,49 @@ union hv_x64_msr_hypercall_contents { }; }; +/* + * The guest OS needs to register the guest ID with the hypervisor. + * The guest ID is a 64 bit entity and the structure of this ID is + * specified in the Hyper-V specification: + * + * msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx + * + * While the current guideline does not specify how Linux guest ID(s) + * need to be generated, our plan is to publish the guidelines for + * Linux and other guest operating systems that currently are hosted + * on Hyper-V. The implementation here conforms to this yet + * unpublished guidelines. + * + * + * Bit(s) + * 63 - Indicates if the OS is Open Source or not; 1 is Open Source + * 62:56 - Os Type; Linux is 0x100 + * 55:48 - Distro specific identification + * 47:16 - Linux kernel version number + * 15:0 - Distro specific identification + * + * + */ + +#define HV_LINUX_VENDOR_ID 0x8800 + +/* + * Generate the guest ID based on the guideline described above. + */ + +static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, + __u64 d_info2) +{ + __u64 guest_id = 0; + + guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 56); + guest_id |= (d_info1 << 48); + guest_id |= (kernel_version << 16); + guest_id |= d_info2; + + return guest_id; +} + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index a1ff03677e23..da57626786b7 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -320,49 +320,6 @@ enum { #define HV_PRESENT_BIT 0x80000000 -/* - * The guest OS needs to register the guest ID with the hypervisor. - * The guest ID is a 64 bit entity and the structure of this ID is - * specified in the Hyper-V specification: - * - * http://msdn.microsoft.com/en-us/library/windows/hardware/ff542653%28v=vs.85%29.aspx - * - * While the current guideline does not specify how Linux guest ID(s) - * need to be generated, our plan is to publish the guidelines for - * Linux and other guest operating systems that currently are hosted - * on Hyper-V. The implementation here conforms to this yet - * unpublished guidelines. - * - * - * Bit(s) - * 63 - Indicates if the OS is Open Source or not; 1 is Open Source - * 62:56 - Os Type; Linux is 0x100 - * 55:48 - Distro specific identification - * 47:16 - Linux kernel version number - * 15:0 - Distro specific identification - * - * - */ - -#define HV_LINUX_VENDOR_ID 0x8100 - -/* - * Generate the guest ID based on the guideline described above. - */ - -static inline __u64 generate_guest_id(__u8 d_info1, __u32 kernel_version, - __u16 d_info2) -{ - __u64 guest_id = 0; - - guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); - guest_id |= (((__u64)(d_info1)) << 48); - guest_id |= (((__u64)(kernel_version)) << 16); - guest_id |= ((__u64)(d_info2)); - - return guest_id; -} - #define HV_CPU_POWER_MANAGEMENT (1 << 0) #define HV_RECOMMENDATIONS_MAX 4 -- cgit v1.2.3 From 8730046c1498e8fb8c9a124789893944e8ce8220 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 18 Jan 2017 16:45:02 -0700 Subject: Drivers: hv vmbus: Move Hypercall page setup out of common code As part of the effort to separate out architecture specific code, move the hypercall page setup to an architecture specific file. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- MAINTAINERS | 1 + arch/x86/Kbuild | 3 ++ arch/x86/hyperv/Makefile | 1 + arch/x86/hyperv/hv_init.c | 62 +++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/mshyperv.h | 5 ++++ arch/x86/kernel/cpu/mshyperv.c | 7 +++++ drivers/hv/hv.c | 45 ++---------------------------- 7 files changed, 82 insertions(+), 42 deletions(-) create mode 100644 arch/x86/hyperv/Makefile create mode 100644 arch/x86/hyperv/hv_init.c (limited to 'arch/x86') diff --git a/MAINTAINERS b/MAINTAINERS index c36976d3bd1a..be8de24fd6dd 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -5962,6 +5962,7 @@ S: Maintained F: arch/x86/include/asm/mshyperv.h F: arch/x86/include/uapi/asm/hyperv.h F: arch/x86/kernel/cpu/mshyperv.c +F: arch/x86/hyperv F: drivers/hid/hid-hyperv.c F: drivers/hv/ F: drivers/input/serio/hyperv-keyboard.c diff --git a/arch/x86/Kbuild b/arch/x86/Kbuild index eb3abf8ac44e..586b786b3edf 100644 --- a/arch/x86/Kbuild +++ b/arch/x86/Kbuild @@ -7,6 +7,9 @@ obj-$(CONFIG_KVM) += kvm/ # Xen paravirtualization support obj-$(CONFIG_XEN) += xen/ +# Hyper-V paravirtualization support +obj-$(CONFIG_HYPERVISOR_GUEST) += hyperv/ + # lguest paravirtualization support obj-$(CONFIG_LGUEST_GUEST) += lguest/ diff --git a/arch/x86/hyperv/Makefile b/arch/x86/hyperv/Makefile new file mode 100644 index 000000000000..171ae09864d7 --- /dev/null +++ b/arch/x86/hyperv/Makefile @@ -0,0 +1 @@ +obj-y := hv_init.o diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c new file mode 100644 index 000000000000..3206bfda586d --- /dev/null +++ b/arch/x86/hyperv/hv_init.c @@ -0,0 +1,62 @@ +/* + * X86 specific Hyper-V initialization code. + * + * Copyright (C) 2016, Microsoft, Inc. + * + * Author : K. Y. Srinivasan + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or + * NON INFRINGEMENT. See the GNU General Public License for more + * details. + * + */ + +#include +#include +#include +#include +#include +#include +#include + +void *hv_hypercall_pg; +/* + * This function is to be invoked early in the boot sequence after the + * hypervisor has been detected. + * + * 1. Setup the hypercall page. + */ +void hyperv_init(void) +{ + u64 guest_id; + union hv_x64_msr_hypercall_contents hypercall_msr; + + if (x86_hyper != &x86_hyper_ms_hyperv) + return; + + /* + * Setup the hypercall page and enable hypercalls. + * 1. Register the guest ID + * 2. Enable the hypercall and register the hypercall page + */ + guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); + wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); + + hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); + if (hv_hypercall_pg == NULL) { + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + return; + } + + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + hypercall_msr.enable = 1; + hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); +} +EXPORT_SYMBOL_GPL(hv_hypercall_pg); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 15a0c275c82e..e5f57e15a507 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -80,4 +80,9 @@ void hv_setup_kexec_handler(void (*handler)(void)); void hv_remove_kexec_handler(void); void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); + +#if IS_ENABLED(CONFIG_HYPERV) +void hyperv_init(void); +extern void *hv_hypercall_pg; +#endif #endif diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index 65e20c97e04b..c5a1e9ba9ae0 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -227,6 +227,13 @@ static void __init ms_hyperv_init_platform(void) */ if (efi_enabled(EFI_BOOT)) x86_platform.get_nmi_reason = hv_get_nmi_reason; + +#if IS_ENABLED(CONFIG_HYPERV) + /* + * Setup the hook to get control post apic initialization. + */ + x86_platform.apic_post_init = hyperv_init; +#endif } const __refconst struct hypervisor_x86 x86_hyper_ms_hyperv = { diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 714e1ebc834c..d8d41542d93c 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -193,7 +193,6 @@ int hv_init(void) { int max_leaf; union hv_x64_msr_hypercall_contents hypercall_msr; - void *virtaddr = NULL; memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS); memset(hv_context.synic_message_page, 0, @@ -211,33 +210,15 @@ int hv_init(void) max_leaf = query_hypervisor_info(); - /* - * Write our OS ID. - */ - hv_context.guestid = generate_guest_id(0, LINUX_VERSION_CODE, 0); - wrmsrl(HV_X64_MSR_GUEST_OS_ID, hv_context.guestid); /* See if the hypercall page is already set */ - rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - - virtaddr = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); - - if (!virtaddr) - goto cleanup; - - hypercall_msr.enable = 1; - - hypercall_msr.guest_physical_address = vmalloc_to_pfn(virtaddr); - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - - /* Confirm that hypercall page did get setup. */ hypercall_msr.as_uint64 = 0; rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); if (!hypercall_msr.enable) - goto cleanup; + return -ENOTSUPP; - hv_context.hypercall_page = virtaddr; + hv_context.hypercall_page = hv_hypercall_pg; #ifdef CONFIG_X86_64 if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { @@ -261,15 +242,6 @@ int hv_init(void) return 0; cleanup: - if (virtaddr) { - if (hypercall_msr.enable) { - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - } - - vfree(virtaddr); - } - return -ENOTSUPP; } @@ -280,20 +252,9 @@ cleanup: */ void hv_cleanup(bool crash) { - union hv_x64_msr_hypercall_contents hypercall_msr; - - /* Reset our OS id */ - wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); - - if (hv_context.hypercall_page) { - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - if (!crash) - vfree(hv_context.hypercall_page); - hv_context.hypercall_page = NULL; - } #ifdef CONFIG_X86_64 + union hv_x64_msr_hypercall_contents hypercall_msr; /* * Cleanup the TSC page based CS. */ -- cgit v1.2.3 From 6ab42a66d2cc10afefea9f9e5d9a5ad5a836d254 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 18 Jan 2017 16:45:03 -0700 Subject: Drivers: hv: vmbus: Move Hypercall invocation code out of common code As part of the effort to separate out architecture specific code, move the hypercall invocation code to an architecture specific file. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 54 +++++++++++++++++++++++++++++++++++++---- arch/x86/include/asm/mshyperv.h | 1 - drivers/hv/hv.c | 52 --------------------------------------- drivers/hv/hyperv_vmbus.h | 1 - 4 files changed, 49 insertions(+), 59 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 3206bfda586d..b5c8e04deacb 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -25,7 +25,7 @@ #include #include -void *hv_hypercall_pg; +static void *hypercall_pg; /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -48,15 +48,59 @@ void hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hv_hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); - if (hv_hypercall_pg == NULL) { + hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); + if (hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); return; } rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); hypercall_msr.enable = 1; - hypercall_msr.guest_physical_address = vmalloc_to_pfn(hv_hypercall_pg); + hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); } -EXPORT_SYMBOL_GPL(hv_hypercall_pg); + +/* + * hv_do_hypercall- Invoke the specified hypercall + */ +u64 hv_do_hypercall(u64 control, void *input, void *output) +{ + u64 input_address = (input) ? virt_to_phys(input) : 0; + u64 output_address = (output) ? virt_to_phys(output) : 0; +#ifdef CONFIG_X86_64 + u64 hv_status = 0; + + if (!hypercall_pg) + return (u64)ULLONG_MAX; + + __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); + __asm__ __volatile__("call *%3" : "=a" (hv_status) : + "c" (control), "d" (input_address), + "m" (hypercall_pg)); + + return hv_status; + +#else + + u32 control_hi = control >> 32; + u32 control_lo = control & 0xFFFFFFFF; + u32 hv_status_hi = 1; + u32 hv_status_lo = 1; + u32 input_address_hi = input_address >> 32; + u32 input_address_lo = input_address & 0xFFFFFFFF; + u32 output_address_hi = output_address >> 32; + u32 output_address_lo = output_address & 0xFFFFFFFF; + + if (!hypercall_pg) + return (u64)ULLONG_MAX; + + __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), + "=a"(hv_status_lo) : "d" (control_hi), + "a" (control_lo), "b" (input_address_hi), + "c" (input_address_lo), "D"(output_address_hi), + "S"(output_address_lo), "m" (hypercall_pg)); + + return hv_status_lo | ((u64)hv_status_hi << 32); +#endif /* !x86_64 */ +} +EXPORT_SYMBOL_GPL(hv_do_hypercall); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index e5f57e15a507..ed8e07399071 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -83,6 +83,5 @@ void hv_remove_crash_handler(void); #if IS_ENABLED(CONFIG_HYPERV) void hyperv_init(void); -extern void *hv_hypercall_pg; #endif #endif diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index d8d41542d93c..fd3b9b98a29d 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -36,7 +36,6 @@ /* The one and only */ struct hv_context hv_context = { .synic_initialized = false, - .hypercall_page = NULL, }; #define HV_TIMER_FREQUENCY (10 * 1000 * 1000) /* 100ns period */ @@ -88,52 +87,6 @@ static int query_hypervisor_info(void) return max_leaf; } -/* - * hv_do_hypercall- Invoke the specified hypercall - */ -u64 hv_do_hypercall(u64 control, void *input, void *output) -{ - u64 input_address = (input) ? virt_to_phys(input) : 0; - u64 output_address = (output) ? virt_to_phys(output) : 0; - void *hypercall_page = hv_context.hypercall_page; -#ifdef CONFIG_X86_64 - u64 hv_status = 0; - - if (!hypercall_page) - return (u64)ULLONG_MAX; - - __asm__ __volatile__("mov %0, %%r8" : : "r" (output_address) : "r8"); - __asm__ __volatile__("call *%3" : "=a" (hv_status) : - "c" (control), "d" (input_address), - "m" (hypercall_page)); - - return hv_status; - -#else - - u32 control_hi = control >> 32; - u32 control_lo = control & 0xFFFFFFFF; - u32 hv_status_hi = 1; - u32 hv_status_lo = 1; - u32 input_address_hi = input_address >> 32; - u32 input_address_lo = input_address & 0xFFFFFFFF; - u32 output_address_hi = output_address >> 32; - u32 output_address_lo = output_address & 0xFFFFFFFF; - - if (!hypercall_page) - return (u64)ULLONG_MAX; - - __asm__ __volatile__ ("call *%8" : "=d"(hv_status_hi), - "=a"(hv_status_lo) : "d" (control_hi), - "a" (control_lo), "b" (input_address_hi), - "c" (input_address_lo), "D"(output_address_hi), - "S"(output_address_lo), "m" (hypercall_page)); - - return hv_status_lo | ((u64)hv_status_hi << 32); -#endif /* !x86_64 */ -} -EXPORT_SYMBOL_GPL(hv_do_hypercall); - #ifdef CONFIG_X86_64 static u64 read_hv_clock_tsc(struct clocksource *arg) { @@ -218,8 +171,6 @@ int hv_init(void) if (!hypercall_msr.enable) return -ENOTSUPP; - hv_context.hypercall_page = hv_hypercall_pg; - #ifdef CONFIG_X86_64 if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { union hv_x64_msr_hypercall_contents tsc_msr; @@ -466,9 +417,6 @@ int hv_synic_init(unsigned int cpu) union hv_synic_scontrol sctrl; u64 vp_index; - if (!hv_context.hypercall_page) - return -EFAULT; - /* Check the version */ rdmsrl(HV_X64_MSR_SVERSION, version); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index da57626786b7..09485269d537 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -371,7 +371,6 @@ struct hv_context { */ u64 guestid; - void *hypercall_page; void *tsc_page; bool synic_initialized; -- cgit v1.2.3 From acb04058de49458010c44bb35b849d45113fd668 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 19 Jan 2017 14:36:33 +0100 Subject: sched/clock: Fix hotplug crash Mike reported that he could trigger the WARN_ON_ONCE() in set_sched_clock_stable() using hotplug. This exposed a fundamental problem with the interface, we should never mark the TSC stable if we ever find it to be unstable. Therefore set_sched_clock_stable() is a broken interface. The reason it existed is that not having it is a pain, it means all relevant architecture code needs to call clear_sched_clock_stable() where appropriate. Of the three architectures that select HAVE_UNSTABLE_SCHED_CLOCK ia64 and parisc are trivial in that they never called set_sched_clock_stable(), so add an unconditional call to clear_sched_clock_stable() to them. For x86 the story is a lot more involved, and what this patch tries to do is ensure we preserve the status quo. So even is Cyrix or Transmeta have usable TSC they never called set_sched_clock_stable() so they now get an explicit mark unstable. Reported-by: Mike Galbraith Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 9881b024b7d7 ("sched/clock: Delay switching sched_clock to stable") Link: http://lkml.kernel.org/r/20170119133633.GB6536@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/ia64/kernel/setup.c | 2 ++ arch/parisc/kernel/setup.c | 2 ++ arch/x86/kernel/cpu/amd.c | 6 ++++-- arch/x86/kernel/cpu/centaur.c | 6 ++++-- arch/x86/kernel/cpu/common.c | 3 +++ arch/x86/kernel/cpu/cyrix.c | 2 ++ arch/x86/kernel/cpu/intel.c | 6 ++++-- arch/x86/kernel/cpu/transmeta.c | 3 +++ arch/x86/kernel/kvmclock.c | 2 +- include/linux/sched.h | 1 - kernel/sched/clock.c | 29 ++++++++--------------------- 11 files changed, 33 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 7ec7acc844c2..c483ece3eb84 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -619,6 +619,8 @@ setup_arch (char **cmdline_p) check_sal_cache_flush(); #endif paging_init(); + + clear_sched_clock_stable(); } /* diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 2e66a887788e..068ed3607bac 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -36,6 +36,7 @@ #undef PCI_DEBUG #include #include +#include #include #include @@ -176,6 +177,7 @@ void __init setup_arch(char **cmdline_p) conswitchp = &dummy_con; /* we use do_take_over_console() later ! */ #endif + clear_sched_clock_stable(); } /* diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 71cae73a5076..1bb253a6ee4d 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -548,8 +548,10 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 1661d8ec9280..2c234a6d94c4 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,5 @@ -#include -#include + +#include #include #include @@ -104,6 +104,8 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif + + clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index dc1697ca5191..3457186275a0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -83,6 +83,7 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif + clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1055,6 +1056,8 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); + else + clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index bd9dcd6b712d..47416f959a48 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "cpu.h" @@ -183,6 +184,7 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } + clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index fcd484d2bb03..26eaff4907b2 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -124,8 +124,10 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (!check_tsc_unstable()) - set_sched_clock_stable(); + if (check_tsc_unstable()) + clear_sched_clock_stable(); + } else { + clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index 34178564be2a..c1ea5b999839 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -14,6 +15,8 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } + + clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2a5cafdf8808..542710b99f52 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -107,12 +107,12 @@ static inline void kvm_sched_clock_init(bool stable) { if (!stable) { pv_time_ops.sched_clock = kvm_clock_read; + clear_sched_clock_stable(); return; } kvm_sched_clock_offset = kvm_clock_read(); pv_time_ops.sched_clock = kvm_sched_clock_read; - set_sched_clock_stable(); printk(KERN_INFO "kvm-clock: using sched offset of %llu cycles\n", kvm_sched_clock_offset); diff --git a/include/linux/sched.h b/include/linux/sched.h index a8daed914eef..69e6852fede1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2547,7 +2547,6 @@ extern void sched_clock_init_late(void); * is reliable after all: */ extern int sched_clock_stable(void); -extern void set_sched_clock_stable(void); extern void clear_sched_clock_stable(void); extern void sched_clock_tick(void); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index 7713b2b53f61..ad64efe41722 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -83,8 +83,15 @@ void sched_clock_init(void) } #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK +/* + * We must start with !__sched_clock_stable because the unstable -> stable + * transition is accurate, while the stable -> unstable transition is not. + * + * Similarly we start with __sched_clock_stable_early, thereby assuming we + * will become stable, such that there's only a single 1 -> 0 transition. + */ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); -static int __sched_clock_stable_early; +static int __sched_clock_stable_early = 1; /* * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset @@ -132,24 +139,6 @@ static void __set_sched_clock_stable(void) tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); } -void set_sched_clock_stable(void) -{ - __sched_clock_stable_early = 1; - - smp_mb(); /* matches sched_clock_init_late() */ - - /* - * This really should only be called early (before - * sched_clock_init_late()) when guestimating our sched_clock() is - * solid. - * - * After that we test stability and we can negate our guess using - * clear_sched_clock_stable, possibly from a watchdog. - */ - if (WARN_ON_ONCE(sched_clock_running == 2)) - __set_sched_clock_stable(); -} - static void __clear_sched_clock_stable(struct work_struct *work) { struct sched_clock_data *scd = this_scd(); @@ -199,8 +188,6 @@ void sched_clock_init_late(void) if (__sched_clock_stable_early) __set_sched_clock_stable(); - else - __clear_sched_clock_stable(NULL); } /* -- cgit v1.2.3 From 358e96deaed3330a59d9dd6a7e419f4da08d6497 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 21:24:22 +0200 Subject: x86/ioapic: Return suitable error code in mp_map_gsi_to_irq() mp_map_gsi_to_irq() in some cases might return legacy -1, which would be wrongly interpreted as -EPERM. Correct those cases to return proper error code. Signed-off-by: Andy Shevchenko Link: http://lkml.kernel.org/r/20170119192425.189899-2-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/apic/io_apic.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 945e512a112a..f62c38d325da 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1107,12 +1107,12 @@ int mp_map_gsi_to_irq(u32 gsi, unsigned int flags, struct irq_alloc_info *info) ioapic = mp_find_ioapic(gsi); if (ioapic < 0) - return -1; + return -ENODEV; pin = mp_find_ioapic_pin(ioapic, gsi); idx = find_irq_entry(ioapic, pin, mp_INT); if ((flags & IOAPIC_MAP_CHECK) && idx < 0) - return -1; + return -ENODEV; return mp_map_pin_to_irq(gsi, idx, ioapic, pin, flags, info); } -- cgit v1.2.3 From 910a26f6e952148a0c8815281737aaead640626c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 21:24:23 +0200 Subject: x86/platform/intel-mid: Allocate RTC interrupt for Merrifield Legacy RTC requires interrupt line 8 to be dedicated for it. On Intel MID platforms the legacy PIC is absent and in order to make RTC work we need to allocate interrupt separately. Current solution brought by commit de1c2540aa4f does it in a wrong place, and since it's done unconditionally for all x86 devices, some of them, e.g. PNP based, might get it wrong because they execute the MID specific code due to x86_platform.legacy.rtc flag being set. Move intel_mid_legacy_rtc_init() to its own module and call it before x86 RTC CMOS initialization. Fixes: de1c2540aa4f ("x86/platform/intel-mid: Enable RTC on Intel Merrifield") Signed-off-by: Andy Shevchenko Cc: "Luis R . Rodriguez" Link: http://lkml.kernel.org/r/20170119192425.189899-3-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/device_libs/Makefile | 1 + .../intel-mid/device_libs/platform_mrfld_rtc.c | 48 ++++++++++++++++++++++ arch/x86/platform/intel-mid/sfi.c | 14 ------- 3 files changed, 49 insertions(+), 14 deletions(-) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index d4af7785844e..a7dbec4dce27 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -26,4 +26,5 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c new file mode 100644 index 000000000000..3135416df037 --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_rtc.c @@ -0,0 +1,48 @@ +/* + * Intel Merrifield legacy RTC initialization file + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include + +#include +#include +#include +#include +#include + +static int __init mrfld_legacy_rtc_alloc_irq(void) +{ + struct irq_alloc_info info; + int ret; + + if (!x86_platform.legacy.rtc) + return -ENODEV; + + ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, 0); + ret = mp_map_gsi_to_irq(RTC_IRQ, IOAPIC_MAP_ALLOC, &info); + if (ret < 0) { + pr_info("Failed to allocate RTC interrupt. Disabling RTC\n"); + x86_platform.legacy.rtc = 0; + return ret; + } + + return 0; +} + +static int __init mrfld_legacy_rtc_init(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + return mrfld_legacy_rtc_alloc_irq(); +} +arch_initcall(mrfld_legacy_rtc_init); diff --git a/arch/x86/platform/intel-mid/sfi.c b/arch/x86/platform/intel-mid/sfi.c index e4d4cabbb370..19b43e3a9f0f 100644 --- a/arch/x86/platform/intel-mid/sfi.c +++ b/arch/x86/platform/intel-mid/sfi.c @@ -41,7 +41,6 @@ #include #include #include -#include #define SFI_SIG_OEM0 "OEM0" #define MAX_IPCDEVS 24 @@ -540,21 +539,8 @@ static int __init sfi_parse_devs(struct sfi_table_header *table) return 0; } -static int __init intel_mid_legacy_rtc_init(void) -{ - struct irq_alloc_info info; - - if (!x86_platform.legacy.rtc) - return -ENODEV; - - ioapic_set_alloc_attr(&info, NUMA_NO_NODE, 1, 0); - return mp_map_gsi_to_irq(RTC_IRQ, IOAPIC_MAP_ALLOC, &info); -} - static int __init intel_mid_platform_init(void) { - intel_mid_legacy_rtc_init(); - sfi_table_parse(SFI_SIG_GPIO, NULL, NULL, sfi_parse_gpio); sfi_table_parse(SFI_SIG_DEVS, NULL, NULL, sfi_parse_devs); return 0; -- cgit v1.2.3 From 939533955d1f1d51e8e37d7d675646ce9d55534b Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 21:24:24 +0200 Subject: x86/platform/intel-mid: Don't shadow error code of mp_map_gsi_to_irq() When call mp_map_gsi_to_irq() and return its error code do not shadow it. Note that 0 is not an error. Signed-off-by: Andy Shevchenko Link: http://lkml.kernel.org/r/20170119192425.189899-4-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 3f1f1c77d090..8a10a56f2840 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -28,9 +28,9 @@ static struct platform_device wdt_dev = { static int tangier_probe(struct platform_device *pdev) { - int gsi; struct irq_alloc_info info; struct intel_mid_wdt_pdata *pdata = pdev->dev.platform_data; + int gsi, irq; if (!pdata) return -EINVAL; @@ -38,10 +38,10 @@ static int tangier_probe(struct platform_device *pdev) /* IOAPIC builds identity mapping between GSI and IRQ on MID */ gsi = pdata->irq; ioapic_set_alloc_attr(&info, cpu_to_node(0), 1, 0); - if (mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info) <= 0) { - dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", - gsi); - return -EINVAL; + irq = mp_map_gsi_to_irq(gsi, IOAPIC_MAP_ALLOC, &info); + if (irq < 0) { + dev_warn(&pdev->dev, "cannot find interrupt %d in ioapic\n", gsi); + return irq; } return 0; -- cgit v1.2.3 From e2e2eabb68dfd00502bf8501b015862eb8b3f392 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 19 Jan 2017 21:24:25 +0200 Subject: x86/platform/intel-mid: Move watchdog registration to arch_initcall() There is no need to choose a random initcall level for certainly architecture dependent code. Move watchdog registration to arch_initcall() from rootfs_initcall(). Signed-off-by: Andy Shevchenko Link: http://lkml.kernel.org/r/20170119192425.189899-5-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 8a10a56f2840..86edd1e941eb 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -82,4 +82,4 @@ static int __init register_mid_wdt(void) return 0; } -rootfs_initcall(register_mid_wdt); +arch_initcall(register_mid_wdt); -- cgit v1.2.3 From 63ed4e0c67df332681ebfef6eca6852da28d6300 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:46 -0700 Subject: Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code As part of the effort to separate out architecture specific code, consolidate all Hyper-V specific clocksource code to an architecture specific code. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 105 ++++++++++++++++++++++++++++++++++++++++ arch/x86/include/asm/mshyperv.h | 12 +++++ arch/x86/kernel/cpu/mshyperv.c | 23 --------- drivers/hv/hv.c | 95 ------------------------------------ drivers/hv/hyperv_vmbus.h | 8 --- 5 files changed, 117 insertions(+), 126 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b5c8e04deacb..860233af4568 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -24,6 +24,79 @@ #include #include #include +#include + + +#ifdef CONFIG_X86_64 + +static struct ms_hyperv_tsc_page *tsc_pg; + +static u64 read_hv_clock_tsc(struct clocksource *arg) +{ + u64 current_tick; + + if (tsc_pg->tsc_sequence != 0) { + /* + * Use the tsc page to compute the value. + */ + + while (1) { + u64 tmp; + u32 sequence = tsc_pg->tsc_sequence; + u64 cur_tsc; + u64 scale = tsc_pg->tsc_scale; + s64 offset = tsc_pg->tsc_offset; + + rdtscll(cur_tsc); + /* current_tick = ((cur_tsc *scale) >> 64) + offset */ + asm("mulq %3" + : "=d" (current_tick), "=a" (tmp) + : "a" (cur_tsc), "r" (scale)); + + current_tick += offset; + if (tsc_pg->tsc_sequence == sequence) + return current_tick; + + if (tsc_pg->tsc_sequence != 0) + continue; + /* + * Fallback using MSR method. + */ + break; + } + } + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); + return current_tick; +} + +static struct clocksource hyperv_cs_tsc = { + .name = "hyperv_clocksource_tsc_page", + .rating = 400, + .read = read_hv_clock_tsc, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; +#endif + +static u64 read_hv_clock_msr(struct clocksource *arg) +{ + u64 current_tick; + /* + * Read the partition counter to get the current tick count. This count + * is set to 0 when the partition is created and is incremented in + * 100 nanosecond units. + */ + rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); + return current_tick; +} + +static struct clocksource hyperv_cs_msr = { + .name = "hyperv_clocksource_msr", + .rating = 400, + .read = read_hv_clock_msr, + .mask = CLOCKSOURCE_MASK(64), + .flags = CLOCK_SOURCE_IS_CONTINUOUS, +}; static void *hypercall_pg; /* @@ -31,6 +104,7 @@ static void *hypercall_pg; * hypervisor has been detected. * * 1. Setup the hypercall page. + * 2. Register Hyper-V specific clocksource. */ void hyperv_init(void) { @@ -58,6 +132,37 @@ void hyperv_init(void) hypercall_msr.enable = 1; hypercall_msr.guest_physical_address = vmalloc_to_pfn(hypercall_pg); wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + /* + * Register Hyper-V specific clocksource. + */ +#ifdef CONFIG_X86_64 + if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { + union hv_x64_msr_hypercall_contents tsc_msr; + + tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); + if (!tsc_pg) { + clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); + return; + } + + rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + + tsc_msr.enable = 1; + tsc_msr.guest_physical_address = vmalloc_to_pfn(tsc_pg); + + wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); + clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); + return; + } +#endif + /* + * For 32 bit guests just use the MSR based mechanism for reading + * the partition counter. + */ + + if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) + clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); } /* diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index ed8e07399071..adfe8cc9f7e3 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -25,6 +25,18 @@ union hv_x64_msr_hypercall_contents { }; }; +/* + * TSC page layout. + */ + +struct ms_hyperv_tsc_page { + volatile u32 tsc_sequence; + u32 reserved1; + volatile u64 tsc_scale; + volatile s64 tsc_offset; + u64 reserved2[509]; +}; + /* * The guest OS needs to register the guest ID with the hypervisor. * The guest ID is a 64 bit entity and the structure of this ID is diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index c5a1e9ba9ae0..d3705a44971c 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -133,26 +133,6 @@ static uint32_t __init ms_hyperv_platform(void) return 0; } -static u64 read_hv_clock(struct clocksource *arg) -{ - u64 current_tick; - /* - * Read the partition counter to get the current tick count. This count - * is set to 0 when the partition is created and is incremented in - * 100 nanosecond units. - */ - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs = { - .name = "hyperv_clocksource", - .rating = 400, /* use this when running on Hyperv*/ - .read = read_hv_clock, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; - static unsigned char hv_get_nmi_reason(void) { return 0; @@ -208,9 +188,6 @@ static void __init ms_hyperv_init_platform(void) "hv_nmi_unknown"); #endif - if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) - clocksource_register_hz(&hyperv_cs, NSEC_PER_SEC/100); - #ifdef CONFIG_X86_IO_APIC no_timer_check = 1; #endif diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index fd3b9b98a29d..1a33b59776d3 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -87,56 +87,6 @@ static int query_hypervisor_info(void) return max_leaf; } -#ifdef CONFIG_X86_64 -static u64 read_hv_clock_tsc(struct clocksource *arg) -{ - u64 current_tick; - struct ms_hyperv_tsc_page *tsc_pg = hv_context.tsc_page; - - if (tsc_pg->tsc_sequence != 0) { - /* - * Use the tsc page to compute the value. - */ - - while (1) { - u64 tmp; - u32 sequence = tsc_pg->tsc_sequence; - u64 cur_tsc; - u64 scale = tsc_pg->tsc_scale; - s64 offset = tsc_pg->tsc_offset; - - rdtscll(cur_tsc); - /* current_tick = ((cur_tsc *scale) >> 64) + offset */ - asm("mulq %3" - : "=d" (current_tick), "=a" (tmp) - : "a" (cur_tsc), "r" (scale)); - - current_tick += offset; - if (tsc_pg->tsc_sequence == sequence) - return current_tick; - - if (tsc_pg->tsc_sequence != 0) - continue; - /* - * Fallback using MSR method. - */ - break; - } - } - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); - return current_tick; -} - -static struct clocksource hyperv_cs_tsc = { - .name = "hyperv_clocksource_tsc_page", - .rating = 425, - .read = read_hv_clock_tsc, - .mask = CLOCKSOURCE_MASK(64), - .flags = CLOCK_SOURCE_IS_CONTINUOUS, -}; -#endif - - /* * hv_init - Main initialization routine. * @@ -171,29 +121,7 @@ int hv_init(void) if (!hypercall_msr.enable) return -ENOTSUPP; -#ifdef CONFIG_X86_64 - if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { - union hv_x64_msr_hypercall_contents tsc_msr; - void *va_tsc; - - va_tsc = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!va_tsc) - goto cleanup; - hv_context.tsc_page = va_tsc; - - rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - - tsc_msr.enable = 1; - tsc_msr.guest_physical_address = vmalloc_to_pfn(va_tsc); - - wrmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); - clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); - } -#endif return 0; - -cleanup: - return -ENOTSUPP; } /* @@ -204,29 +132,6 @@ cleanup: void hv_cleanup(bool crash) { -#ifdef CONFIG_X86_64 - union hv_x64_msr_hypercall_contents hypercall_msr; - /* - * Cleanup the TSC page based CS. - */ - if (ms_hyperv.features & HV_X64_MSR_REFERENCE_TSC_AVAILABLE) { - /* - * Crash can happen in an interrupt context and unregistering - * a clocksource is impossible and redundant in this case. - */ - if (!oops_in_progress) { - clocksource_change_rating(&hyperv_cs_tsc, 10); - clocksource_unregister(&hyperv_cs_tsc); - } - - hypercall_msr.as_uint64 = 0; - wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); - if (!crash) { - vfree(hv_context.tsc_page); - hv_context.tsc_page = NULL; - } - } -#endif } /* diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 09485269d537..947455d30707 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -416,14 +416,6 @@ struct hv_context { extern struct hv_context hv_context; -struct ms_hyperv_tsc_page { - volatile u32 tsc_sequence; - u32 reserved1; - volatile u64 tsc_scale; - volatile s64 tsc_offset; - u64 reserved2[509]; -}; - struct hv_ring_buffer_debug_info { u32 current_interrupt_mask; u32 current_read_index; -- cgit v1.2.3 From 8de8af7e0873c4fdac2205327dff922819e16657 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:47 -0700 Subject: Drivers: hv: vmbus: Move the extracting of Hypervisor version information As part of the effort to separate out architecture specific code, extract hypervisor version information in an architecture specific file. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 19 ++++++++++++++++ arch/x86/kernel/cpu/mshyperv.c | 20 +++++++++++++++++ drivers/hv/connection.c | 7 ++---- drivers/hv/hv.c | 49 ----------------------------------------- drivers/hv/hyperv_vmbus.h | 27 ----------------------- 5 files changed, 41 insertions(+), 81 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index adfe8cc9f7e3..54729e3cba47 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -5,6 +5,25 @@ #include #include +/* + * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent + * is set by CPUID(HVCPUID_VERSION_FEATURES). + */ +enum hv_cpuid_function { + HVCPUID_VERSION_FEATURES = 0x00000001, + HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, + HVCPUID_INTERFACE = 0x40000001, + + /* + * The remaining functions depend on the value of + * HVCPUID_INTERFACE + */ + HVCPUID_VERSION = 0x40000002, + HVCPUID_FEATURES = 0x40000003, + HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, + HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, +}; + struct ms_hyperv_info { u32 features; u32 misc_features; diff --git a/arch/x86/kernel/cpu/mshyperv.c b/arch/x86/kernel/cpu/mshyperv.c index d3705a44971c..b5375b9497b3 100644 --- a/arch/x86/kernel/cpu/mshyperv.c +++ b/arch/x86/kernel/cpu/mshyperv.c @@ -160,6 +160,11 @@ static int hv_nmi_unknown(unsigned int val, struct pt_regs *regs) static void __init ms_hyperv_init_platform(void) { + int hv_host_info_eax; + int hv_host_info_ebx; + int hv_host_info_ecx; + int hv_host_info_edx; + /* * Extract the features and hints */ @@ -170,6 +175,21 @@ static void __init ms_hyperv_init_platform(void) pr_info("HyperV: features 0x%x, hints 0x%x\n", ms_hyperv.features, ms_hyperv.hints); + /* + * Extract host information. + */ + if (cpuid_eax(HVCPUID_VENDOR_MAXFUNCTION) >= HVCPUID_VERSION) { + hv_host_info_eax = cpuid_eax(HVCPUID_VERSION); + hv_host_info_ebx = cpuid_ebx(HVCPUID_VERSION); + hv_host_info_ecx = cpuid_ecx(HVCPUID_VERSION); + hv_host_info_edx = cpuid_edx(HVCPUID_VERSION); + + pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d\n", + hv_host_info_eax, hv_host_info_ebx >> 16, + hv_host_info_ebx & 0xFFFF, hv_host_info_ecx, + hv_host_info_edx >> 24, hv_host_info_edx & 0xFFFFFF); + } + #ifdef CONFIG_X86_LOCAL_APIC if (ms_hyperv.features & HV_X64_MSR_APIC_FREQUENCY_AVAILABLE) { /* diff --git a/drivers/hv/connection.c b/drivers/hv/connection.c index 9b72ebcd37bc..307a5a8937f6 100644 --- a/drivers/hv/connection.c +++ b/drivers/hv/connection.c @@ -221,11 +221,8 @@ int vmbus_connect(void) goto cleanup; vmbus_proto_version = version; - pr_info("Hyper-V Host Build:%d-%d.%d-%d-%d.%d; Vmbus version:%d.%d\n", - host_info_eax, host_info_ebx >> 16, - host_info_ebx & 0xFFFF, host_info_ecx, - host_info_edx >> 24, host_info_edx & 0xFFFFFF, - version >> 16, version & 0xFFFF); + pr_info("Vmbus version:%d.%d\n", + version >> 16, version & 0xFFFF); kfree(msginfo); return 0; diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 1a33b59776d3..9985a347ed03 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -42,51 +42,6 @@ struct hv_context hv_context = { #define HV_MAX_MAX_DELTA_TICKS 0xffffffff #define HV_MIN_DELTA_TICKS 1 -/* - * query_hypervisor_info - Get version info of the windows hypervisor - */ -unsigned int host_info_eax; -unsigned int host_info_ebx; -unsigned int host_info_ecx; -unsigned int host_info_edx; - -static int query_hypervisor_info(void) -{ - unsigned int eax; - unsigned int ebx; - unsigned int ecx; - unsigned int edx; - unsigned int max_leaf; - unsigned int op; - - /* - * Its assumed that this is called after confirming that Viridian - * is present. Query id and revision. - */ - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; - op = HVCPUID_VENDOR_MAXFUNCTION; - cpuid(op, &eax, &ebx, &ecx, &edx); - - max_leaf = eax; - - if (max_leaf >= HVCPUID_VERSION) { - eax = 0; - ebx = 0; - ecx = 0; - edx = 0; - op = HVCPUID_VERSION; - cpuid(op, &eax, &ebx, &ecx, &edx); - host_info_eax = eax; - host_info_ebx = ebx; - host_info_ecx = ecx; - host_info_edx = edx; - } - return max_leaf; -} - /* * hv_init - Main initialization routine. * @@ -94,7 +49,6 @@ static int query_hypervisor_info(void) */ int hv_init(void) { - int max_leaf; union hv_x64_msr_hypercall_contents hypercall_msr; memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS); @@ -111,9 +65,6 @@ int hv_init(void) memset(hv_context.clk_evt, 0, sizeof(void *) * NR_CPUS); - max_leaf = query_hypervisor_info(); - - /* See if the hypercall page is already set */ hypercall_msr.as_uint64 = 0; rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 947455d30707..a7e35c842fed 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -40,25 +40,6 @@ */ #define HV_UTIL_NEGO_TIMEOUT 55 -/* - * The below CPUID leaves are present if VersionAndFeatures.HypervisorPresent - * is set by CPUID(HVCPUID_VERSION_FEATURES). - */ -enum hv_cpuid_function { - HVCPUID_VERSION_FEATURES = 0x00000001, - HVCPUID_VENDOR_MAXFUNCTION = 0x40000000, - HVCPUID_INTERFACE = 0x40000001, - - /* - * The remaining functions depend on the value of - * HVCPUID_INTERFACE - */ - HVCPUID_VERSION = 0x40000002, - HVCPUID_FEATURES = 0x40000003, - HVCPUID_ENLIGHTENMENT_INFO = 0x40000004, - HVCPUID_IMPLEMENTATION_LIMITS = 0x40000005, -}; - #define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 #define HV_X64_MSR_CRASH_P0 0x40000100 @@ -444,14 +425,6 @@ extern int hv_synic_cleanup(unsigned int cpu); extern void hv_synic_clockevents_cleanup(void); -/* - * Host version information. - */ -extern unsigned int host_info_eax; -extern unsigned int host_info_ebx; -extern unsigned int host_info_ecx; -extern unsigned int host_info_edx; - /* Interface */ -- cgit v1.2.3 From d058fa7e98ff01a4b4750a2210fc19906db3cbe1 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:48 -0700 Subject: Drivers: hv: vmbus: Move the crash notification function As part of the effort to separate out architecture specific code, move the crash notification function. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 26 ++++++++++++++++++++++++++ arch/x86/include/asm/mshyperv.h | 1 + arch/x86/include/uapi/asm/hyperv.h | 8 ++++++++ drivers/hv/hyperv_vmbus.h | 10 ---------- drivers/hv/vmbus_drv.c | 25 ------------------------- 5 files changed, 35 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index 860233af4568..ce5fc7394814 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -209,3 +209,29 @@ u64 hv_do_hypercall(u64 control, void *input, void *output) #endif /* !x86_64 */ } EXPORT_SYMBOL_GPL(hv_do_hypercall); + +void hyperv_report_panic(struct pt_regs *regs) +{ + static bool panic_reported; + + /* + * We prefer to report panic on 'die' chain as we have proper + * registers to report, but if we miss it (e.g. on BUG()) we need + * to report it on 'panic'. + */ + if (panic_reported) + return; + panic_reported = true; + + wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); + wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); + wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); + wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); + wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); + + /* + * Let Hyper-V know there is crash data available + */ + wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); +} +EXPORT_SYMBOL_GPL(hyperv_report_panic); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 54729e3cba47..64e682d88684 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -114,5 +114,6 @@ void hv_remove_crash_handler(void); #if IS_ENABLED(CONFIG_HYPERV) void hyperv_init(void); +void hyperv_report_panic(struct pt_regs *regs); #endif #endif diff --git a/arch/x86/include/uapi/asm/hyperv.h b/arch/x86/include/uapi/asm/hyperv.h index 9b1a91834ac8..3a20ccf787b8 100644 --- a/arch/x86/include/uapi/asm/hyperv.h +++ b/arch/x86/include/uapi/asm/hyperv.h @@ -73,6 +73,9 @@ */ #define HV_X64_MSR_STAT_PAGES_AVAILABLE (1 << 8) +/* Crash MSR available */ +#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE (1 << 10) + /* * Feature identification: EBX indicates which flags were specified at * partition creation. The format is the same as the partition creation @@ -144,6 +147,11 @@ */ #define HV_X64_RELAXED_TIMING_RECOMMENDED (1 << 5) +/* + * Crash notification flag. + */ +#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) + /* MSR used to identify the guest OS. */ #define HV_X64_MSR_GUEST_OS_ID 0x40000000 diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index a7e35c842fed..59eb28c45ff5 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -40,16 +40,6 @@ */ #define HV_UTIL_NEGO_TIMEOUT 55 -#define HV_FEATURE_GUEST_CRASH_MSR_AVAILABLE 0x400 - -#define HV_X64_MSR_CRASH_P0 0x40000100 -#define HV_X64_MSR_CRASH_P1 0x40000101 -#define HV_X64_MSR_CRASH_P2 0x40000102 -#define HV_X64_MSR_CRASH_P3 0x40000103 -#define HV_X64_MSR_CRASH_P4 0x40000104 -#define HV_X64_MSR_CRASH_CTL 0x40000105 - -#define HV_CRASH_CTL_CRASH_NOTIFY (1ULL << 63) /* Define version of the synthetic interrupt controller. */ #define HV_SYNIC_VERSION (1) diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 565bdd16134a..8e81346114d4 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -56,31 +56,6 @@ static struct completion probe_event; static int hyperv_cpuhp_online; -static void hyperv_report_panic(struct pt_regs *regs) -{ - static bool panic_reported; - - /* - * We prefer to report panic on 'die' chain as we have proper - * registers to report, but if we miss it (e.g. on BUG()) we need - * to report it on 'panic'. - */ - if (panic_reported) - return; - panic_reported = true; - - wrmsrl(HV_X64_MSR_CRASH_P0, regs->ip); - wrmsrl(HV_X64_MSR_CRASH_P1, regs->ax); - wrmsrl(HV_X64_MSR_CRASH_P2, regs->bx); - wrmsrl(HV_X64_MSR_CRASH_P3, regs->cx); - wrmsrl(HV_X64_MSR_CRASH_P4, regs->dx); - - /* - * Let Hyper-V know there is crash data available - */ - wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); -} - static int hyperv_panic_event(struct notifier_block *nb, unsigned long val, void *args) { -- cgit v1.2.3 From 73638cddaad861a5ebb2b119d8b318d4bded8f8d Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:49 -0700 Subject: Drivers: hv: vmbus: Move the check for hypercall page setup As part of the effort to separate out architecture specific code, move the check for detecting if the hypercall page is setup. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 15 +++++++++++++++ arch/x86/include/asm/mshyperv.h | 1 + drivers/hv/hv.c | 7 +------ 3 files changed, 17 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index ce5fc7394814..d289bc29d282 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -235,3 +235,18 @@ void hyperv_report_panic(struct pt_regs *regs) wrmsrl(HV_X64_MSR_CRASH_CTL, HV_CRASH_CTL_CRASH_NOTIFY); } EXPORT_SYMBOL_GPL(hyperv_report_panic); + +bool hv_is_hypercall_page_setup(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + + /* Check if the hypercall page is setup */ + hypercall_msr.as_uint64 = 0; + rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + if (!hypercall_msr.enable) + return false; + + return true; +} +EXPORT_SYMBOL_GPL(hv_is_hypercall_page_setup); diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 64e682d88684..c843ef64defe 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -115,5 +115,6 @@ void hv_remove_crash_handler(void); #if IS_ENABLED(CONFIG_HYPERV) void hyperv_init(void); void hyperv_report_panic(struct pt_regs *regs); +bool hv_is_hypercall_page_setup(void); #endif #endif diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 9985a347ed03..d28a8731baa0 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -49,7 +49,6 @@ struct hv_context hv_context = { */ int hv_init(void) { - union hv_x64_msr_hypercall_contents hypercall_msr; memset(hv_context.synic_event_page, 0, sizeof(void *) * NR_CPUS); memset(hv_context.synic_message_page, 0, @@ -65,11 +64,7 @@ int hv_init(void) memset(hv_context.clk_evt, 0, sizeof(void *) * NR_CPUS); - /* See if the hypercall page is already set */ - hypercall_msr.as_uint64 = 0; - rdmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); - - if (!hypercall_msr.enable) + if (!hv_is_hypercall_page_setup()) return -ENOTSUPP; return 0; -- cgit v1.2.3 From e810e48c0c9a1a1ebb90cfe966bce6dc80ce08e7 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:50 -0700 Subject: Drivers: hv: vmbus: Move the code to signal end of message As part of the effort to separate out architecture specific code, move the code for signaling end of message. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 37 +++++++++++++++++++++++++++++++++++++ drivers/hv/channel_mgmt.c | 1 + drivers/hv/hyperv_vmbus.h | 35 ----------------------------------- 3 files changed, 38 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index c843ef64defe..b57b470ac2a7 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -99,6 +99,43 @@ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, return guest_id; } + +/* Free the message slot and signal end-of-message if required */ +static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) +{ + /* + * On crash we're reading some other CPU's message page and we need + * to be careful: this other CPU may already had cleared the header + * and the host may already had delivered some other message there. + * In case we blindly write msg->header.message_type we're going + * to lose it. We can still lose a message of the same type but + * we count on the fact that there can only be one + * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages + * on crash. + */ + if (cmpxchg(&msg->header.message_type, old_msg_type, + HVMSG_NONE) != old_msg_type) + return; + + /* + * Make sure the write to MessageType (ie set to + * HVMSG_NONE) happens before we read the + * MessagePending and EOMing. Otherwise, the EOMing + * will not deliver any more messages since there is + * no empty slot + */ + mb(); + + if (msg->header.message_flags.msg_pending) { + /* + * This will cause message queue rescan to + * possibly deliver another msg from the + * hypervisor + */ + wrmsrl(HV_X64_MSR_EOM, 0); + } +} + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/channel_mgmt.c b/drivers/hv/channel_mgmt.c index 0af7e39006c8..49d77be90ca4 100644 --- a/drivers/hv/channel_mgmt.c +++ b/drivers/hv/channel_mgmt.c @@ -31,6 +31,7 @@ #include #include #include +#include #include "hyperv_vmbus.h" diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 59eb28c45ff5..e9f5d2c2fb6b 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -521,41 +521,6 @@ struct vmbus_channel_message_table_entry { extern struct vmbus_channel_message_table_entry channel_message_table[CHANNELMSG_COUNT]; -/* Free the message slot and signal end-of-message if required */ -static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) -{ - /* - * On crash we're reading some other CPU's message page and we need - * to be careful: this other CPU may already had cleared the header - * and the host may already had delivered some other message there. - * In case we blindly write msg->header.message_type we're going - * to lose it. We can still lose a message of the same type but - * we count on the fact that there can only be one - * CHANNELMSG_UNLOAD_RESPONSE and we don't care about other messages - * on crash. - */ - if (cmpxchg(&msg->header.message_type, old_msg_type, - HVMSG_NONE) != old_msg_type) - return; - - /* - * Make sure the write to MessageType (ie set to - * HVMSG_NONE) happens before we read the - * MessagePending and EOMing. Otherwise, the EOMing - * will not deliver any more messages since there is - * no empty slot - */ - mb(); - - if (msg->header.message_flags.msg_pending) { - /* - * This will cause message queue rescan to - * possibly deliver another msg from the - * hypervisor - */ - wrmsrl(HV_X64_MSR_EOM, 0); - } -} /* General vmbus interface */ -- cgit v1.2.3 From d5116b4091ecca271c249ede43a49c1245920558 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:51 -0700 Subject: Drivers: hv: vmbus: Restructure the clockevents code Move the relevant code that programs the hypervisor to an architecture specific file. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 4 ++++ drivers/hv/hv.c | 10 +++++----- 2 files changed, 9 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index b57b470ac2a7..a58c201f3412 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -136,6 +136,10 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) } } +#define hv_get_current_tick(tick) rdmsrl(HV_X64_MSR_TIME_REF_COUNT, tick) +#define hv_init_timer(timer, tick) wrmsrl(timer, tick) +#define hv_init_timer_config(config, val) wrmsrl(config, val) + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index d28a8731baa0..ae5436e9c8a4 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -118,16 +118,16 @@ static int hv_ce_set_next_event(unsigned long delta, WARN_ON(!clockevent_state_oneshot(evt)); - rdmsrl(HV_X64_MSR_TIME_REF_COUNT, current_tick); + hv_get_current_tick(current_tick); current_tick += delta; - wrmsrl(HV_X64_MSR_STIMER0_COUNT, current_tick); + hv_init_timer(HV_X64_MSR_STIMER0_COUNT, current_tick); return 0; } static int hv_ce_shutdown(struct clock_event_device *evt) { - wrmsrl(HV_X64_MSR_STIMER0_COUNT, 0); - wrmsrl(HV_X64_MSR_STIMER0_CONFIG, 0); + hv_init_timer(HV_X64_MSR_STIMER0_COUNT, 0); + hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, 0); return 0; } @@ -139,7 +139,7 @@ static int hv_ce_set_oneshot(struct clock_event_device *evt) timer_cfg.enable = 1; timer_cfg.auto_enable = 1; timer_cfg.sintx = VMBUS_MESSAGE_SINT; - wrmsrl(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); + hv_init_timer_config(HV_X64_MSR_STIMER0_CONFIG, timer_cfg.as_uint64); return 0; } -- cgit v1.2.3 From 155e4a2f28a59e5344dfa7c5d003161fe59a5bf2 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:54 -0700 Subject: Drivers: hv: vmbus: Define APIs to manipulate the message page As part of cleaning up architecture specific code, define APIs to manipulate the message page. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 3 +++ drivers/hv/hv.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index a58c201f3412..1e75141bc123 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -140,6 +140,9 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_init_timer(timer, tick) wrmsrl(timer, tick) #define hv_init_timer_config(config, val) wrmsrl(config, val) +#define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) +#define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index ced2077bb925..04ad97749884 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -268,12 +268,12 @@ int hv_synic_init(unsigned int cpu) u64 vp_index; /* Setup the Synic's message page */ - rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + hv_get_simp(simp.as_uint64); simp.simp_enabled = 1; simp.base_simp_gpa = virt_to_phys(hv_context.synic_message_page[cpu]) >> PAGE_SHIFT; - wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + hv_set_simp(simp.as_uint64); /* Setup the Synic's event page */ rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); @@ -392,11 +392,11 @@ int hv_synic_cleanup(unsigned int cpu) /* Disable the interrupt */ wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); - rdmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + hv_get_simp(simp.as_uint64); simp.simp_enabled = 0; simp.base_simp_gpa = 0; - wrmsrl(HV_X64_MSR_SIMP, simp.as_uint64); + hv_set_simp(simp.as_uint64); rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); siefp.siefp_enabled = 0; -- cgit v1.2.3 From 8e307bf82d76ab02e95a00d132d926f04db6ccab Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:55 -0700 Subject: Drivers: hv: vmbus: Define APIs to manipulate the event page As part of cleaning up architecture specific code, define APIs to manipulate the event page. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 3 +++ drivers/hv/hv.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1e75141bc123..2ea7e16fc678 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -143,6 +143,9 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_simp(val) rdmsrl(HV_X64_MSR_SIMP, val) #define hv_set_simp(val) wrmsrl(HV_X64_MSR_SIMP, val) +#define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) +#define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 04ad97749884..5680aeed585c 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -276,12 +276,12 @@ int hv_synic_init(unsigned int cpu) hv_set_simp(simp.as_uint64); /* Setup the Synic's event page */ - rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + hv_get_siefp(siefp.as_uint64); siefp.siefp_enabled = 1; siefp.base_siefp_gpa = virt_to_phys(hv_context.synic_event_page[cpu]) >> PAGE_SHIFT; - wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + hv_set_siefp(siefp.as_uint64); /* Setup the shared SINT. */ rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); @@ -398,11 +398,11 @@ int hv_synic_cleanup(unsigned int cpu) hv_set_simp(simp.as_uint64); - rdmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + hv_get_siefp(siefp.as_uint64); siefp.siefp_enabled = 0; siefp.base_siefp_gpa = 0; - wrmsrl(HV_X64_MSR_SIEFP, siefp.as_uint64); + hv_set_siefp(siefp.as_uint64); /* Disable the global synic bit */ rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); -- cgit v1.2.3 From 06d1d98a839f196e94cb726008fb2118e430f356 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:56 -0700 Subject: Drivers: hv: vmbus: Define APIs to manipulate the synthetic interrupt controller As part of cleaning up architecture specific code, define APIs to manipulate the interrupt controller state. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 3 +++ drivers/hv/hv.c | 8 ++++---- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2ea7e16fc678..1ea19a57cf4c 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -146,6 +146,9 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_siefp(val) rdmsrl(HV_X64_MSR_SIEFP, val) #define hv_set_siefp(val) wrmsrl(HV_X64_MSR_SIEFP, val) +#define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) +#define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 5680aeed585c..7cb036d4b243 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -294,10 +294,10 @@ int hv_synic_init(unsigned int cpu) wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); /* Enable the global synic bit */ - rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + hv_get_synic_state(sctrl.as_uint64); sctrl.enable = 1; - wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + hv_set_synic_state(sctrl.as_uint64); hv_context.synic_initialized = true; @@ -405,9 +405,9 @@ int hv_synic_cleanup(unsigned int cpu) hv_set_siefp(siefp.as_uint64); /* Disable the global synic bit */ - rdmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + hv_get_synic_state(sctrl.as_uint64); sctrl.enable = 0; - wrmsrl(HV_X64_MSR_SCONTROL, sctrl.as_uint64); + hv_set_synic_state(sctrl.as_uint64); return 0; } -- cgit v1.2.3 From 7297ff0ca9db7e2d830841035b95d8b94b529142 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:57 -0700 Subject: Drivers: hv: vmbus: Define an API to retrieve virtual processor index As part of cleaning up architecture specific code, define an API to retrieve the virtual procesor index. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 2 ++ drivers/hv/hv.c | 2 +- 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 1ea19a57cf4c..2d40bfc57e7a 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -149,6 +149,8 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_synic_state(val) rdmsrl(HV_X64_MSR_SCONTROL, val) #define hv_set_synic_state(val) wrmsrl(HV_X64_MSR_SCONTROL, val) +#define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 7cb036d4b243..945719026223 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -306,7 +306,7 @@ int hv_synic_init(unsigned int cpu) * of cpuid and Linux' notion of cpuid. * This array will be indexed using Linux cpuid. */ - rdmsrl(HV_X64_MSR_VP_INDEX, vp_index); + hv_get_vp_index(vp_index); hv_context.vp_index[cpu] = (u32)vp_index; /* -- cgit v1.2.3 From 37e11d5c7052a5ca55ef807731c75218ea341b4c Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Thu, 19 Jan 2017 11:51:58 -0700 Subject: Drivers: hv: vmbus: Define an APIs to manage interrupt state As part of cleaning up architecture specific code, define APIs to manage interrupt state. Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 3 +++ drivers/hv/hv.c | 12 ++++++++---- 2 files changed, 11 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 2d40bfc57e7a..42505d1158d6 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -151,6 +151,9 @@ static inline void vmbus_signal_eom(struct hv_message *msg, u32 old_msg_type) #define hv_get_vp_index(index) rdmsrl(HV_X64_MSR_VP_INDEX, index) +#define hv_get_synint_state(int_num, val) rdmsrl(int_num, val) +#define hv_set_synint_state(int_num, val) wrmsrl(int_num, val) + void hyperv_callback_vector(void); #ifdef CONFIG_TRACING #define trace_hyperv_callback_vector hyperv_callback_vector diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 945719026223..60594fa3250d 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -284,14 +284,16 @@ int hv_synic_init(unsigned int cpu) hv_set_siefp(siefp.as_uint64); /* Setup the shared SINT. */ - rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); shared_sint.as_uint64 = 0; shared_sint.vector = HYPERVISOR_CALLBACK_VECTOR; shared_sint.masked = false; shared_sint.auto_eoi = true; - wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); /* Enable the global synic bit */ hv_get_synic_state(sctrl.as_uint64); @@ -384,13 +386,15 @@ int hv_synic_cleanup(unsigned int cpu) hv_ce_shutdown(hv_context.clk_evt[cpu]); } - rdmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_get_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); shared_sint.masked = 1; /* Need to correctly cleanup in the case of SMP!!! */ /* Disable the interrupt */ - wrmsrl(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, shared_sint.as_uint64); + hv_set_synint_state(HV_X64_MSR_SINT0 + VMBUS_MESSAGE_SINT, + shared_sint.as_uint64); hv_get_simp(simp.as_uint64); simp.simp_enabled = 0; -- cgit v1.2.3 From 0b4c208d443ba2af82b4c70f99ca8df31e9a0020 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Tue, 20 Dec 2016 16:34:50 -0800 Subject: Revert "KVM: nested VMX: disable perf cpuid reporting" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit bc6134942dbbf31c25e9bd7c876be5da81c9e1ce. A CPUID instruction executed in VMX non-root mode always causes a VM-exit, regardless of the leaf being queried. Fixes: bc6134942dbb ("KVM: nested VMX: disable perf cpuid reporting") Signed-off-by: Jim Mattson [The issue solved by bc6134942dbb has been resolved with ff651cb613b4 ("KVM: nVMX: Add nested msr load/restore algorithm").] Signed-off-by: Radim Krčmář --- arch/x86/kvm/cpuid.c | 6 ------ arch/x86/kvm/vmx.c | 2 -- 2 files changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 09c2ac741567..c0e2036217ad 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -861,12 +861,6 @@ void kvm_cpuid(struct kvm_vcpu *vcpu, u32 *eax, u32 *ebx, u32 *ecx, u32 *edx) if (!best) best = check_cpuid_limit(vcpu, function, index); - /* - * Perfmon not yet supported for L2 guest. - */ - if (is_guest_mode(vcpu) && function == 0xa) - best = NULL; - if (best) { *eax = best->eax; *ebx = best->ebx; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4e691035a32d..c7bafa1457e2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8203,8 +8203,6 @@ static bool nested_vmx_exit_handled(struct kvm_vcpu *vcpu) case EXIT_REASON_TASK_SWITCH: return true; case EXIT_REASON_CPUID: - if (kvm_register_read(vcpu, VCPU_REGS_RAX) == 0xa) - return false; return true; case EXIT_REASON_HLT: return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING); -- cgit v1.2.3 From 4c45c5167c9563b1a2eee3e2fe954621355e4ca8 Mon Sep 17 00:00:00 2001 From: Jiri Slaby Date: Thu, 19 Jan 2017 12:47:30 +0100 Subject: x86/timer: Make delay() work during early bootup When a panic happens during bootup, "Rebooting in X seconds.." is shown, but reboot happens immediatelly. It is because panic() uses mdelay() and mdelay() calls __const_udelay() immediately, which does not work while booting. The per_cpu cpu_info.loops_per_jiffy value is not initialized yet, so __const_udelay() actually multiplies the number of loops by zero. This results in __const_udelay() to delay the execution only by a nanosecond or so. So check whether cpu_info.loops_per_jiffy is zero and use loops_per_jiffy in that case. mdelay() will not be so precise without proper calibration, but it works relatively well. Before: [ 0.170039] delaying 100ms [ 0.170828] done After [ 0.214042] delaying 100ms [ 0.313974] done I do not think the added check matters given we are about to spin the processor in the next few hundred cycles. Signed-off-by: Jiri Slaby Reviewed-by: Andy Shevchenko Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170119114730.2670-1-jslaby@suse.cz [ Minor edits. ] Signed-off-by: Ingo Molnar --- arch/x86/lib/delay.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/delay.c b/arch/x86/lib/delay.c index 073d1f1a620b..a8e91ae89fb3 100644 --- a/arch/x86/lib/delay.c +++ b/arch/x86/lib/delay.c @@ -156,13 +156,13 @@ EXPORT_SYMBOL(__delay); inline void __const_udelay(unsigned long xloops) { + unsigned long lpj = this_cpu_read(cpu_info.loops_per_jiffy) ? : loops_per_jiffy; int d0; xloops *= 4; asm("mull %%edx" :"=d" (xloops), "=&a" (d0) - :"1" (xloops), "0" - (this_cpu_read(cpu_info.loops_per_jiffy) * (HZ/4))); + :"1" (xloops), "0" (lpj * (HZ / 4))); __delay(++xloops); } -- cgit v1.2.3 From a8d4c8246b290ce97f88752d833804843041ac84 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Mon, 23 Jan 2017 14:48:23 +0800 Subject: x86/crash: Update the stale comment in reserve_crashkernel() CRASH_KERNEL_ADDR_MAX has been missing for a long time, update it with a more detailed explanation. Signed-off-by: Xunlei Pang Cc: Andrew Morton Cc: Baoquan He Cc: Borislav Petkov Cc: Dave Young Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Robert LeBlanc Cc: Thomas Gleixner Cc: kexec@lists.infradead.org Link: http://lkml.kernel.org/r/1485154103-18426-1-git-send-email-xlpang@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4cfba947d774..eb69b14dbfc8 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -575,7 +575,9 @@ static void __init reserve_crashkernel(void) /* 0 means: find the address automatically */ if (crash_base <= 0) { /* - * kexec want bzImage is below CRASH_KERNEL_ADDR_MAX + * Set CRASH_ADDR_LOW_MAX upper bound for crash memory, + * as old kexec-tools loads bzImage below that, unless + * "crashkernel=size[KMG],high" is specified. */ crash_base = memblock_find_in_range(CRASH_ALIGN, high ? CRASH_ADDR_HIGH_MAX -- cgit v1.2.3 From c26665ab5c49ad3e142e0f054ca3204f259ba09c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:40 +0100 Subject: x86/microcode/intel: Drop stashed AP patch pointer optimization This was meant to save us the scanning of the microcode containter in the initrd since the first AP had already done that but it can also hurt us: Imagine a single hyperthreaded CPU (Intel(R) Atom(TM) CPU N270, for example) which updates the microcode on the BSP but since the microcode engine is shared between the two threads, the update on CPU1 doesn't happen because it has already happened on CPU0 and we don't find a newer microcode revision on CPU1. Which doesn't set the intel_ucode_patch pointer and at initrd jettisoning time we don't save the microcode patch for later application. Now, when we suspend to RAM, the loaded microcode gets cleared so we need to reload but there's no patch saved in the cache. Removing the optimization fixes this issue and all is fine and dandy. Fixes: 06b8534cb728 ("x86/microcode: Rework microcode loading") Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-2-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/intel.c | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/intel.c b/arch/x86/kernel/cpu/microcode/intel.c index 3f329b74e040..8325d8a09ab0 100644 --- a/arch/x86/kernel/cpu/microcode/intel.c +++ b/arch/x86/kernel/cpu/microcode/intel.c @@ -41,7 +41,7 @@ static const char ucode_path[] = "kernel/x86/microcode/GenuineIntel.bin"; -/* Current microcode patch used in early patching */ +/* Current microcode patch used in early patching on the APs. */ struct microcode_intel *intel_ucode_patch; static inline bool cpu_signatures_match(unsigned int s1, unsigned int p1, @@ -607,12 +607,6 @@ int __init save_microcode_in_initrd_intel(void) struct ucode_cpu_info uci; struct cpio_data cp; - /* - * AP loading didn't find any microcode patch, no need to save anything. - */ - if (!intel_ucode_patch || IS_ERR(intel_ucode_patch)) - return 0; - if (!load_builtin_intel_microcode(&cp)) cp = find_microcode_in_initrd(ucode_path, false); @@ -628,7 +622,6 @@ int __init save_microcode_in_initrd_intel(void) return 0; } - /* * @res_patch, output: a pointer to the patch we found. */ -- cgit v1.2.3 From a585df8edabdb47ae25214ebb3a627ca7ce800d3 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:41 +0100 Subject: x86/MSR: Carve out bare minimum accessors Add __rdmsr() and __wrmsr() which *only* read and write an MSR with exception handling. Those are going to be used in early code, like the microcode loader, which cannot stomach tracing code piggybacking on the MSR operation. While at it, get rid of __native_write_msr_notrace(). Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-3-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/apic.h | 2 +- arch/x86/include/asm/msr.h | 51 +++++++++++++++++++++++++++------------------ 2 files changed, 32 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index 0c5fbc68e82d..eff8e36aaf72 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -195,7 +195,7 @@ static inline void native_apic_msr_write(u32 reg, u32 v) static inline void native_apic_msr_eoi_write(u32 reg, u32 v) { - wrmsr_notrace(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); + __wrmsr(APIC_BASE_MSR + (APIC_EOI >> 4), APIC_EOI_ACK, 0); } static inline u32 native_apic_msr_read(u32 reg) diff --git a/arch/x86/include/asm/msr.h b/arch/x86/include/asm/msr.h index db0b90c3b03e..898dba2e2e2c 100644 --- a/arch/x86/include/asm/msr.h +++ b/arch/x86/include/asm/msr.h @@ -80,7 +80,14 @@ static inline void do_trace_read_msr(unsigned int msr, u64 val, int failed) {} static inline void do_trace_rdpmc(unsigned int msr, u64 val, int failed) {} #endif -static inline unsigned long long native_read_msr(unsigned int msr) +/* + * __rdmsr() and __wrmsr() are the two primitives which are the bare minimum MSR + * accessors and should not have any tracing or other functionality piggybacking + * on them - those are *purely* for accessing MSRs and nothing more. So don't even + * think of extending them - you will be slapped with a stinking trout or a frozen + * shark will reach you, wherever you are! You've been warned. + */ +static inline unsigned long long notrace __rdmsr(unsigned int msr) { DECLARE_ARGS(val, low, high); @@ -88,11 +95,30 @@ static inline unsigned long long native_read_msr(unsigned int msr) "2:\n" _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_rdmsr_unsafe) : EAX_EDX_RET(val, low, high) : "c" (msr)); - if (msr_tracepoint_active(__tracepoint_read_msr)) - do_trace_read_msr(msr, EAX_EDX_VAL(val, low, high), 0); + return EAX_EDX_VAL(val, low, high); } +static inline void notrace __wrmsr(unsigned int msr, u32 low, u32 high) +{ + asm volatile("1: wrmsr\n" + "2:\n" + _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) + : : "c" (msr), "a"(low), "d" (high) : "memory"); +} + +static inline unsigned long long native_read_msr(unsigned int msr) +{ + unsigned long long val; + + val = __rdmsr(msr); + + if (msr_tracepoint_active(__tracepoint_read_msr)) + do_trace_read_msr(msr, val, 0); + + return val; +} + static inline unsigned long long native_read_msr_safe(unsigned int msr, int *err) { @@ -114,31 +140,16 @@ static inline unsigned long long native_read_msr_safe(unsigned int msr, return EAX_EDX_VAL(val, low, high); } -/* Can be uninlined because referenced by paravirt */ -static inline void notrace -__native_write_msr_notrace(unsigned int msr, u32 low, u32 high) -{ - asm volatile("1: wrmsr\n" - "2:\n" - _ASM_EXTABLE_HANDLE(1b, 2b, ex_handler_wrmsr_unsafe) - : : "c" (msr), "a"(low), "d" (high) : "memory"); -} - /* Can be uninlined because referenced by paravirt */ static inline void notrace native_write_msr(unsigned int msr, u32 low, u32 high) { - __native_write_msr_notrace(msr, low, high); + __wrmsr(msr, low, high); + if (msr_tracepoint_active(__tracepoint_write_msr)) do_trace_write_msr(msr, ((u64)high << 32 | low), 0); } -static inline void -wrmsr_notrace(unsigned int msr, u32 low, u32 high) -{ - __native_write_msr_notrace(msr, low, high); -} - /* Can be uninlined because referenced by paravirt */ static inline int notrace native_write_msr_safe(unsigned int msr, u32 low, u32 high) -- cgit v1.2.3 From 0c12d18ab96e4da9f3e963bc242689bdeaaf2330 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:42 +0100 Subject: x86/microcode: Convert to bare minimum MSR accessors Having tracepoints to the MSR accessors makes them unsuitable for early microcode loading: think 32-bit before paging is enabled and us chasing pointers to test whether a tracepoint is enabled or not. Results in a reliable triple fault. Convert to the bare ones. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-4-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/microcode.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 38711df3bcb5..90b22bbdfce9 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -7,18 +7,17 @@ #define native_rdmsr(msr, val1, val2) \ do { \ - u64 __val = native_read_msr((msr)); \ + u64 __val = __rdmsr((msr)); \ (void)((val1) = (u32)__val); \ (void)((val2) = (u32)(__val >> 32)); \ } while (0) #define native_wrmsr(msr, low, high) \ - native_write_msr(msr, low, high) + __wrmsr(msr, low, high) #define native_wrmsrl(msr, val) \ - native_write_msr((msr), \ - (u32)((u64)(val)), \ - (u32)((u64)(val) >> 32)) + __wrmsr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)) struct ucode_patch { struct list_head plist; -- cgit v1.2.3 From 1f02ac0682055418753fe30e6433dcb11fd03e1c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:43 +0100 Subject: x86/microcode/AMD: Clean up find_equiv_id() No need to have it marked "inline" - let gcc decide. Also, shorten the argument name and simplify while-test. While at it, make it into a proper for-loop and simplify it even more, as tglx suggests. No functionality change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-5-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 6a31e2691f3a..5c1509a38048 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -97,20 +97,13 @@ static size_t compute_container_size(u8 *data, u32 total_size) return size; } -static inline u16 find_equiv_id(struct equiv_cpu_entry *equiv_cpu_table, - unsigned int sig) +static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) { - int i = 0; - - if (!equiv_cpu_table) - return 0; - - while (equiv_cpu_table[i].installed_cpu != 0) { - if (sig == equiv_cpu_table[i].installed_cpu) - return equiv_cpu_table[i].equiv_cpu; - - i++; + for (; equiv_table && equiv_table->installed_cpu; equiv_table++) { + if (sig == equiv_table->installed_cpu) + return equiv_table->equiv_cpu; } + return 0; } -- cgit v1.2.3 From ef901dc33dfa9abffe7d954628b5ec7ba7f62aec Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:44 +0100 Subject: x86/microcode/AMD: Shorten function parameter's name The whole driver calls this "mc", do that here too. No functionality change. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-6-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 5c1509a38048..1691f41fcfdb 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -187,15 +187,15 @@ find_proper_container(u8 *ucode, size_t size, struct container *ret_cont) return eq_id; } -static int __apply_microcode_amd(struct microcode_amd *mc_amd) +static int __apply_microcode_amd(struct microcode_amd *mc) { u32 rev, dummy; - native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc_amd->hdr.data_code); + native_wrmsrl(MSR_AMD64_PATCH_LOADER, (u64)(long)&mc->hdr.data_code); /* verify patch application was successful */ native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (rev != mc_amd->hdr.patch_id) + if (rev != mc->hdr.patch_id) return -1; return 0; -- cgit v1.2.3 From f454177f739e92620d84a1f42f91b03007a1cdd0 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:45 +0100 Subject: x86/microcode/AMD: Extend the container struct Make it into a container descriptor which is being passed around and stores important info like the matching container and the patch for the current CPU. Make it static too. Later patches will use this and thus get rid of a double container parsing. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-7-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 26 +++++++++++++++----------- 1 file changed, 15 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 1691f41fcfdb..4e2238345308 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -42,11 +42,15 @@ static struct equiv_cpu_entry *equiv_cpu_table; /* * This points to the current valid container of microcode patches which we will - * save from the initrd/builtin before jettisoning its contents. + * save from the initrd/builtin before jettisoning its contents. @mc is the + * microcode patch we found to match. */ -struct container { - u8 *data; - size_t size; +static struct cont_desc { + struct microcode_amd *mc; + u32 psize; + u16 eq_id; + u8 *data; + size_t size; } cont; static u32 ucode_new_rev; @@ -113,9 +117,9 @@ static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) * table or 0 if none found. */ static u16 -find_proper_container(u8 *ucode, size_t size, struct container *ret_cont) +find_proper_container(u8 *ucode, size_t size, struct cont_desc *desc) { - struct container ret = { NULL, 0 }; + struct cont_desc ret = { 0 }; u32 eax, ebx, ecx, edx; struct equiv_cpu_entry *eq; int offset, left; @@ -158,7 +162,7 @@ find_proper_container(u8 *ucode, size_t size, struct container *ret_cont) */ left = ret.size - offset; - *ret_cont = ret; + *desc = ret; return eq_id; } @@ -213,11 +217,11 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * Returns true if container found (sets @ret_cont), false otherwise. */ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, - struct container *ret_cont) + struct cont_desc *desc) { u8 (*patch)[PATCH_MAX_SIZE]; u32 rev, *header, *new_rev; - struct container ret; + struct cont_desc ret; int offset, left; u16 eq_id = 0; u8 *data; @@ -270,8 +274,8 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, left -= offset; } - if (ret_cont) - *ret_cont = ret; + if (desc) + *desc = ret; return true; } -- cgit v1.2.3 From 8801b3fcb5744dc536272ba4ccfd6faca9b46705 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:46 +0100 Subject: x86/microcode/AMD: Rework container parsing It was pretty clumsy before and the whole work of parsing the microcode containers was spread around the functions wrongly. Clean it up so that there's a main scan_containers() function which iterates over the microcode blob and picks apart the containers glued together. For each container, it calls a parse_container() helper which concentrates on one container only: sanity-checking, parsing, counting microcode patches in there, etc. It makes much more sense now and it is actually very readable. Oh, and we luvz a diffstat removing more crap than adding. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-8-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 238 ++++++++++++++++-------------------- 1 file changed, 105 insertions(+), 133 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4e2238345308..9d5f4f3626f7 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -64,43 +64,6 @@ static u16 this_equiv_id; static const char ucode_path[] __maybe_unused = "kernel/x86/microcode/AuthenticAMD.bin"; -static size_t compute_container_size(u8 *data, u32 total_size) -{ - size_t size = 0; - u32 *header = (u32 *)data; - - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return size; - - size = header[2] + CONTAINER_HDR_SZ; - total_size -= size; - data += size; - - while (total_size) { - u16 patch_size; - - header = (u32 *)data; - - if (header[0] != UCODE_UCODE_TYPE) - break; - - /* - * Sanity-check patch size. - */ - patch_size = header[1]; - if (patch_size > PATCH_MAX_SIZE) - break; - - size += patch_size + SECTION_HDR_SIZE; - data += patch_size + SECTION_HDR_SIZE; - total_size -= patch_size + SECTION_HDR_SIZE; - } - - return size; -} - static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) { for (; equiv_table && equiv_table->installed_cpu; equiv_table++) { @@ -115,80 +78,106 @@ static u16 find_equiv_id(struct equiv_cpu_entry *equiv_table, u32 sig) * This scans the ucode blob for the proper container as we can have multiple * containers glued together. Returns the equivalence ID from the equivalence * table or 0 if none found. + * Returns the amount of bytes consumed while scanning. @desc contains all the + * data we're going to use in later stages of the application. */ -static u16 -find_proper_container(u8 *ucode, size_t size, struct cont_desc *desc) +static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) { - struct cont_desc ret = { 0 }; - u32 eax, ebx, ecx, edx; struct equiv_cpu_entry *eq; - int offset, left; - u16 eq_id = 0; - u32 *header; - u8 *data; + ssize_t orig_size = size; + u32 *hdr = (u32 *)ucode; + u32 eax, ebx, ecx, edx; + u16 eq_id; + u8 *buf; - data = ucode; - left = size; - header = (u32 *)data; + /* Am I looking at an equivalence table header? */ + if (hdr[0] != UCODE_MAGIC || + hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || + hdr[2] == 0) { + desc->eq_id = 0; + return CONTAINER_HDR_SZ; + } + buf = ucode; - /* find equiv cpu table */ - if (header[0] != UCODE_MAGIC || - header[1] != UCODE_EQUIV_CPU_TABLE_TYPE || /* type */ - header[2] == 0) /* size */ - return eq_id; + eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ); - eax = 0x00000001; + eax = 1; ecx = 0; native_cpuid(&eax, &ebx, &ecx, &edx); - while (left > 0) { - eq = (struct equiv_cpu_entry *)(data + CONTAINER_HDR_SZ); - - ret.data = data; + /* Find the equivalence ID of our CPU in this table: */ + eq_id = find_equiv_id(eq, eax); - /* Advance past the container header */ - offset = header[2] + CONTAINER_HDR_SZ; - data += offset; - left -= offset; + buf += hdr[2] + CONTAINER_HDR_SZ; + size -= hdr[2] + CONTAINER_HDR_SZ; - eq_id = find_equiv_id(eq, eax); - if (eq_id) { - ret.size = compute_container_size(ret.data, left + offset); + /* + * Scan through the rest of the container to find where it ends. We do + * some basic sanity-checking too. + */ + while (size > 0) { + struct microcode_amd *mc; + u32 patch_size; - /* - * truncate how much we need to iterate over in the - * ucode update loop below - */ - left = ret.size - offset; + hdr = (u32 *)buf; - *desc = ret; - return eq_id; - } + if (hdr[0] != UCODE_UCODE_TYPE) + break; - /* - * support multiple container files appended together. if this - * one does not have a matching equivalent cpu entry, we fast - * forward to the next container file. - */ - while (left > 0) { - header = (u32 *)data; + /* Sanity-check patch size. */ + patch_size = hdr[1]; + if (patch_size > PATCH_MAX_SIZE) + break; - if (header[0] == UCODE_MAGIC && - header[1] == UCODE_EQUIV_CPU_TABLE_TYPE) - break; + /* Skip patch section header: */ + buf += SECTION_HDR_SIZE; + size -= SECTION_HDR_SIZE; - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; + mc = (struct microcode_amd *)buf; + if (eq_id == mc->hdr.processor_rev_id) { + desc->psize = patch_size; + desc->mc = mc; } - /* mark where the next microcode container file starts */ - offset = data - (u8 *)ucode; - ucode = data; + buf += patch_size; + size -= patch_size; } - return eq_id; + /* + * If we have found a patch (desc->mc), it means we're looking at the + * container which has a patch for this CPU so return 0 to mean, @ucode + * already points to the proper container. Otherwise, we return the size + * we scanned so that we can advance to the next container in the + * buffer. + */ + if (desc->mc) { + desc->eq_id = eq_id; + desc->data = ucode; + desc->size = orig_size - size; + + return 0; + } + + return orig_size - size; +} + +/* + * Scan the ucode blob for the proper container as we can have multiple + * containers glued together. + */ +static void scan_containers(u8 *ucode, size_t size, struct cont_desc *desc) +{ + ssize_t rem = size; + + while (rem >= 0) { + ssize_t s = parse_container(ucode, rem, desc); + if (!s) + return; + + ucode += s; + rem -= s; + } } static int __apply_microcode_amd(struct microcode_amd *mc) @@ -214,17 +203,16 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * load_microcode_amd() to save equivalent cpu table and microcode patches in * kernel heap memory. * - * Returns true if container found (sets @ret_cont), false otherwise. + * Returns true if container found (sets @desc), false otherwise. */ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, - struct cont_desc *desc) + struct cont_desc *ret_desc) { + struct cont_desc desc = { 0 }; u8 (*patch)[PATCH_MAX_SIZE]; - u32 rev, *header, *new_rev; - struct cont_desc ret; - int offset, left; - u16 eq_id = 0; - u8 *data; + struct microcode_amd *mc; + u32 rev, *new_rev; + bool ret = false; #ifdef CONFIG_X86_32 new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); @@ -237,47 +225,31 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, if (check_current_patch_level(&rev, true)) return false; - eq_id = find_proper_container(ucode, size, &ret); - if (!eq_id) - return false; - - this_equiv_id = eq_id; - header = (u32 *)ret.data; - - /* We're pointing to an equiv table, skip over it. */ - data = ret.data + header[2] + CONTAINER_HDR_SZ; - left = ret.size - (header[2] + CONTAINER_HDR_SZ); - - while (left > 0) { - struct microcode_amd *mc; - - header = (u32 *)data; - if (header[0] != UCODE_UCODE_TYPE || /* type */ - header[1] == 0) /* size */ - break; + scan_containers(ucode, size, &desc); + if (!desc.eq_id) + return ret; - mc = (struct microcode_amd *)(data + SECTION_HDR_SIZE); + this_equiv_id = desc.eq_id; - if (eq_id == mc->hdr.processor_rev_id && rev < mc->hdr.patch_id) { + mc = desc.mc; + if (!mc) + return ret; - if (!__apply_microcode_amd(mc)) { - rev = mc->hdr.patch_id; - *new_rev = rev; + if (rev >= mc->hdr.patch_id) + return ret; - if (save_patch) - memcpy(patch, mc, min_t(u32, header[1], PATCH_MAX_SIZE)); - } - } + if (!__apply_microcode_amd(mc)) { + *new_rev = mc->hdr.patch_id; + ret = true; - offset = header[1] + SECTION_HDR_SIZE; - data += offset; - left -= offset; + if (save_patch) + memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE)); } - if (desc) - *desc = ret; + if (ret_desc) + *ret_desc = desc; - return true; + return ret; } static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) @@ -396,6 +368,7 @@ reget: } if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) { + cont.data = NULL; cont.size = -1; return; } @@ -434,7 +407,6 @@ int __init save_microcode_in_initrd_amd(unsigned int fam) { enum ucode_state ret; int retval = 0; - u16 eq_id; if (!cont.data) { if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) { @@ -450,8 +422,8 @@ int __init save_microcode_in_initrd_amd(unsigned int fam) return -EINVAL; } - eq_id = find_proper_container(cp.data, cp.size, &cont); - if (!eq_id) { + scan_containers(cp.data, cp.size, &cont); + if (!cont.eq_id) { cont.size = -1; return -EINVAL; } -- cgit v1.2.3 From 309aac77768c0c689eac4900ca8a6e9ef470035c Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:47 +0100 Subject: x86/microcode: Decrease CPUID use Get CPUID(1).EAX value once per CPU and propagate value into the callers instead of conveniently calling it every time. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-9-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 52 +++++++++++++++++------------------- arch/x86/kernel/cpu/microcode/core.c | 38 ++++++++++---------------- 2 files changed, 38 insertions(+), 52 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9d5f4f3626f7..9fb398e64b0a 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -47,6 +47,7 @@ static struct equiv_cpu_entry *equiv_cpu_table; */ static struct cont_desc { struct microcode_amd *mc; + u32 cpuid_1_eax; u32 psize; u16 eq_id; u8 *data; @@ -86,7 +87,6 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) struct equiv_cpu_entry *eq; ssize_t orig_size = size; u32 *hdr = (u32 *)ucode; - u32 eax, ebx, ecx, edx; u16 eq_id; u8 *buf; @@ -102,12 +102,8 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) eq = (struct equiv_cpu_entry *)(buf + CONTAINER_HDR_SZ); - eax = 1; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - /* Find the equivalence ID of our CPU in this table: */ - eq_id = find_equiv_id(eq, eax); + eq_id = find_equiv_id(eq, desc->cpuid_1_eax); buf += hdr[2] + CONTAINER_HDR_SZ; size -= hdr[2] + CONTAINER_HDR_SZ; @@ -205,8 +201,9 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * * Returns true if container found (sets @desc), false otherwise. */ -static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, - struct cont_desc *ret_desc) +static bool +apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, + bool save_patch, struct cont_desc *ret_desc) { struct cont_desc desc = { 0 }; u8 (*patch)[PATCH_MAX_SIZE]; @@ -225,6 +222,8 @@ static bool apply_microcode_early_amd(void *ucode, size_t size, bool save_patch, if (check_current_patch_level(&rev, true)) return false; + desc.cpuid_1_eax = cpuid_1_eax; + scan_containers(ucode, size, &desc); if (!desc.eq_id) return ret; @@ -267,10 +266,9 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) #endif } -void __init load_ucode_amd_bsp(unsigned int family) +void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) { struct ucode_cpu_info *uci; - u32 eax, ebx, ecx, edx; struct cpio_data cp; const char *path; bool use_pa; @@ -285,19 +283,16 @@ void __init load_ucode_amd_bsp(unsigned int family) use_pa = false; } - if (!get_builtin_microcode(&cp, family)) + if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) cp = find_microcode_in_initrd(path, use_pa); if (!(cp.data && cp.size)) return; - /* Get BSP's CPUID.EAX(1), needed in load_microcode_amd() */ - eax = 1; - ecx = 0; - native_cpuid(&eax, &ebx, &ecx, &edx); - uci->cpu_sig.sig = eax; + /* Needed in load_microcode_amd() */ + uci->cpu_sig.sig = cpuid_1_eax; - apply_microcode_early_amd(cp.data, cp.size, true, NULL); + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true, NULL); } #ifdef CONFIG_X86_32 @@ -308,7 +303,7 @@ void __init load_ucode_amd_bsp(unsigned int family) * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, * which is used upon resume from suspend. */ -void load_ucode_amd_ap(unsigned int family) +void load_ucode_amd_ap(unsigned int cpuid_1_eax) { struct microcode_amd *mc; struct cpio_data cp; @@ -319,7 +314,7 @@ void load_ucode_amd_ap(unsigned int family) return; } - if (!get_builtin_microcode(&cp, family)) + if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true); if (!(cp.data && cp.size)) @@ -329,14 +324,14 @@ void load_ucode_amd_ap(unsigned int family) * This would set amd_ucode_patch above so that the following APs can * use it directly instead of going down this path again. */ - apply_microcode_early_amd(cp.data, cp.size, true, NULL); + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true, NULL); } #else -void load_ucode_amd_ap(unsigned int family) +void load_ucode_amd_ap(unsigned int cpuid_1_eax) { struct equiv_cpu_entry *eq; struct microcode_amd *mc; - u32 rev, eax; + u32 rev; u16 eq_id; /* 64-bit runs with paging enabled, thus early==false. */ @@ -351,7 +346,7 @@ void load_ucode_amd_ap(unsigned int family) return; reget: - if (!get_builtin_microcode(&cp, family)) { + if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) { #ifdef CONFIG_BLK_DEV_INITRD cp = find_cpio_data(ucode_path, (void *)initrd_start, initrd_end - initrd_start, NULL); @@ -367,17 +362,16 @@ reget: } } - if (!apply_microcode_early_amd(cp.data, cp.size, false, &cont)) { + if (!apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false, &cont)) { cont.data = NULL; cont.size = -1; return; } } - eax = cpuid_eax(0x00000001); eq = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ); - eq_id = find_equiv_id(eq, eax); + eq_id = find_equiv_id(eq, cpuid_1_eax); if (!eq_id) return; @@ -403,7 +397,7 @@ reget: static enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); -int __init save_microcode_in_initrd_amd(unsigned int fam) +int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { enum ucode_state ret; int retval = 0; @@ -422,6 +416,8 @@ int __init save_microcode_in_initrd_amd(unsigned int fam) return -EINVAL; } + cont.cpuid_1_eax = cpuid_1_eax; + scan_containers(cp.data, cp.size, &cont); if (!cont.eq_id) { cont.size = -1; @@ -432,7 +428,7 @@ int __init save_microcode_in_initrd_amd(unsigned int fam) return -EINVAL; } - ret = load_microcode_amd(smp_processor_id(), fam, cont.data, cont.size); + ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), cont.data, cont.size); if (ret != UCODE_OK) retval = -EINVAL; diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 2af69d27da62..437996c9be67 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -64,10 +64,6 @@ static DEFINE_MUTEX(microcode_mutex); struct ucode_cpu_info ucode_cpu_info[NR_CPUS]; -/* - * Operations that are run on a target cpu: - */ - struct cpu_info_ctx { struct cpu_signature *cpu_sig; int err; @@ -76,7 +72,6 @@ struct cpu_info_ctx { static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; - u32 a, b, c, d; #ifdef CONFIG_X86_32 const char *cmdline = (const char *)__pa_nodebug(boot_command_line); @@ -92,16 +87,12 @@ static bool __init check_loader_disabled_bsp(void) if (!have_cpuid_p()) return *res; - a = 1; - c = 0; - native_cpuid(&a, &b, &c, &d); - /* * CPUID(1).ECX[31]: reserved for hypervisor use. This is still not * completely accurate as xen pv guests don't see that CPUID bit set but * that's good enough as they don't land on the BSP path anyway. */ - if (c & BIT(31)) + if (native_cpuid_ecx(1) & BIT(31)) return *res; if (cmdline_find_option_bool(cmdline, option) <= 0) @@ -131,23 +122,22 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { - int vendor; - unsigned int family; + unsigned int vendor, cpuid_1_eax; if (check_loader_disabled_bsp()) return; - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); + vendor = x86_cpuid_vendor(); + cpuid_1_eax = native_cpuid_eax(1); switch (vendor) { case X86_VENDOR_INTEL: - if (family >= 6) + if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_bsp(); break; case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_bsp(family); + if (x86_family(cpuid_1_eax) >= 0x10) + load_ucode_amd_bsp(cpuid_1_eax); break; default: break; @@ -165,22 +155,22 @@ static bool check_loader_disabled_ap(void) void load_ucode_ap(void) { - int vendor, family; + unsigned int vendor, cpuid_1_eax; if (check_loader_disabled_ap()) return; - vendor = x86_cpuid_vendor(); - family = x86_cpuid_family(); + vendor = x86_cpuid_vendor(); + cpuid_1_eax = native_cpuid_eax(1); switch (vendor) { case X86_VENDOR_INTEL: - if (family >= 6) + if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_ap(); break; case X86_VENDOR_AMD: - if (family >= 0x10) - load_ucode_amd_ap(family); + if (x86_family(cpuid_1_eax) >= 0x10) + load_ucode_amd_ap(cpuid_1_eax); break; default: break; @@ -198,7 +188,7 @@ static int __init save_microcode_in_initrd(void) break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - return save_microcode_in_initrd_amd(c->x86); + return save_microcode_in_initrd_amd(cpuid_eax(1)); break; default: break; -- cgit v1.2.3 From 3da9b41794590022d09caa345aaa7c812ac22971 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:48 +0100 Subject: x86/microcode/AMD: Get rid of global this_equiv_id We have a container which we update/prepare each time before applying a microcode patch instead of using a global. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-10-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 9fb398e64b0a..4cbfe70cf458 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -56,7 +56,6 @@ static struct cont_desc { static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; -static u16 this_equiv_id; /* * Microcode patch container file is prepended to the initrd in cpio @@ -228,8 +227,6 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, if (!desc.eq_id) return ret; - this_equiv_id = desc.eq_id; - mc = desc.mc; if (!mc) return ret; @@ -375,7 +372,7 @@ reget: if (!eq_id) return; - if (eq_id == this_equiv_id) { + if (eq_id == cont.eq_id) { mc = (struct microcode_amd *)amd_ucode_patch; if (mc && rev < mc->hdr.patch_id) { -- cgit v1.2.3 From 8cc26e0b4c49246564f773edbbefa3d5dc91d56e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:49 +0100 Subject: x86/microcode/AMD: Use find_microcode_in_initrd() Use the generic helper instead of semi-open-coding the procedure. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-11-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 4cbfe70cf458..7727f278de58 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -337,17 +337,15 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) /* First AP hasn't cached it yet, go through the blob. */ if (!cont.data) { - struct cpio_data cp = { NULL, 0, "" }; + struct cpio_data cp; if (cont.size == -1) return; reget: if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) { -#ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); -#endif + cp = find_microcode_in_initrd(ucode_path, false); + if (!(cp.data && cp.size)) { /* * Mark it so that other APs do not scan again @@ -401,13 +399,9 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) if (!cont.data) { if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) { - struct cpio_data cp = { NULL, 0, "" }; - -#ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); -#endif + struct cpio_data cp; + cp = find_microcode_in_initrd(ucode_path, false); if (!(cp.data && cp.size)) { cont.size = -1; return -EINVAL; -- cgit v1.2.3 From 7a93a40be23e5557934d773cc89b7b3627c08097 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:50 +0100 Subject: x86/microcode: Remove local vendor variable Use x86_cpuid_vendor() directly. No functionality change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-12-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/core.c | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 437996c9be67..dc54518299c4 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -122,15 +122,14 @@ bool get_builtin_firmware(struct cpio_data *cd, const char *name) void __init load_ucode_bsp(void) { - unsigned int vendor, cpuid_1_eax; + unsigned int cpuid_1_eax; if (check_loader_disabled_bsp()) return; - vendor = x86_cpuid_vendor(); cpuid_1_eax = native_cpuid_eax(1); - switch (vendor) { + switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_bsp(); @@ -155,15 +154,14 @@ static bool check_loader_disabled_ap(void) void load_ucode_ap(void) { - unsigned int vendor, cpuid_1_eax; + unsigned int cpuid_1_eax; if (check_loader_disabled_ap()) return; - vendor = x86_cpuid_vendor(); cpuid_1_eax = native_cpuid_eax(1); - switch (vendor) { + switch (x86_cpuid_vendor()) { case X86_VENDOR_INTEL: if (x86_family(cpuid_1_eax) >= 6) load_ucode_intel_ap(); -- cgit v1.2.3 From f3ad136d6ef966c8ba9090770c2bfe7e85f18471 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:51 +0100 Subject: x86/microcode/AMD: Check patch level only on the BSP Check final patch levels for AMD only on the BSP. This way, we decide early and only once whether to continue loading or to leave the loader disabled on such systems. Simplify a lot. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-13-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/microcode_amd.h | 2 - arch/x86/kernel/cpu/microcode/amd.c | 78 +++++------------------------------- arch/x86/kernel/cpu/microcode/core.c | 41 +++++++++++++++++++ 3 files changed, 52 insertions(+), 69 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode_amd.h b/arch/x86/include/asm/microcode_amd.h index 3e3e20be829a..3d57009e168b 100644 --- a/arch/x86/include/asm/microcode_amd.h +++ b/arch/x86/include/asm/microcode_amd.h @@ -54,6 +54,4 @@ static inline int __init save_microcode_in_initrd_amd(unsigned int family) { return -EINVAL; } void reload_ucode_amd(void) {} #endif - -extern bool check_current_patch_level(u32 *rev, bool early); #endif /* _ASM_X86_MICROCODE_AMD_H */ diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 7727f278de58..61743476c25b 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -207,7 +207,7 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, struct cont_desc desc = { 0 }; u8 (*patch)[PATCH_MAX_SIZE]; struct microcode_amd *mc; - u32 rev, *new_rev; + u32 rev, dummy, *new_rev; bool ret = false; #ifdef CONFIG_X86_32 @@ -218,9 +218,6 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, patch = &amd_ucode_patch; #endif - if (check_current_patch_level(&rev, true)) - return false; - desc.cpuid_1_eax = cpuid_1_eax; scan_containers(ucode, size, &desc); @@ -231,6 +228,7 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, if (!mc) return ret; + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); if (rev >= mc->hdr.patch_id) return ret; @@ -328,13 +326,8 @@ void load_ucode_amd_ap(unsigned int cpuid_1_eax) { struct equiv_cpu_entry *eq; struct microcode_amd *mc; - u32 rev; u16 eq_id; - /* 64-bit runs with paging enabled, thus early==false. */ - if (check_current_patch_level(&rev, false)) - return; - /* First AP hasn't cached it yet, go through the blob. */ if (!cont.data) { struct cpio_data cp; @@ -371,6 +364,10 @@ reget: return; if (eq_id == cont.eq_id) { + u32 rev, dummy; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + mc = (struct microcode_amd *)amd_ucode_patch; if (mc && rev < mc->hdr.patch_id) { @@ -436,19 +433,14 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) void reload_ucode_amd(void) { struct microcode_amd *mc; - u32 rev; - - /* - * early==false because this is a syscore ->resume path and by - * that time paging is long enabled. - */ - if (check_current_patch_level(&rev, false)) - return; + u32 rev, dummy; mc = (struct microcode_amd *)amd_ucode_patch; if (!mc) return; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); + if (rev < mc->hdr.patch_id) { if (!__apply_microcode_amd(mc)) { ucode_new_rev = mc->hdr.patch_id; @@ -586,60 +578,13 @@ static unsigned int verify_patch_size(u8 family, u32 patch_size, return patch_size; } -/* - * Those patch levels cannot be updated to newer ones and thus should be final. - */ -static u32 final_levels[] = { - 0x01000098, - 0x0100009f, - 0x010000af, - 0, /* T-101 terminator */ -}; - -/* - * Check the current patch level on this CPU. - * - * @rev: Use it to return the patch level. It is set to 0 in the case of - * error. - * - * Returns: - * - true: if update should stop - * - false: otherwise - */ -bool check_current_patch_level(u32 *rev, bool early) -{ - u32 lvl, dummy, i; - bool ret = false; - u32 *levels; - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); - - if (IS_ENABLED(CONFIG_X86_32) && early) - levels = (u32 *)__pa_nodebug(&final_levels); - else - levels = final_levels; - - for (i = 0; levels[i]; i++) { - if (lvl == levels[i]) { - lvl = 0; - ret = true; - break; - } - } - - if (rev) - *rev = lvl; - - return ret; -} - static int apply_microcode_amd(int cpu) { struct cpuinfo_x86 *c = &cpu_data(cpu); struct microcode_amd *mc_amd; struct ucode_cpu_info *uci; struct ucode_patch *p; - u32 rev; + u32 rev, dummy; BUG_ON(raw_smp_processor_id() != cpu); @@ -652,8 +597,7 @@ static int apply_microcode_amd(int cpu) mc_amd = p->data; uci->mc = p->data; - if (check_current_patch_level(&rev, false)) - return -1; + rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); /* need to apply patch? */ if (rev >= mc_amd->hdr.patch_id) { diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index dc54518299c4..3b74d2f315d3 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -69,6 +69,42 @@ struct cpu_info_ctx { int err; }; +/* + * Those patch levels cannot be updated to newer ones and thus should be final. + */ +static u32 final_levels[] = { + 0x01000098, + 0x0100009f, + 0x010000af, + 0, /* T-101 terminator */ +}; + +/* + * Check the current patch level on this CPU. + * + * Returns: + * - true: if update should stop + * - false: otherwise + */ +static bool amd_check_current_patch_level(void) +{ + u32 lvl, dummy, i; + u32 *levels; + + native_rdmsr(MSR_AMD64_PATCH_LEVEL, lvl, dummy); + + if (IS_ENABLED(CONFIG_X86_32)) + levels = (u32 *)__pa_nodebug(&final_levels); + else + levels = final_levels; + + for (i = 0; levels[i]; i++) { + if (lvl == levels[i]) + return true; + } + return false; +} + static bool __init check_loader_disabled_bsp(void) { static const char *__dis_opt_str = "dis_ucode_ldr"; @@ -95,6 +131,11 @@ static bool __init check_loader_disabled_bsp(void) if (native_cpuid_ecx(1) & BIT(31)) return *res; + if (x86_cpuid_vendor() == X86_VENDOR_AMD) { + if (amd_check_current_patch_level()) + return *res; + } + if (cmdline_find_option_bool(cmdline, option) <= 0) *res = false; -- cgit v1.2.3 From e71bb4ec073901ad50bfa86fed74fce7ac3210fe Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:52 +0100 Subject: x86/microcode/AMD: Unify load_ucode_amd_ap() Use a version for both bitness by adding a helper which does the actual container finding and parsing which can be used on any CPU - BSP or AP. Streamlines the paths more. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-14-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 81 ++++++++++++++----------------------- 1 file changed, 31 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 61743476c25b..fe9e865480af 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -261,7 +261,7 @@ static bool get_builtin_microcode(struct cpio_data *cp, unsigned int family) #endif } -void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) +void __load_ucode_amd(unsigned int cpuid_1_eax, struct cpio_data *ret) { struct ucode_cpu_info *uci; struct cpio_data cp; @@ -281,89 +281,71 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) cp = find_microcode_in_initrd(path, use_pa); - if (!(cp.data && cp.size)) - return; - /* Needed in load_microcode_amd() */ uci->cpu_sig.sig = cpuid_1_eax; - apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true, NULL); + *ret = cp; } -#ifdef CONFIG_X86_32 -/* - * On 32-bit, since AP's early load occurs before paging is turned on, we - * cannot traverse cpu_equiv_table and microcode_cache in kernel heap memory. - * So during cold boot, AP will apply_ucode_in_initrd() just like the BSP. - * In save_microcode_in_initrd_amd() BSP's patch is copied to amd_ucode_patch, - * which is used upon resume from suspend. - */ -void load_ucode_amd_ap(unsigned int cpuid_1_eax) +void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) { - struct microcode_amd *mc; - struct cpio_data cp; + struct cpio_data cp = { }; - mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); - if (mc->hdr.patch_id && mc->hdr.processor_rev_id) { - __apply_microcode_amd(mc); - return; - } - - if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) - cp = find_microcode_in_initrd((const char *)__pa_nodebug(ucode_path), true); + __load_ucode_amd(cpuid_1_eax, &cp); if (!(cp.data && cp.size)) return; - /* - * This would set amd_ucode_patch above so that the following APs can - * use it directly instead of going down this path again. - */ apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true, NULL); } -#else + void load_ucode_amd_ap(unsigned int cpuid_1_eax) { struct equiv_cpu_entry *eq; struct microcode_amd *mc; + struct cont_desc *desc; u16 eq_id; + if (IS_ENABLED(CONFIG_X86_32)) { + mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); + desc = (struct cont_desc *)__pa_nodebug(&cont); + } else { + mc = (struct microcode_amd *)amd_ucode_patch; + desc = &cont; + } + /* First AP hasn't cached it yet, go through the blob. */ - if (!cont.data) { - struct cpio_data cp; + if (!desc->data) { + struct cpio_data cp = { }; - if (cont.size == -1) + if (desc->size == -1) return; reget: - if (!get_builtin_microcode(&cp, x86_family(cpuid_1_eax))) { - cp = find_microcode_in_initrd(ucode_path, false); - - if (!(cp.data && cp.size)) { - /* - * Mark it so that other APs do not scan again - * for no real reason and slow down boot - * needlessly. - */ - cont.size = -1; - return; - } + __load_ucode_amd(cpuid_1_eax, &cp); + if (!(cp.data && cp.size)) { + /* + * Mark it so that other APs do not scan again for no + * real reason and slow down boot needlessly. + */ + desc->size = -1; + return; } - if (!apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false, &cont)) { - cont.data = NULL; - cont.size = -1; + if (!apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false, desc)) { + desc->data = NULL; + desc->size = -1; return; } } - eq = (struct equiv_cpu_entry *)(cont.data + CONTAINER_HDR_SZ); + eq = (struct equiv_cpu_entry *)(desc->data + CONTAINER_HDR_SZ); eq_id = find_equiv_id(eq, cpuid_1_eax); if (!eq_id) return; - if (eq_id == cont.eq_id) { + if (eq_id == desc->eq_id) { u32 rev, dummy; native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); @@ -384,7 +366,6 @@ reget: goto reget; } } -#endif /* CONFIG_X86_32 */ static enum ucode_state load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); -- cgit v1.2.3 From 72edfe950b36308353e27cdc02f334431239938a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:53 +0100 Subject: x86/microcode/AMD: Simplify saving from initrd No need to use the previously stashed info in the container - simply go ahead and parse the initrd once more. It simplifies and streamlines the code a whole lot. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-15-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 43 +++++++++++-------------------------- 1 file changed, 13 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index fe9e865480af..2a194e384207 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -372,43 +372,26 @@ load_microcode_amd(int cpu, u8 family, const u8 *data, size_t size); int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) { + struct cont_desc desc = { 0 }; enum ucode_state ret; - int retval = 0; - - if (!cont.data) { - if (IS_ENABLED(CONFIG_X86_32) && (cont.size != -1)) { - struct cpio_data cp; - - cp = find_microcode_in_initrd(ucode_path, false); - if (!(cp.data && cp.size)) { - cont.size = -1; - return -EINVAL; - } + struct cpio_data cp; - cont.cpuid_1_eax = cpuid_1_eax; + cp = find_microcode_in_initrd(ucode_path, false); + if (!(cp.data && cp.size)) + return -EINVAL; - scan_containers(cp.data, cp.size, &cont); - if (!cont.eq_id) { - cont.size = -1; - return -EINVAL; - } + desc.cpuid_1_eax = cpuid_1_eax; - } else - return -EINVAL; - } + scan_containers(cp.data, cp.size, &desc); + if (!desc.eq_id) + return -EINVAL; - ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), cont.data, cont.size); + ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), + desc.data, desc.size); if (ret != UCODE_OK) - retval = -EINVAL; - - /* - * This will be freed any msec now, stash patches for the current - * family and switch to patch cache for cpu hotplug, etc later. - */ - cont.data = NULL; - cont.size = 0; + return -EINVAL; - return retval; + return 0; } void reload_ucode_amd(void) -- cgit v1.2.3 From 69f5f983001f6d097aac774a9e917f44657f3367 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:54 +0100 Subject: x86/microcode/AMD: Remove AP scanning optimization The idea was to not scan the microcode blob on each AP (Application Processor) during boot and thus save us some milliseconds. However, on architectures where the microcode engine is shared between threads, this doesn't work. Here's why: The microcode on CPU0, i.e., the first thread, gets updated. The second thread, i.e., CPU1, i.e., the first AP walks into load_ucode_amd_ap(), sees that there's no container cached and goes and scans for the proper blob. It finds it and as a last step of apply_microcode_early_amd(), it tries to apply the patch but that core has already the updated microcode revision which it has received through CPU0's update. So it returns false and we do desc->size = -1 to prevent other APs from scanning. However, the next AP, CPU2, has a different microcode engine which hasn't been updated yet. The desc->size == -1 test prevents it from scanning the blob anew and we fail to update it. The fix is much more straight-forward than it looks: the BSP (BootStrapping Processor), i.e., CPU0, caches the microcode patch in amd_ucode_patch. We use that on the AP and try to apply it. In the 99.9999% of cases where we have homogeneous cores - *not* mixed-steppings - the application will be successful and we're good to go. In the remaining small set of systems, we will simply rescan the blob and find (or not, if none present) the proper patch and apply it then. Signed-off-by: Borislav Petkov Reviewed-by: Thomas Gleixner Link: http://lkml.kernel.org/r/20170120202955.4091-16-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 78 +++++++++---------------------------- 1 file changed, 18 insertions(+), 60 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 2a194e384207..5e1b57747c2f 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -45,14 +45,14 @@ static struct equiv_cpu_entry *equiv_cpu_table; * save from the initrd/builtin before jettisoning its contents. @mc is the * microcode patch we found to match. */ -static struct cont_desc { +struct cont_desc { struct microcode_amd *mc; u32 cpuid_1_eax; u32 psize; u16 eq_id; u8 *data; size_t size; -} cont; +}; static u32 ucode_new_rev; static u8 amd_ucode_patch[PATCH_MAX_SIZE]; @@ -201,8 +201,7 @@ static int __apply_microcode_amd(struct microcode_amd *mc) * Returns true if container found (sets @desc), false otherwise. */ static bool -apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, - bool save_patch, struct cont_desc *ret_desc) +apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_patch) { struct cont_desc desc = { 0 }; u8 (*patch)[PATCH_MAX_SIZE]; @@ -240,9 +239,6 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, memcpy(patch, mc, min_t(u32, desc.psize, PATCH_MAX_SIZE)); } - if (ret_desc) - *ret_desc = desc; - return ret; } @@ -292,79 +288,41 @@ void __init load_ucode_amd_bsp(unsigned int cpuid_1_eax) struct cpio_data cp = { }; __load_ucode_amd(cpuid_1_eax, &cp); - if (!(cp.data && cp.size)) return; - apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true, NULL); + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, true); } void load_ucode_amd_ap(unsigned int cpuid_1_eax) { - struct equiv_cpu_entry *eq; struct microcode_amd *mc; - struct cont_desc *desc; - u16 eq_id; + struct cpio_data cp; + u32 *new_rev, rev, dummy; if (IS_ENABLED(CONFIG_X86_32)) { - mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); - desc = (struct cont_desc *)__pa_nodebug(&cont); + mc = (struct microcode_amd *)__pa_nodebug(amd_ucode_patch); + new_rev = (u32 *)__pa_nodebug(&ucode_new_rev); } else { - mc = (struct microcode_amd *)amd_ucode_patch; - desc = &cont; + mc = (struct microcode_amd *)amd_ucode_patch; + new_rev = &ucode_new_rev; } - /* First AP hasn't cached it yet, go through the blob. */ - if (!desc->data) { - struct cpio_data cp = { }; - - if (desc->size == -1) - return; - -reget: - __load_ucode_amd(cpuid_1_eax, &cp); - if (!(cp.data && cp.size)) { - /* - * Mark it so that other APs do not scan again for no - * real reason and slow down boot needlessly. - */ - desc->size = -1; - return; - } + native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - if (!apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false, desc)) { - desc->data = NULL; - desc->size = -1; + /* Check whether we have saved a new patch already: */ + if (*new_rev && rev < mc->hdr.patch_id) { + if (!__apply_microcode_amd(mc)) { + *new_rev = mc->hdr.patch_id; return; } } - eq = (struct equiv_cpu_entry *)(desc->data + CONTAINER_HDR_SZ); - - eq_id = find_equiv_id(eq, cpuid_1_eax); - if (!eq_id) + __load_ucode_amd(cpuid_1_eax, &cp); + if (!(cp.data && cp.size)) return; - if (eq_id == desc->eq_id) { - u32 rev, dummy; - - native_rdmsr(MSR_AMD64_PATCH_LEVEL, rev, dummy); - - mc = (struct microcode_amd *)amd_ucode_patch; - - if (mc && rev < mc->hdr.patch_id) { - if (!__apply_microcode_amd(mc)) - ucode_new_rev = mc->hdr.patch_id; - } - - } else { - - /* - * AP has a different equivalence ID than BSP, looks like - * mixed-steppings silicon so go through the ucode blob anew. - */ - goto reget; - } + apply_microcode_early_amd(cpuid_1_eax, cp.data, cp.size, false); } static enum ucode_state -- cgit v1.2.3 From da0aa3dde05108e180eecd76534c55f43ea4b9c8 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 20 Jan 2017 21:29:55 +0100 Subject: x86/microcode/AMD: Remove struct cont_desc.eq_id The equivalence ID was needed outside of the container scanning logic but now, after this has been cleaned up, not anymore. Now, cont_desc.mc is used to denote whether the container we're looking at has the proper microcode patch for this CPU or not. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170120202955.4091-17-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/microcode/amd.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 5e1b57747c2f..7889ae492af0 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -49,7 +49,6 @@ struct cont_desc { struct microcode_amd *mc; u32 cpuid_1_eax; u32 psize; - u16 eq_id; u8 *data; size_t size; }; @@ -92,10 +91,8 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) /* Am I looking at an equivalence table header? */ if (hdr[0] != UCODE_MAGIC || hdr[1] != UCODE_EQUIV_CPU_TABLE_TYPE || - hdr[2] == 0) { - desc->eq_id = 0; + hdr[2] == 0) return CONTAINER_HDR_SZ; - } buf = ucode; @@ -147,9 +144,8 @@ static ssize_t parse_container(u8 *ucode, ssize_t size, struct cont_desc *desc) * buffer. */ if (desc->mc) { - desc->eq_id = eq_id; - desc->data = ucode; - desc->size = orig_size - size; + desc->data = ucode; + desc->size = orig_size - size; return 0; } @@ -220,8 +216,6 @@ apply_microcode_early_amd(u32 cpuid_1_eax, void *ucode, size_t size, bool save_p desc.cpuid_1_eax = cpuid_1_eax; scan_containers(ucode, size, &desc); - if (!desc.eq_id) - return ret; mc = desc.mc; if (!mc) @@ -341,7 +335,7 @@ int __init save_microcode_in_initrd_amd(unsigned int cpuid_1_eax) desc.cpuid_1_eax = cpuid_1_eax; scan_containers(cp.data, cp.size, &desc); - if (!desc.eq_id) + if (!desc.mc) return -EINVAL; ret = load_microcode_amd(smp_processor_id(), x86_family(cpuid_1_eax), -- cgit v1.2.3 From 4c833368f0bf748d4147bf301b1f95bc8eccb3c0 Mon Sep 17 00:00:00 2001 From: Kevin Hao Date: Sun, 22 Jan 2017 16:50:23 +0800 Subject: x86/fpu: Set the xcomp_bv when we fake up a XSAVES area I got the following calltrace on a Apollo Lake SoC with 32-bit kernel: WARNING: CPU: 2 PID: 261 at arch/x86/include/asm/fpu/internal.h:363 fpu__restore+0x1f5/0x260 [...] Hardware name: Intel Corp. Broxton P/NOTEBOOK, BIOS APLIRVPA.X64.0138.B35.1608091058 08/09/2016 Call Trace: dump_stack() __warn() ? fpu__restore() warn_slowpath_null() fpu__restore() __fpu__restore_sig() fpu__restore_sig() restore_sigcontext.isra.9() sys_sigreturn() do_int80_syscall_32() entry_INT80_32() The reason is that a #GP occurs when executing XRSTORS. The root cause is that we forget to set the xcomp_bv when we fake up the XSAVES area in the copyin_to_xsaves() function. Signed-off-by: Kevin Hao Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Denys Vlasenko Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Quentin Casasnovas Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/1485075023-30161-1-git-send-email-haokexin@gmail.com Signed-off-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- arch/x86/kernel/fpu/xstate.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1d7770447b3e..e287b9075527 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1070,6 +1070,7 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= xfeatures; + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; return 0; } -- cgit v1.2.3 From 587d531b8f67ebe62f8326849a7a685a03cbc904 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 19 Jan 2017 22:28:05 +0100 Subject: crypto: x86/crc32c - fix %progbits -> @progbits %progbits form is used on ARM (where @ is a comment char). x86 consistently uses @progbits everywhere else. Signed-off-by: Denys Vlasenko CC: Herbert Xu CC: Josh Poimboeuf CC: Xiaodong Liu CC: Megha Dey CC: George Spelvin CC: linux-crypto@vger.kernel.org CC: x86@kernel.org CC: linux-kernel@vger.kernel.org Reviewed-by: Josh Poimboeuf Signed-off-by: Herbert Xu --- arch/x86/crypto/crc32c-pcl-intel-asm_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S index dc05f010ca9b..7a7de27c6f41 100644 --- a/arch/x86/crypto/crc32c-pcl-intel-asm_64.S +++ b/arch/x86/crypto/crc32c-pcl-intel-asm_64.S @@ -312,7 +312,7 @@ do_return: ret ENDPROC(crc_pcl) -.section .rodata, "a", %progbits +.section .rodata, "a", @progbits ################################################################ ## jump table Table is 129 entries x 2 bytes each ################################################################ -- cgit v1.2.3 From e183914af00e15eb41ae666d44e323bfa154be13 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Thu, 19 Jan 2017 22:33:04 +0100 Subject: crypto: x86 - make constants readonly, allow linker to merge them A lot of asm-optimized routines in arch/x86/crypto/ keep its constants in .data. This is wrong, they should be on .rodata. Mnay of these constants are the same in different modules. For example, 128-bit shuffle mask 0x000102030405060708090A0B0C0D0E0F exists in at least half a dozen places. There is a way to let linker merge them and use just one copy. The rules are as follows: mergeable objects of different sizes should not share sections. You can't put them all in one .rodata section, they will lose "mergeability". GCC puts its mergeable constants in ".rodata.cstSIZE" sections, or ".rodata.cstSIZE." if -fdata-sections is used. This patch does the same: .section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 It is important that all data in such section consists of 16-byte elements, not larger ones, and there are no implicit use of one element from another. When this is not the case, use non-mergeable section: .section .rodata[.VAR_NAME], "a", @progbits This reduces .data by ~15 kbytes: text data bss dec hex filename 11097415 2705840 2630712 16433967 fac32f vmlinux-prev.o 11112095 2690672 2630712 16433479 fac147 vmlinux.o Merged objects are visible in System.map: ffffffff81a28810 r POLY ffffffff81a28810 r POLY ffffffff81a28820 r TWOONE ffffffff81a28820 r TWOONE ffffffff81a28830 r PSHUFFLE_BYTE_FLIP_MASK <- merged regardless of ffffffff81a28830 r SHUF_MASK <------------- the name difference ffffffff81a28830 r SHUF_MASK ffffffff81a28830 r SHUF_MASK .. ffffffff81a28d00 r K512 <- merged three identical 640-byte tables ffffffff81a28d00 r K512 ffffffff81a28d00 r K512 Use of object names in section name suffixes is not strictly necessary, but might help if someday link stage will use garbage collection to eliminate unused sections (ld --gc-sections). Signed-off-by: Denys Vlasenko CC: Herbert Xu CC: Josh Poimboeuf CC: Xiaodong Liu CC: Megha Dey CC: linux-crypto@vger.kernel.org CC: x86@kernel.org CC: linux-kernel@vger.kernel.org Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_asm.S | 37 +++++++++++++++++----- arch/x86/crypto/aesni-intel_avx-x86_64.S | 32 ++++++++++++++----- arch/x86/crypto/camellia-aesni-avx-asm_64.S | 5 ++- arch/x86/crypto/camellia-aesni-avx2-asm_64.S | 12 +++++-- arch/x86/crypto/cast5-avx-x86_64-asm_64.S | 14 ++++++-- arch/x86/crypto/cast6-avx-x86_64-asm_64.S | 12 +++++-- arch/x86/crypto/chacha20-avx2-x86_64.S | 9 ++++-- arch/x86/crypto/chacha20-ssse3-x86_64.S | 7 ++-- arch/x86/crypto/crct10dif-pcl-asm_64.S | 14 ++++++-- arch/x86/crypto/des3_ede-asm_64.S | 2 +- arch/x86/crypto/ghash-clmulni-intel_asm.S | 3 +- arch/x86/crypto/poly1305-avx2-x86_64.S | 6 ++-- arch/x86/crypto/poly1305-sse2-x86_64.S | 6 ++-- arch/x86/crypto/serpent-avx-x86_64-asm_64.S | 5 +-- arch/x86/crypto/serpent-avx2-asm_64.S | 9 ++++-- arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S | 6 ++-- arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S | 3 +- arch/x86/crypto/sha1-mb/sha1_x8_avx2.S | 15 +++++++-- arch/x86/crypto/sha1_ni_asm.S | 8 +++-- arch/x86/crypto/sha256-avx-asm.S | 9 +++++- arch/x86/crypto/sha256-avx2-asm.S | 9 +++++- .../crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S | 6 ++-- .../crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S | 3 +- arch/x86/crypto/sha256-mb/sha256_x8_avx2.S | 7 +++- arch/x86/crypto/sha256-ssse3-asm.S | 8 ++++- arch/x86/crypto/sha256_ni_asm.S | 4 ++- arch/x86/crypto/sha512-avx-asm.S | 9 ++++-- arch/x86/crypto/sha512-avx2-asm.S | 10 ++++-- .../crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S | 10 ++++-- .../crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S | 4 ++- arch/x86/crypto/sha512-mb/sha512_x4_avx2.S | 4 ++- arch/x86/crypto/sha512-ssse3-asm.S | 9 ++++-- arch/x86/crypto/twofish-avx-x86_64-asm_64.S | 6 ++-- 33 files changed, 229 insertions(+), 74 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_asm.S b/arch/x86/crypto/aesni-intel_asm.S index 383a6f84a060..3c465184ff8a 100644 --- a/arch/x86/crypto/aesni-intel_asm.S +++ b/arch/x86/crypto/aesni-intel_asm.S @@ -46,28 +46,49 @@ #ifdef __x86_64__ -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.gf128mul_x_ble_mask, "aM", @progbits, 16 .align 16 .Lgf128mul_x_ble_mask: .octa 0x00000000000000010000000000000087 +.section .rodata.cst16.POLY, "aM", @progbits, 16 +.align 16 POLY: .octa 0xC2000000000000000000000000000001 +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. -# more specifically, ALL_F should follow SHIFT_MASK, -# and ZERO should follow ALL_F - +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst16.MASK1, "aM", @progbits, 16 +.align 16 MASK1: .octa 0x0000000000000000ffffffffffffffff +.section .rodata.cst16.MASK2, "aM", @progbits, 16 +.align 16 MASK2: .octa 0xffffffffffffffff0000000000000000 -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 ONE: .octa 0x00000000000000000000000000000001 +.section .rodata.cst16.F_MIN_MASK, "aM", @progbits, 16 +.align 16 F_MIN_MASK: .octa 0xf1f2f3f4f5f6f7f8f9fafbfcfdfeff0 +.section .rodata.cst16.dec, "aM", @progbits, 16 +.align 16 dec: .octa 0x1 +.section .rodata.cst16.enc, "aM", @progbits, 16 +.align 16 enc: .octa 0x2 +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, +# and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + .text diff --git a/arch/x86/crypto/aesni-intel_avx-x86_64.S b/arch/x86/crypto/aesni-intel_avx-x86_64.S index 522ab68d1c88..d664382c6e56 100644 --- a/arch/x86/crypto/aesni-intel_avx-x86_64.S +++ b/arch/x86/crypto/aesni-intel_avx-x86_64.S @@ -122,23 +122,39 @@ #include #include -.data +# constants in mergeable sections, linker can reorder and merge +.section .rodata.cst16.POLY, "aM", @progbits, 16 .align 16 - POLY: .octa 0xC2000000000000000000000000000001 + +.section .rodata.cst16.POLY2, "aM", @progbits, 16 +.align 16 POLY2: .octa 0xC20000000000000000000001C2000000 -TWOONE: .octa 0x00000001000000000000000000000001 -# order of these constants should not change. -# more specifically, ALL_F should follow SHIFT_MASK, and ZERO should follow ALL_F +.section .rodata.cst16.TWOONE, "aM", @progbits, 16 +.align 16 +TWOONE: .octa 0x00000001000000000000000000000001 +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F -SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 -ALL_F: .octa 0xffffffffffffffffffffffffffffffff -ZERO: .octa 0x00000000000000000000000000000000 + +.section .rodata.cst16.ONE, "aM", @progbits, 16 +.align 16 ONE: .octa 0x00000000000000000000000000000001 + +.section .rodata.cst16.ONEf, "aM", @progbits, 16 +.align 16 ONEf: .octa 0x01000000000000000000000000000000 +# order of these constants should not change. +# more specifically, ALL_F should follow SHIFT_MASK, and zero should follow ALL_F +.section .rodata, "a", @progbits +.align 16 +SHIFT_MASK: .octa 0x0f0e0d0c0b0a09080706050403020100 +ALL_F: .octa 0xffffffffffffffffffffffffffffffff + .octa 0x00000000000000000000000000000000 + .text diff --git a/arch/x86/crypto/camellia-aesni-avx-asm_64.S b/arch/x86/crypto/camellia-aesni-avx-asm_64.S index aa9e8bd163f6..f7c495e2863c 100644 --- a/arch/x86/crypto/camellia-aesni-avx-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx-asm_64.S @@ -571,7 +571,9 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 16(rio); \ vmovdqu y7, 15 * 16(rio); -.data + +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 .align 16 #define SHUFB_BYTES(idx) \ @@ -711,6 +713,7 @@ ENDPROC(roundsm16_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 /* 4-bit mask */ +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 .L0f0f0f0f: .long 0x0f0f0f0f diff --git a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S index 16186c18656d..eee5b3982cfd 100644 --- a/arch/x86/crypto/camellia-aesni-avx2-asm_64.S +++ b/arch/x86/crypto/camellia-aesni-avx2-asm_64.S @@ -610,20 +610,25 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) vmovdqu y6, 14 * 32(rio); \ vmovdqu y7, 15 * 32(rio); -.data -.align 32 +.section .rodata.cst32.shufb_16x16b, "aM", @progbits, 32 +.align 32 #define SHUFB_BYTES(idx) \ 0 + (idx), 4 + (idx), 8 + (idx), 12 + (idx) - .Lshufb_16x16b: .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) .byte SHUFB_BYTES(0), SHUFB_BYTES(1), SHUFB_BYTES(2), SHUFB_BYTES(3) +.section .rodata.cst32.pack_bswap, "aM", @progbits, 32 +.align 32 .Lpack_bswap: .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 .long 0x00010203, 0x04050607, 0x80808080, 0x80808080 +/* NB: section is mergeable, all elements must be aligned 16-byte blocks */ +.section .rodata.cst16, "aM", @progbits, 16 +.align 16 + /* For CTR-mode IV byteswap */ .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 @@ -750,6 +755,7 @@ ENDPROC(roundsm32_x4_x5_x6_x7_x0_x1_x2_x3_y4_y5_y6_y7_y0_y1_y2_y3_ab) .byte 0x00, 0x0d, 0x0a, 0x07, 0x04, 0x01, 0x0e, 0x0b .byte 0x08, 0x05, 0x02, 0x0f, 0x0c, 0x09, 0x06, 0x03 +.section .rodata.cst4.L0f0f0f0f, "aM", @progbits, 4 .align 4 /* 4-bit mask */ .L0f0f0f0f: diff --git a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S index 14fa1966bf01..b4a8806234ea 100644 --- a/arch/x86/crypto/cast5-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast5-avx-x86_64-asm_64.S @@ -195,19 +195,29 @@ vpshufb rmask, x0, x0; \ vpshufb rmask, x1, x1; -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .byte 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 +.align 16 .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.bswap_iv_mask, "aM", @progbits, 16 +.align 16 .Lbswap_iv_mask: .byte 7, 6, 5, 4, 3, 2, 1, 0, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 +.section .rodata.cst4.32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S index c419389889cd..952d3156a933 100644 --- a/arch/x86/crypto/cast6-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/cast6-avx-x86_64-asm_64.S @@ -225,8 +225,7 @@ vpshufb rmask, x2, x2; \ vpshufb rmask, x3, x3; -.data - +.section .rodata.cst16, "aM", @progbits, 16 .align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 @@ -244,10 +243,19 @@ .byte 12, 13, 14, 15, 8, 9, 10, 11, 7, 6, 5, 4, 3, 2, 1, 0 .Lrkr_dec_QBAR_QBAR_QBAR_QBAR: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst4.L16_mask, "aM", @progbits, 4 +.align 4 .L16_mask: .byte 16, 16, 16, 16 + +.section .rodata.cst4.L32_mask, "aM", @progbits, 4 +.align 4 .L32_mask: .byte 32, 0, 0, 0 + +.section .rodata.cst4.first_mask, "aM", @progbits, 4 +.align 4 .Lfirst_mask: .byte 0x1f, 0, 0, 0 diff --git a/arch/x86/crypto/chacha20-avx2-x86_64.S b/arch/x86/crypto/chacha20-avx2-x86_64.S index 16694e625f77..3a2dc3dc6cac 100644 --- a/arch/x86/crypto/chacha20-avx2-x86_64.S +++ b/arch/x86/crypto/chacha20-avx2-x86_64.S @@ -11,13 +11,18 @@ #include -.data +.section .rodata.cst32.ROT8, "aM", @progbits, 32 .align 32 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 .octa 0x0e0d0c0f0a09080b0605040702010003 + +.section .rodata.cst32.ROT16, "aM", @progbits, 32 +.align 32 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 .octa 0x0d0c0f0e09080b0a0504070601000302 + +.section .rodata.cst32.CTRINC, "aM", @progbits, 32 +.align 32 CTRINC: .octa 0x00000003000000020000000100000000 .octa 0x00000007000000060000000500000004 diff --git a/arch/x86/crypto/chacha20-ssse3-x86_64.S b/arch/x86/crypto/chacha20-ssse3-x86_64.S index 3a33124e9112..3f511a7d73b8 100644 --- a/arch/x86/crypto/chacha20-ssse3-x86_64.S +++ b/arch/x86/crypto/chacha20-ssse3-x86_64.S @@ -11,11 +11,14 @@ #include -.data +.section .rodata.cst16.ROT8, "aM", @progbits, 16 .align 16 - ROT8: .octa 0x0e0d0c0f0a09080b0605040702010003 +.section .rodata.cst16.ROT16, "aM", @progbits, 16 +.align 16 ROT16: .octa 0x0d0c0f0e09080b0a0504070601000302 +.section .rodata.cst16.CTRINC, "aM", @progbits, 16 +.align 16 CTRINC: .octa 0x00000003000000020000000100000000 .text diff --git a/arch/x86/crypto/crct10dif-pcl-asm_64.S b/arch/x86/crypto/crct10dif-pcl-asm_64.S index 35e97569d05f..de04d3e98d8d 100644 --- a/arch/x86/crypto/crct10dif-pcl-asm_64.S +++ b/arch/x86/crypto/crct10dif-pcl-asm_64.S @@ -554,12 +554,11 @@ _only_less_than_2: ENDPROC(crc_t10dif_pcl) -.data - +.section .rodata, "a", @progbits +.align 16 # precomputed constants # these constants are precomputed from the poly: # 0x8bb70000 (0x8bb7 scaled to 32 bits) -.align 16 # Q = 0x18BB70000 # rk1 = 2^(32*3) mod Q << 32 # rk2 = 2^(32*5) mod Q << 32 @@ -613,14 +612,23 @@ rk20: +.section .rodata.cst16.mask1, "aM", @progbits, 16 +.align 16 mask1: .octa 0x80808080808080808080808080808080 + +.section .rodata.cst16.mask2, "aM", @progbits, 16 +.align 16 mask2: .octa 0x00000000FFFFFFFFFFFFFFFFFFFFFFFF +.section .rodata.cst16.SHUF_MASK, "aM", @progbits, 16 +.align 16 SHUF_MASK: .octa 0x000102030405060708090A0B0C0D0E0F +.section .rodata.cst32.pshufb_shf_table, "aM", @progbits, 32 +.align 32 pshufb_shf_table: # use these values for shift constants for the pshufb instruction # different alignments result in values as shown: diff --git a/arch/x86/crypto/des3_ede-asm_64.S b/arch/x86/crypto/des3_ede-asm_64.S index 038f6ae87c5e..f3e91647ca27 100644 --- a/arch/x86/crypto/des3_ede-asm_64.S +++ b/arch/x86/crypto/des3_ede-asm_64.S @@ -537,7 +537,7 @@ ENTRY(des3_ede_x86_64_crypt_blk_3way) ret; ENDPROC(des3_ede_x86_64_crypt_blk_3way) -.data +.section .rodata, "a", @progbits .align 16 .L_s1: .quad 0x0010100001010400, 0x0000000000000000 diff --git a/arch/x86/crypto/ghash-clmulni-intel_asm.S b/arch/x86/crypto/ghash-clmulni-intel_asm.S index eed55c8cca4f..f94375a8dcd1 100644 --- a/arch/x86/crypto/ghash-clmulni-intel_asm.S +++ b/arch/x86/crypto/ghash-clmulni-intel_asm.S @@ -20,8 +20,7 @@ #include #include -.data - +.section .rodata.cst16.bswap_mask, "aM", @progbits, 16 .align 16 .Lbswap_mask: .octa 0x000102030405060708090a0b0c0d0e0f diff --git a/arch/x86/crypto/poly1305-avx2-x86_64.S b/arch/x86/crypto/poly1305-avx2-x86_64.S index eff2f414e22b..3b6e70d085da 100644 --- a/arch/x86/crypto/poly1305-avx2-x86_64.S +++ b/arch/x86/crypto/poly1305-avx2-x86_64.S @@ -11,11 +11,13 @@ #include -.data +.section .rodata.cst32.ANMASK, "aM", @progbits, 32 .align 32 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst32.ORMASK, "aM", @progbits, 32 +.align 32 ORMASK: .octa 0x00000000010000000000000001000000 .octa 0x00000000010000000000000001000000 diff --git a/arch/x86/crypto/poly1305-sse2-x86_64.S b/arch/x86/crypto/poly1305-sse2-x86_64.S index 338c748054ed..c88c670cb5fc 100644 --- a/arch/x86/crypto/poly1305-sse2-x86_64.S +++ b/arch/x86/crypto/poly1305-sse2-x86_64.S @@ -11,10 +11,12 @@ #include -.data +.section .rodata.cst16.ANMASK, "aM", @progbits, 16 .align 16 - ANMASK: .octa 0x0000000003ffffff0000000003ffffff + +.section .rodata.cst16.ORMASK, "aM", @progbits, 16 +.align 16 ORMASK: .octa 0x00000000010000000000000001000000 .text diff --git a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S index 8be571808342..2925077f8c6a 100644 --- a/arch/x86/crypto/serpent-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/serpent-avx-x86_64-asm_64.S @@ -29,11 +29,12 @@ .file "serpent-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/serpent-avx2-asm_64.S b/arch/x86/crypto/serpent-avx2-asm_64.S index 97c48add33ed..d67888f2a52a 100644 --- a/arch/x86/crypto/serpent-avx2-asm_64.S +++ b/arch/x86/crypto/serpent-avx2-asm_64.S @@ -20,13 +20,18 @@ .file "serpent-avx2-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_0, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_0: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask_1, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask_1: .byte 0x0e, 1, 0, 0, 0, 0, 0, 0, 2, 0, 0, 0, 0, 0, 0, 0 diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S index 96df6a39d7e2..93b945597ecf 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_flush_avx2.S @@ -281,11 +281,13 @@ ENTRY(sha1_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha1_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S index 63a0d9c8e31f..7a93b1c0d69a 100644 --- a/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_mb_mgr_submit_avx2.S @@ -203,8 +203,7 @@ return_null: ENDPROC(sha1_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S index c9dae1cd2919..20f77aa633de 100644 --- a/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S +++ b/arch/x86/crypto/sha1-mb/sha1_x8_avx2.S @@ -461,21 +461,32 @@ lloop: ENDPROC(sha1_x8_avx2) -.data - +.section .rodata.cst32.K00_19, "aM", @progbits, 32 .align 32 K00_19: .octa 0x5A8279995A8279995A8279995A827999 .octa 0x5A8279995A8279995A8279995A827999 + +.section .rodata.cst32.K20_39, "aM", @progbits, 32 +.align 32 K20_39: .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 .octa 0x6ED9EBA16ED9EBA16ED9EBA16ED9EBA1 + +.section .rodata.cst32.K40_59, "aM", @progbits, 32 +.align 32 K40_59: .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC .octa 0x8F1BBCDC8F1BBCDC8F1BBCDC8F1BBCDC + +.section .rodata.cst32.K60_79, "aM", @progbits, 32 +.align 32 K60_79: .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 .octa 0xCA62C1D6CA62C1D6CA62C1D6CA62C1D6 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha1_ni_asm.S b/arch/x86/crypto/sha1_ni_asm.S index 874a651b9e7d..ebbdba72ae07 100644 --- a/arch/x86/crypto/sha1_ni_asm.S +++ b/arch/x86/crypto/sha1_ni_asm.S @@ -293,10 +293,12 @@ ENTRY(sha1_ni_transform) ret ENDPROC(sha1_ni_transform) -.data - -.align 64 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x000102030405060708090a0b0c0d0e0f + +.section .rodata.cst16.UPPER_WORD_MASK, "aM", @progbits, 16 +.align 16 UPPER_WORD_MASK: .octa 0xFFFFFFFF000000000000000000000000 diff --git a/arch/x86/crypto/sha256-avx-asm.S b/arch/x86/crypto/sha256-avx-asm.S index 92b3b5d75ba9..e08888a1a5f2 100644 --- a/arch/x86/crypto/sha256-avx-asm.S +++ b/arch/x86/crypto/sha256-avx-asm.S @@ -463,7 +463,7 @@ done_hash: ret ENDPROC(sha256_transform_avx) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -483,14 +483,21 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-avx2-asm.S b/arch/x86/crypto/sha256-avx2-asm.S index 570ec5ec62d7..89c8f09787d2 100644 --- a/arch/x86/crypto/sha256-avx2-asm.S +++ b/arch/x86/crypto/sha256-avx2-asm.S @@ -723,7 +723,7 @@ done_hash: ret ENDPROC(sha256_transform_rorx) -.data +.section .rodata.cst512.K256, "aM", @progbits, 512 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -759,14 +759,21 @@ K256: .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203,0x0c0d0e0f08090a0b0405060700010203 # shuffle xBxA -> 00BA +.section .rodata.cst32._SHUF_00BA, "aM", @progbits, 32 +.align 32 _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100,0xFFFFFFFFFFFFFFFF0b0a090803020100 # shuffle xDxC -> DC00 +.section .rodata.cst32._SHUF_DC00, "aM", @progbits, 32 +.align 32 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF,0x0b0a090803020100FFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S index a78a0694ddef..8fe6338bcc84 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_flush_avx2.S @@ -284,11 +284,13 @@ ENTRY(sha256_mb_mgr_get_comp_job_avx2) ret ENDPROC(sha256_mb_mgr_get_comp_job_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 + +.section .rodata.cst8, "aM", @progbits, 8 +.align 8 one: .quad 1 two: diff --git a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S index 7ea670e25acc..b36ae7454084 100644 --- a/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_mb_mgr_submit_avx2.S @@ -208,8 +208,7 @@ return_null: ENDPROC(sha256_mb_mgr_submit_avx2) -.data - +.section .rodata.cst16.clear_low_nibble, "aM", @progbits, 16 .align 16 clear_low_nibble: .octa 0x000000000000000000000000FFFFFFF0 diff --git a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S index aa21aea4c722..1687c80c5995 100644 --- a/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S +++ b/arch/x86/crypto/sha256-mb/sha256_x8_avx2.S @@ -437,7 +437,8 @@ Lrounds_16_xx: ret ENDPROC(sha256_x8_avx2) -.data + +.section .rodata.K256_8, "a", @progbits .align 64 K256_8: .octa 0x428a2f98428a2f98428a2f98428a2f98 @@ -568,10 +569,14 @@ K256_8: .octa 0xbef9a3f7bef9a3f7bef9a3f7bef9a3f7 .octa 0xc67178f2c67178f2c67178f2c67178f2 .octa 0xc67178f2c67178f2c67178f2c67178f2 + +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 .global K256 K256: diff --git a/arch/x86/crypto/sha256-ssse3-asm.S b/arch/x86/crypto/sha256-ssse3-asm.S index 2cedc44e8121..39b83c93e7fd 100644 --- a/arch/x86/crypto/sha256-ssse3-asm.S +++ b/arch/x86/crypto/sha256-ssse3-asm.S @@ -474,7 +474,7 @@ done_hash: ret ENDPROC(sha256_transform_ssse3) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -494,13 +494,19 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 +.section .rodata.cst16._SHUF_00BA, "aM", @progbits, 16 +.align 16 # shuffle xBxA -> 00BA _SHUF_00BA: .octa 0xFFFFFFFFFFFFFFFF0b0a090803020100 +.section .rodata.cst16._SHUF_DC00, "aM", @progbits, 16 +.align 16 # shuffle xDxC -> DC00 _SHUF_DC00: .octa 0x0b0a090803020100FFFFFFFFFFFFFFFF diff --git a/arch/x86/crypto/sha256_ni_asm.S b/arch/x86/crypto/sha256_ni_asm.S index 748cdf21a938..fb58f58ecfbc 100644 --- a/arch/x86/crypto/sha256_ni_asm.S +++ b/arch/x86/crypto/sha256_ni_asm.S @@ -329,7 +329,7 @@ ENTRY(sha256_ni_transform) ret ENDPROC(sha256_ni_transform) -.data +.section .rodata.cst256.K256, "aM", @progbits, 256 .align 64 K256: .long 0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5 @@ -349,5 +349,7 @@ K256: .long 0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208 .long 0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2 +.section .rodata.cst16.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 16 +.align 16 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x0c0d0e0f08090a0b0405060700010203 diff --git a/arch/x86/crypto/sha512-avx-asm.S b/arch/x86/crypto/sha512-avx-asm.S index 565274d6a641..39235fefe6f7 100644 --- a/arch/x86/crypto/sha512-avx-asm.S +++ b/arch/x86/crypto/sha512-avx-asm.S @@ -370,14 +370,17 @@ ENDPROC(sha512_transform_avx) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/sha512-avx2-asm.S b/arch/x86/crypto/sha512-avx2-asm.S index 1f20b35d8573..7f5f6c6ec72e 100644 --- a/arch/x86/crypto/sha512-avx2-asm.S +++ b/arch/x86/crypto/sha512-avx2-asm.S @@ -684,8 +684,11 @@ ENDPROC(sha512_transform_rorx) ######################################################################## ### Binary Data -.data +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 .align 64 # K[t] used in SHA512 hashing K512: @@ -730,14 +733,17 @@ K512: .quad 0x4cc5d4becb3e42b6,0x597f299cfc657e2a .quad 0x5fcb6fab3ad6faec,0x6c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 .align 32 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 +.section .rodata.cst32.MASK_YMM_LO, "aM", @progbits, 32 +.align 32 MASK_YMM_LO: .octa 0x00000000000000000000000000000000 .octa 0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF + #endif diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S index 3ddba19a0db6..7c629caebc05 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_flush_avx2.S @@ -280,12 +280,18 @@ ENTRY(sha512_mb_mgr_get_comp_job_avx2) pop %rbx ret ENDPROC(sha512_mb_mgr_get_comp_job_avx2) -.data -.align 16 +.section .rodata.cst8.one, "aM", @progbits, 8 +.align 8 one: .quad 1 + +.section .rodata.cst8.two, "aM", @progbits, 8 +.align 8 two: .quad 2 + +.section .rodata.cst8.three, "aM", @progbits, 8 +.align 8 three: .quad 3 diff --git a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S index 815f07bdd1f8..4ba709ba78e5 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_mb_mgr_submit_avx2.S @@ -209,8 +209,9 @@ return_null: xor job_rax, job_rax jmp return ENDPROC(sha512_mb_mgr_submit_avx2) -.data +/* UNUSED? +.section .rodata.cst16, "aM", @progbits, 16 .align 16 H0: .int 0x6a09e667 H1: .int 0xbb67ae85 @@ -220,3 +221,4 @@ H4: .int 0x510e527f H5: .int 0x9b05688c H6: .int 0x1f83d9ab H7: .int 0x5be0cd19 +*/ diff --git a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S index 31ab1eff6413..e22e907643a6 100644 --- a/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S +++ b/arch/x86/crypto/sha512-mb/sha512_x4_avx2.S @@ -361,7 +361,7 @@ Lrounds_16_xx: ret ENDPROC(sha512_x4_avx2) -.data +.section .rodata.K512_4, "a", @progbits .align 64 K512_4: .octa 0x428a2f98d728ae22428a2f98d728ae22,\ @@ -525,5 +525,7 @@ K512_4: .octa 0x6c44198c4a4758176c44198c4a475817,\ 0x6c44198c4a4758176c44198c4a475817 +.section .rodata.cst32.PSHUFFLE_BYTE_FLIP_MASK, "aM", @progbits, 32 +.align 32 PSHUFFLE_BYTE_FLIP_MASK: .octa 0x08090a0b0c0d0e0f0001020304050607 .octa 0x18191a1b1c1d1e1f1011121314151617 diff --git a/arch/x86/crypto/sha512-ssse3-asm.S b/arch/x86/crypto/sha512-ssse3-asm.S index e610e29cbc81..66bbd9058a90 100644 --- a/arch/x86/crypto/sha512-ssse3-asm.S +++ b/arch/x86/crypto/sha512-ssse3-asm.S @@ -369,14 +369,17 @@ ENDPROC(sha512_transform_ssse3) ######################################################################## ### Binary Data -.data - +.section .rodata.cst16.XMM_QWORD_BSWAP, "aM", @progbits, 16 .align 16 - # Mask for byte-swapping a couple of qwords in an XMM register using (v)pshufb. XMM_QWORD_BSWAP: .octa 0x08090a0b0c0d0e0f0001020304050607 +# Mergeable 640-byte rodata section. This allows linker to merge the table +# with other, exactly the same 640-byte fragment of another rodata section +# (if such section exists). +.section .rodata.cst640.K512, "aM", @progbits, 640 +.align 64 # K[t] used in SHA512 hashing K512: .quad 0x428a2f98d728ae22,0x7137449123ef65cd diff --git a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S index dc66273e610d..b3f49d286348 100644 --- a/arch/x86/crypto/twofish-avx-x86_64-asm_64.S +++ b/arch/x86/crypto/twofish-avx-x86_64-asm_64.S @@ -29,11 +29,13 @@ .file "twofish-avx-x86_64-asm_64.S" -.data +.section .rodata.cst16.bswap128_mask, "aM", @progbits, 16 .align 16 - .Lbswap128_mask: .byte 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0 + +.section .rodata.cst16.xts_gf128mul_and_shl1_mask, "aM", @progbits, 16 +.align 16 .Lxts_gf128mul_and_shl1_mask: .byte 0x87, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0 -- cgit v1.2.3 From dffba9a31c7769be3231c420d4b364c92ba3f1ac Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Mon, 23 Jan 2017 14:54:44 -0800 Subject: x86/fpu/xstate: Fix xcomp_bv in XSAVES header The compacted-format XSAVES area is determined at boot time and never changed after. The field xsave.header.xcomp_bv indicates which components are in the fixed XSAVES format. In fpstate_init() we did not set xcomp_bv to reflect the XSAVES format since at the time there is no valid data. However, after we do copy_init_fpstate_to_fpregs() in fpu__clear(), as in commit: b22cbe404a9c x86/fpu: Fix invalid FPU ptrace state after execve() and when __fpu_restore_sig() does fpu__restore() for a COMPAT-mode app, a #GP occurs. This can be easily triggered by doing valgrind on a COMPAT-mode "Hello World," as reported by Joakim Tjernlund and others: https://bugzilla.kernel.org/show_bug.cgi?id=190061 Fix it by setting xcomp_bv correctly. This patch also moves the xcomp_bv initialization to the proper place, which was in copyin_to_xsaves() as of: 4c833368f0bf x86/fpu: Set the xcomp_bv when we fake up a XSAVES area which fixed the bug too, but it's more efficient and cleaner to initialize things once per boot, not for every signal handling operation. Reported-by: Kevin Hao Reported-by: Joakim Tjernlund Signed-off-by: Yu-cheng Yu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi V. Shankar Cc: Thomas Gleixner Cc: haokexin@gmail.com Link: http://lkml.kernel.org/r/1485212084-4418-1-git-send-email-yu-cheng.yu@intel.com [ Combined it with 4c833368f0bf. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/core.c | 4 +++- arch/x86/kernel/fpu/xstate.c | 1 - 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index e4e97a5355ce..de7234401275 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include @@ -183,7 +184,8 @@ void fpstate_init(union fpregs_state *state) * it will #GP. Make sure it is replaced after the memset(). */ if (static_cpu_has(X86_FEATURE_XSAVES)) - state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT; + state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | + xfeatures_mask; if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index e287b9075527..1d7770447b3e 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -1070,7 +1070,6 @@ int copyin_to_xsaves(const void *kbuf, const void __user *ubuf, * Add back in the features that came in from userspace: */ xsave->header.xfeatures |= xfeatures; - xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xsave->header.xfeatures; return 0; } -- cgit v1.2.3 From d4b2ac63b0eae461fc10c9791084be24724ef57a Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:06 +0100 Subject: x86/ras/inject: Make it depend on X86_LOCAL_APIC=y MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ... and get rid of the annoying: arch/x86/kernel/cpu/mcheck/mce-inject.c:97:13: warning: ‘mce_irq_ipi’ defined but not used [-Wunused-function] when doing randconfig builds. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170123183514.13356-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 2 +- arch/x86/kernel/cpu/mcheck/mce-inject.c | 5 +---- 2 files changed, 2 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..7b6fd68b4715 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1070,7 +1070,7 @@ config X86_MCE_THRESHOLD def_bool y config X86_MCE_INJECT - depends on X86_MCE + depends on X86_MCE && X86_LOCAL_APIC tristate "Machine check injector support" ---help--- Provide support for injecting machine checks for testing purposes. diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c index 517619ea6498..99165b206df3 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-inject.c +++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c @@ -152,7 +152,6 @@ static void raise_mce(struct mce *m) if (context == MCJ_CTX_RANDOM) return; -#ifdef CONFIG_X86_LOCAL_APIC if (m->inject_flags & (MCJ_IRQ_BROADCAST | MCJ_NMI_BROADCAST)) { unsigned long start; int cpu; @@ -192,9 +191,7 @@ static void raise_mce(struct mce *m) raise_local(); put_cpu(); put_online_cpus(); - } else -#endif - { + } else { preempt_disable(); raise_local(); preempt_enable(); -- cgit v1.2.3 From 9b052ea4ced0fa1ad30a2eafe86984a16297e6f1 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:07 +0100 Subject: x86/ras/therm_throt: Do not log a fake MCE for thermal events We log a fake bank 128 MCE to note that we're handling a CPU thermal event. However, this confuses people into thinking that their hardware generates MCEs. Hijacking MCA for logging thermal events is a gross misuse anyway and it shouldn't have been done in the first place. And besides we have other means for dealing with thermal events which are much more suitable. So let's kill the MCE logging part. Signed-off-by: Borislav Petkov Acked-by: Ashok Raj Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170105213846.GA12024@gmail.com Link: http://lkml.kernel.org/r/20170123183514.13356-3-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 6 ------ arch/x86/kernel/cpu/mcheck/mce.c | 25 ------------------------- arch/x86/kernel/cpu/mcheck/therm_throt.c | 30 +++++++++++------------------- 3 files changed, 11 insertions(+), 50 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 5132f2a6c0a2..a09ed05725c2 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -97,10 +97,6 @@ #define MCE_OVERFLOW 0 /* bit 0 in flags means overflow */ -/* Software defined banks */ -#define MCE_EXTENDED_BANK 128 -#define MCE_THERMAL_BANK (MCE_EXTENDED_BANK + 0) - #define MCE_LOG_LEN 32 #define MCE_LOG_SIGNATURE "MACHINECHECK" @@ -306,8 +302,6 @@ extern void (*deferred_error_int_vector)(void); void intel_init_thermal(struct cpuinfo_x86 *c); -void mce_log_therm_throt_event(__u64 status); - /* Interrupt Handler for core thermal thresholds */ extern int (*platform_thermal_notify)(__u64 msr_val); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 00ef43233e03..6eef6fde0f02 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1331,31 +1331,6 @@ static void mce_process_work(struct work_struct *dummy) mce_gen_pool_process(); } -#ifdef CONFIG_X86_MCE_INTEL -/*** - * mce_log_therm_throt_event - Logs the thermal throttling event to mcelog - * @cpu: The CPU on which the event occurred. - * @status: Event status information - * - * This function should be called by the thermal interrupt after the - * event has been processed and the decision was made to log the event - * further. - * - * The status parameter will be saved to the 'status' field of 'struct mce' - * and historically has been the register value of the - * MSR_IA32_THERMAL_STATUS (Intel) msr. - */ -void mce_log_therm_throt_event(__u64 status) -{ - struct mce m; - - mce_setup(&m); - m.bank = MCE_THERMAL_BANK; - m.status = status; - mce_log(&m); -} -#endif /* CONFIG_X86_MCE_INTEL */ - /* * Periodic polling timer for "silent" machine check errors. If the * poller finds an MCE, poll 2x faster. When the poller finds no more diff --git a/arch/x86/kernel/cpu/mcheck/therm_throt.c b/arch/x86/kernel/cpu/mcheck/therm_throt.c index 465aca8be009..85469f84c921 100644 --- a/arch/x86/kernel/cpu/mcheck/therm_throt.c +++ b/arch/x86/kernel/cpu/mcheck/therm_throt.c @@ -6,7 +6,7 @@ * * Maintains a counter in /sys that keeps track of the number of thermal * events, such that the user knows how bad the thermal problem might be - * (since the logging to syslog and mcelog is rate limited). + * (since the logging to syslog is rate limited). * * Author: Dmitriy Zavin (dmitriyz@google.com) * @@ -141,13 +141,8 @@ static struct attribute_group thermal_attr_group = { * IRQ has been acknowledged. * * It will take care of rate limiting and printing messages to the syslog. - * - * Returns: 0 : Event should NOT be further logged, i.e. still in - * "timeout" from previous log message. - * 1 : Event should be logged further, and a message has been - * printed to the syslog. */ -static int therm_throt_process(bool new_event, int event, int level) +static void therm_throt_process(bool new_event, int event, int level) { struct _thermal_state *state; unsigned int this_cpu = smp_processor_id(); @@ -162,16 +157,16 @@ static int therm_throt_process(bool new_event, int event, int level) else if (event == POWER_LIMIT_EVENT) state = &pstate->core_power_limit; else - return 0; + return; } else if (level == PACKAGE_LEVEL) { if (event == THERMAL_THROTTLING_EVENT) state = &pstate->package_throttle; else if (event == POWER_LIMIT_EVENT) state = &pstate->package_power_limit; else - return 0; + return; } else - return 0; + return; old_event = state->new_event; state->new_event = new_event; @@ -181,7 +176,7 @@ static int therm_throt_process(bool new_event, int event, int level) if (time_before64(now, state->next_check) && state->count != state->last_count) - return 0; + return; state->next_check = now + CHECK_INTERVAL; state->last_count = state->count; @@ -193,16 +188,14 @@ static int therm_throt_process(bool new_event, int event, int level) this_cpu, level == CORE_LEVEL ? "Core" : "Package", state->count); - return 1; + return; } if (old_event) { if (event == THERMAL_THROTTLING_EVENT) pr_info("CPU%d: %s temperature/speed normal\n", this_cpu, level == CORE_LEVEL ? "Core" : "Package"); - return 1; + return; } - - return 0; } static int thresh_event_valid(int level, int event) @@ -365,10 +358,9 @@ static void intel_thermal_interrupt(void) /* Check for violation of core thermal thresholds*/ notify_thresholds(msr_val); - if (therm_throt_process(msr_val & THERM_STATUS_PROCHOT, - THERMAL_THROTTLING_EVENT, - CORE_LEVEL) != 0) - mce_log_therm_throt_event(msr_val); + therm_throt_process(msr_val & THERM_STATUS_PROCHOT, + THERMAL_THROTTLING_EVENT, + CORE_LEVEL); if (this_cpu_has(X86_FEATURE_PLN) && int_pln_enable) therm_throt_process(msr_val & THERM_STATUS_POWER_LIMIT, -- cgit v1.2.3 From 0b737a9c2af85cc8295f9308d9250f9111bbf94d Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Mon, 23 Jan 2017 19:35:08 +0100 Subject: x86/ras/amd: Make sysfs names of banks more user-friendly Currently, we append the MCA_IPID[InstanceId] to the bank name to create the sysfs filename. The InstanceId field uniquely identifies a bank instance but it doesn't look very nice for most banks. Replace the InstanceId with a simpler, ascending (0, 1, ..) value. Only use this in the sysfs name when there is more than 1 instance. Otherwise, just use the bank's name as the sysfs name. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: linux-edac Link: http://lkml.kernel.org/r/1484322741-41884-3-git-send-email-Yazen.Ghannam@amd.com Link: http://lkml.kernel.org/r/20170123183514.13356-4-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 5 +++-- arch/x86/kernel/cpu/mcheck/mce_amd.c | 6 +++++- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index a09ed05725c2..528f6ec897cb 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -356,12 +356,13 @@ struct smca_hwid { unsigned int bank_type; /* Use with smca_bank_types for easy indexing. */ u32 hwid_mcatype; /* (hwid,mcatype) tuple */ u32 xec_bitmap; /* Bitmap of valid ExtErrorCodes; current max is 21. */ + u8 count; /* Number of instances. */ }; struct smca_bank { struct smca_hwid *hwid; - /* Instance ID */ - u32 id; + u32 id; /* Value of MCA_IPID[InstanceId]. */ + u8 sysfs_id; /* Value used for sysfs name. */ }; extern struct smca_bank smca_banks[MAX_NR_BANKS]; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index a5fd137417a2..776379e4a39c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -192,6 +192,7 @@ static void get_smca_bank_info(unsigned int bank) smca_banks[bank].hwid = s_hwid; smca_banks[bank].id = instance_id; + smca_banks[bank].sysfs_id = s_hwid->count++; break; } } @@ -1064,9 +1065,12 @@ static const char *get_name(unsigned int bank, struct threshold_block *b) return NULL; } + if (smca_banks[bank].hwid->count == 1) + return smca_get_name(bank_type); + snprintf(buf_mcatype, MAX_MCATYPE_NAME_LEN, "%s_%x", smca_get_name(bank_type), - smca_banks[bank].id); + smca_banks[bank].sysfs_id); return buf_mcatype; } -- cgit v1.2.3 From 669c00f09935fc7a22297eadee04536af141595b Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:09 +0100 Subject: x86/ras: Flip the TSC-adding logic Add the TSC value to the MCE record only when the MCE being logged is precise, i.e., it is logged as an exception or an MCE-related interrupt. So it doesn't look particularly easy to do without touching/changing a bunch of places. That's why I'm trying tricks first. For example, the mce-apei.c case I'm addressing by setting ->tsc only for errors of panic severity. The idea there is, that, panic errors will have raised an #MC and not polled. And then instead of propagating a flag to mce_setup(), it seems easier/less code to set ->tsc depending on the call sites, i.e., are we polling or are we preparing an MCE record in an exception handler/thresholding interrupt. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170123183514.13356-5-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-apei.c | 5 ++++- arch/x86/kernel/cpu/mcheck/mce.c | 12 +++--------- arch/x86/kernel/cpu/mcheck/mce_amd.c | 3 ++- 3 files changed, 9 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-apei.c b/arch/x86/kernel/cpu/mcheck/mce-apei.c index 83f1a98d37db..2eee85379689 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-apei.c +++ b/arch/x86/kernel/cpu/mcheck/mce-apei.c @@ -52,8 +52,11 @@ void apei_mce_report_mem_error(int severity, struct cper_sec_mem_err *mem_err) if (severity >= GHES_SEV_RECOVERABLE) m.status |= MCI_STATUS_UC; - if (severity >= GHES_SEV_PANIC) + + if (severity >= GHES_SEV_PANIC) { m.status |= MCI_STATUS_PCC; + m.tsc = rdtsc(); + } m.addr = mem_err->physical_addr; mce_log(&m); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 6eef6fde0f02..ca15a7e1f97d 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -128,7 +128,6 @@ void mce_setup(struct mce *m) { memset(m, 0, sizeof(struct mce)); m->cpu = m->extcpu = smp_processor_id(); - m->tsc = rdtsc(); /* We hope get_seconds stays lockless */ m->time = get_seconds(); m->cpuvendor = boot_cpu_data.x86_vendor; @@ -710,14 +709,8 @@ bool machine_check_poll(enum mcp_flags flags, mce_banks_t *b) mce_gather_info(&m, NULL); - /* - * m.tsc was set in mce_setup(). Clear it if not requested. - * - * FIXME: Propagate @flags to mce_gather_info/mce_setup() to avoid - * that dance. - */ - if (!(flags & MCP_TIMESTAMP)) - m.tsc = 0; + if (flags & MCP_TIMESTAMP) + m.tsc = rdtsc(); for (i = 0; i < mca_cfg.banks; i++) { if (!mce_banks[i].ctl || !test_bit(i, *b)) @@ -1156,6 +1149,7 @@ void do_machine_check(struct pt_regs *regs, long error_code) goto out; mce_gather_info(&m, regs); + m.tsc = rdtsc(); final = this_cpu_ptr(&mces_seen); *final = m; diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 776379e4a39c..9e5427df3243 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -778,7 +778,8 @@ __log_error(unsigned int bank, bool deferred_err, bool threshold_err, u64 misc) mce_setup(&m); m.status = status; - m.bank = bank; + m.bank = bank; + m.tsc = rdtsc(); if (threshold_err) m.misc = misc; -- cgit v1.2.3 From bd43f60a260c83cbc9befd7d710a3f2bfd3b2dd2 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:10 +0100 Subject: x86/ras/amd/inj: Change dependency Change dependency to mce.c as we're using mce_inject_log() now to stick an MCE into the MCA subsystem. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170123183514.13356-6-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/ras/Kconfig | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/ras/Kconfig b/arch/x86/ras/Kconfig index d957d5f21a86..0bc60a308730 100644 --- a/arch/x86/ras/Kconfig +++ b/arch/x86/ras/Kconfig @@ -1,6 +1,6 @@ config MCE_AMD_INJ tristate "Simple MCE injection interface for AMD processors" - depends on RAS && EDAC_DECODE_MCE && DEBUG_FS && AMD_NB + depends on RAS && X86_MCE && DEBUG_FS && AMD_NB default n help This is a simple debugfs interface to inject MCEs and test different -- cgit v1.2.3 From cff4c0391a692cf9b89932c62a7f879fb3637148 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:13 +0100 Subject: x86/ras: Get rid of mce_process_work() Make mce_gen_pool_process() the workqueue function directly and save us an indirection. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170123183514.13356-9-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce-genpool.c | 2 +- arch/x86/kernel/cpu/mcheck/mce-internal.h | 2 +- arch/x86/kernel/cpu/mcheck/mce.c | 12 +----------- 3 files changed, 3 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce-genpool.c b/arch/x86/kernel/cpu/mcheck/mce-genpool.c index 93d824ec3120..1e5a50c11d3c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-genpool.c +++ b/arch/x86/kernel/cpu/mcheck/mce-genpool.c @@ -72,7 +72,7 @@ struct llist_node *mce_gen_pool_prepare_records(void) return new_head.first; } -void mce_gen_pool_process(void) +void mce_gen_pool_process(struct work_struct *__unused) { struct llist_node *head; struct mce_evt_llist *node, *tmp; diff --git a/arch/x86/kernel/cpu/mcheck/mce-internal.h b/arch/x86/kernel/cpu/mcheck/mce-internal.h index cd74a3f00aea..903043e6a62b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce-internal.h +++ b/arch/x86/kernel/cpu/mcheck/mce-internal.h @@ -31,7 +31,7 @@ struct mce_evt_llist { struct mce mce; }; -void mce_gen_pool_process(void); +void mce_gen_pool_process(struct work_struct *__unused); bool mce_gen_pool_empty(void); int mce_gen_pool_add(struct mce *mce); int mce_gen_pool_init(void); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index ca15a7e1f97d..0fef5406f0eb 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1315,16 +1315,6 @@ int memory_failure(unsigned long pfn, int vector, int flags) } #endif -/* - * Action optional processing happens here (picking up - * from the list of faulting pages that do_machine_check() - * placed into the genpool). - */ -static void mce_process_work(struct work_struct *dummy) -{ - mce_gen_pool_process(); -} - /* * Periodic polling timer for "silent" machine check errors. If the * poller finds an MCE, poll 2x faster. When the poller finds no more @@ -2165,7 +2155,7 @@ int __init mcheck_init(void) mce_register_decode_chain(&mce_default_nb); mcheck_vendor_init_severity(); - INIT_WORK(&mce_work, mce_process_work); + INIT_WORK(&mce_work, mce_gen_pool_process); init_irq_work(&mce_irq_work, mce_irq_work_cb); return 0; -- cgit v1.2.3 From 9026cc82b632ed1a859935c82ed8ad65f27f2781 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 23 Jan 2017 19:35:14 +0100 Subject: x86/ras, EDAC, acpi: Assign MCE notifier handlers a priority Assign all notifiers on the MCE decode chain a priority so that they get called in the correct order. Suggested-by: Thomas Gleixner Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Tony Luck Cc: Yazen Ghannam Cc: linux-edac Link: http://lkml.kernel.org/r/20170123183514.13356-10-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mce.h | 9 +++++++++ arch/x86/kernel/cpu/mcheck/mce.c | 8 +++----- drivers/acpi/acpi_extlog.c | 1 + drivers/acpi/nfit/mce.c | 1 + drivers/edac/i7core_edac.c | 1 + drivers/edac/mce_amd.c | 1 + drivers/edac/sb_edac.c | 3 ++- drivers/edac/skx_edac.c | 3 ++- 8 files changed, 20 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h index 528f6ec897cb..e63873683d4a 100644 --- a/arch/x86/include/asm/mce.h +++ b/arch/x86/include/asm/mce.h @@ -189,6 +189,15 @@ extern struct mce_vendor_flags mce_flags; extern struct mca_config mca_cfg; extern struct mca_msr_regs msr_ops; + +enum mce_notifier_prios { + MCE_PRIO_SRAO = INT_MAX, + MCE_PRIO_EXTLOG = INT_MAX - 1, + MCE_PRIO_NFIT = INT_MAX - 2, + MCE_PRIO_EDAC = INT_MAX - 3, + MCE_PRIO_LOWEST = 0, +}; + extern void mce_register_decode_chain(struct notifier_block *nb); extern void mce_unregister_decode_chain(struct notifier_block *nb); diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 0fef5406f0eb..e39bbc0e7c8b 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -216,9 +216,7 @@ void mce_register_decode_chain(struct notifier_block *nb) { atomic_inc(&num_notifiers); - /* Ensure SRAO notifier has the highest priority in the decode chain. */ - if (nb != &mce_srao_nb && nb->priority == INT_MAX) - nb->priority -= 1; + WARN_ON(nb->priority > MCE_PRIO_LOWEST && nb->priority < MCE_PRIO_EDAC); atomic_notifier_chain_register(&x86_mce_decoder_chain, nb); } @@ -582,7 +580,7 @@ static int srao_decode_notifier(struct notifier_block *nb, unsigned long val, } static struct notifier_block mce_srao_nb = { .notifier_call = srao_decode_notifier, - .priority = INT_MAX, + .priority = MCE_PRIO_SRAO, }; static int mce_default_notifier(struct notifier_block *nb, unsigned long val, @@ -608,7 +606,7 @@ static int mce_default_notifier(struct notifier_block *nb, unsigned long val, static struct notifier_block mce_default_nb = { .notifier_call = mce_default_notifier, /* lowest prio, we want it to run last. */ - .priority = 0, + .priority = MCE_PRIO_LOWEST, }; /* diff --git a/drivers/acpi/acpi_extlog.c b/drivers/acpi/acpi_extlog.c index b3842ffc19ba..a15270a806fc 100644 --- a/drivers/acpi/acpi_extlog.c +++ b/drivers/acpi/acpi_extlog.c @@ -212,6 +212,7 @@ static bool __init extlog_get_l1addr(void) } static struct notifier_block extlog_mce_dec = { .notifier_call = extlog_print, + .priority = MCE_PRIO_EXTLOG, }; static int __init extlog_init(void) diff --git a/drivers/acpi/nfit/mce.c b/drivers/acpi/nfit/mce.c index e5ce81c38eed..3ba1c3472cf9 100644 --- a/drivers/acpi/nfit/mce.c +++ b/drivers/acpi/nfit/mce.c @@ -90,6 +90,7 @@ static int nfit_handle_mce(struct notifier_block *nb, unsigned long val, static struct notifier_block nfit_mce_dec = { .notifier_call = nfit_handle_mce, + .priority = MCE_PRIO_NFIT, }; void nfit_mce_register(void) diff --git a/drivers/edac/i7core_edac.c b/drivers/edac/i7core_edac.c index 69b5adead0ad..75ad847593b7 100644 --- a/drivers/edac/i7core_edac.c +++ b/drivers/edac/i7core_edac.c @@ -1835,6 +1835,7 @@ static int i7core_mce_check_error(struct notifier_block *nb, unsigned long val, static struct notifier_block i7_mce_dec = { .notifier_call = i7core_mce_check_error, + .priority = MCE_PRIO_EDAC, }; struct memdev_dmi_entry { diff --git a/drivers/edac/mce_amd.c b/drivers/edac/mce_amd.c index ecad750fd090..0d9bc25543d8 100644 --- a/drivers/edac/mce_amd.c +++ b/drivers/edac/mce_amd.c @@ -1054,6 +1054,7 @@ amd_decode_mce(struct notifier_block *nb, unsigned long val, void *data) static struct notifier_block amd_mce_dec_nb = { .notifier_call = amd_decode_mce, + .priority = MCE_PRIO_EDAC, }; static int __init mce_amd_init(void) diff --git a/drivers/edac/sb_edac.c b/drivers/edac/sb_edac.c index 54ae6dc45ab2..c585a014dd3d 100644 --- a/drivers/edac/sb_edac.c +++ b/drivers/edac/sb_edac.c @@ -3136,7 +3136,8 @@ static int sbridge_mce_check_error(struct notifier_block *nb, unsigned long val, } static struct notifier_block sbridge_mce_dec = { - .notifier_call = sbridge_mce_check_error, + .notifier_call = sbridge_mce_check_error, + .priority = MCE_PRIO_EDAC, }; /**************************************************************************** diff --git a/drivers/edac/skx_edac.c b/drivers/edac/skx_edac.c index 79ef675e4d6f..1159dba4671f 100644 --- a/drivers/edac/skx_edac.c +++ b/drivers/edac/skx_edac.c @@ -1007,7 +1007,8 @@ static int skx_mce_check_error(struct notifier_block *nb, unsigned long val, } static struct notifier_block skx_mce_dec = { - .notifier_call = skx_mce_check_error, + .notifier_call = skx_mce_check_error, + .priority = MCE_PRIO_EDAC, }; static void skx_remove(void) -- cgit v1.2.3 From 5299709d0a87342dadc1fc9850484fadeb488bf8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:01 -0800 Subject: treewide: Constify most dma_map_ops structures Most dma_map_ops structures are never modified. Constify these structures such that these can be write-protected. This patch has been generated as follows: git grep -l 'struct dma_map_ops' | xargs -d\\n sed -i \ -e 's/struct dma_map_ops/const struct dma_map_ops/g' \ -e 's/const struct dma_map_ops {/struct dma_map_ops {/g' \ -e 's/^const struct dma_map_ops;$/struct dma_map_ops;/' \ -e 's/const const struct dma_map_ops /const struct dma_map_ops /g'; sed -i -e 's/const \(struct dma_map_ops intel_dma_ops\)/\1/' \ $(git grep -l 'struct dma_map_ops intel_dma_ops'); sed -i -e 's/const \(struct dma_map_ops dma_iommu_ops\)/\1/' \ $(git grep -l 'struct dma_map_ops' | grep ^arch/powerpc); sed -i -e '/^struct vmd_dev {$/,/^};$/ s/const \(struct dma_map_ops[[:blank:]]dma_ops;\)/\1/' \ -e '/^static void vmd_setup_dma_ops/,/^}$/ s/const \(struct dma_map_ops \*dest\)/\1/' \ -e 's/const \(struct dma_map_ops \*dest = \&vmd->dma_ops\)/\1/' \ drivers/pci/host/*.c sed -i -e '/^void __init pci_iommu_alloc(void)$/,/^}$/ s/dma_ops->/intel_dma_ops./' arch/ia64/kernel/pci-dma.c sed -i -e 's/static const struct dma_map_ops sn_dma_ops/static struct dma_map_ops sn_dma_ops/' arch/ia64/sn/pci/pci_dma.c sed -i -e 's/(const struct dma_map_ops \*)//' drivers/misc/mic/bus/vop_bus.c Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: David Woodhouse Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Russell King Cc: x86@kernel.org Signed-off-by: Doug Ledford --- arch/alpha/include/asm/dma-mapping.h | 4 +-- arch/alpha/kernel/pci-noop.c | 4 +-- arch/alpha/kernel/pci_iommu.c | 4 +-- arch/arc/include/asm/dma-mapping.h | 4 +-- arch/arc/mm/dma.c | 2 +- arch/arm/common/dmabounce.c | 2 +- arch/arm/include/asm/device.h | 2 +- arch/arm/include/asm/dma-mapping.h | 10 +++--- arch/arm/mm/dma-mapping.c | 22 ++++++------ arch/arm/xen/mm.c | 4 +-- arch/arm64/include/asm/device.h | 2 +- arch/arm64/include/asm/dma-mapping.h | 6 ++-- arch/arm64/mm/dma-mapping.c | 6 ++-- arch/avr32/include/asm/dma-mapping.h | 4 +-- arch/avr32/mm/dma-coherent.c | 2 +- arch/blackfin/include/asm/dma-mapping.h | 4 +-- arch/blackfin/kernel/dma-mapping.c | 2 +- arch/c6x/include/asm/dma-mapping.h | 4 +-- arch/c6x/kernel/dma.c | 2 +- arch/cris/arch-v32/drivers/pci/dma.c | 2 +- arch/cris/include/asm/dma-mapping.h | 6 ++-- arch/frv/include/asm/dma-mapping.h | 4 +-- arch/frv/mb93090-mb00/pci-dma-nommu.c | 2 +- arch/frv/mb93090-mb00/pci-dma.c | 2 +- arch/h8300/include/asm/dma-mapping.h | 4 +-- arch/h8300/kernel/dma.c | 2 +- arch/hexagon/include/asm/dma-mapping.h | 4 +-- arch/hexagon/kernel/dma.c | 4 +-- arch/ia64/hp/common/hwsw_iommu.c | 4 +-- arch/ia64/hp/common/sba_iommu.c | 4 +-- arch/ia64/include/asm/dma-mapping.h | 2 +- arch/ia64/include/asm/machvec.h | 4 +-- arch/ia64/kernel/dma-mapping.c | 4 +-- arch/ia64/kernel/pci-dma.c | 10 +++--- arch/ia64/kernel/pci-swiotlb.c | 2 +- arch/m32r/include/asm/device.h | 2 +- arch/m32r/include/asm/dma-mapping.h | 2 +- arch/m68k/include/asm/dma-mapping.h | 4 +-- arch/m68k/kernel/dma.c | 2 +- arch/metag/include/asm/dma-mapping.h | 4 +-- arch/metag/kernel/dma.c | 2 +- arch/microblaze/include/asm/dma-mapping.h | 4 +-- arch/microblaze/kernel/dma.c | 2 +- arch/mips/cavium-octeon/dma-octeon.c | 4 +-- arch/mips/include/asm/device.h | 2 +- arch/mips/include/asm/dma-mapping.h | 4 +-- .../include/asm/mach-cavium-octeon/dma-coherence.h | 2 +- arch/mips/include/asm/netlogic/common.h | 2 +- arch/mips/loongson64/common/dma-swiotlb.c | 2 +- arch/mips/mm/dma-default.c | 4 +-- arch/mips/netlogic/common/nlm-dma.c | 2 +- arch/mn10300/include/asm/dma-mapping.h | 4 +-- arch/mn10300/mm/dma-alloc.c | 2 +- arch/nios2/include/asm/dma-mapping.h | 4 +-- arch/nios2/mm/dma-mapping.c | 2 +- arch/openrisc/include/asm/dma-mapping.h | 4 +-- arch/openrisc/kernel/dma.c | 2 +- arch/parisc/include/asm/dma-mapping.h | 8 ++--- arch/parisc/kernel/drivers.c | 2 +- arch/parisc/kernel/pci-dma.c | 4 +-- arch/powerpc/include/asm/device.h | 2 +- arch/powerpc/include/asm/dma-mapping.h | 6 ++-- arch/powerpc/include/asm/pci.h | 4 +-- arch/powerpc/include/asm/swiotlb.h | 2 +- arch/powerpc/kernel/dma-swiotlb.c | 2 +- arch/powerpc/kernel/dma.c | 6 ++-- arch/powerpc/kernel/pci-common.c | 6 ++-- arch/powerpc/platforms/cell/iommu.c | 4 +-- arch/powerpc/platforms/powernv/npu-dma.c | 2 +- arch/powerpc/platforms/ps3/system-bus.c | 4 +-- arch/powerpc/platforms/pseries/ibmebus.c | 2 +- arch/powerpc/platforms/pseries/vio.c | 2 +- arch/s390/include/asm/device.h | 2 +- arch/s390/include/asm/dma-mapping.h | 4 +-- arch/s390/pci/pci_dma.c | 2 +- arch/sh/include/asm/dma-mapping.h | 4 +-- arch/sh/kernel/dma-nommu.c | 2 +- arch/sh/mm/consistent.c | 2 +- arch/sparc/include/asm/dma-mapping.h | 8 ++--- arch/sparc/kernel/iommu.c | 4 +-- arch/sparc/kernel/ioport.c | 8 ++--- arch/sparc/kernel/pci_sun4v.c | 2 +- arch/tile/include/asm/device.h | 2 +- arch/tile/include/asm/dma-mapping.h | 12 +++---- arch/tile/kernel/pci-dma.c | 24 ++++++------- arch/unicore32/include/asm/dma-mapping.h | 4 +-- arch/unicore32/mm/dma-swiotlb.c | 2 +- arch/x86/include/asm/device.h | 4 +-- arch/x86/include/asm/dma-mapping.h | 4 +-- arch/x86/include/asm/iommu.h | 2 +- arch/x86/kernel/amd_gart_64.c | 2 +- arch/x86/kernel/pci-calgary_64.c | 2 +- arch/x86/kernel/pci-dma.c | 4 +-- arch/x86/kernel/pci-nommu.c | 2 +- arch/x86/kernel/pci-swiotlb.c | 2 +- arch/x86/pci/sta2x11-fixup.c | 2 +- arch/x86/xen/pci-swiotlb-xen.c | 2 +- arch/xtensa/include/asm/device.h | 2 +- arch/xtensa/include/asm/dma-mapping.h | 4 +-- arch/xtensa/kernel/pci-dma.c | 2 +- drivers/iommu/amd_iommu.c | 4 +-- drivers/misc/mic/bus/mic_bus.c | 2 +- drivers/misc/mic/bus/scif_bus.c | 2 +- drivers/misc/mic/bus/scif_bus.h | 2 +- drivers/misc/mic/bus/vop_bus.c | 2 +- drivers/misc/mic/host/mic_boot.c | 4 +-- drivers/parisc/ccio-dma.c | 2 +- drivers/parisc/sba_iommu.c | 2 +- drivers/pci/host/vmd.c | 2 +- include/linux/dma-mapping.h | 42 +++++++++++----------- include/linux/mic_bus.h | 2 +- include/xen/arm/hypervisor.h | 2 +- lib/dma-noop.c | 2 +- 113 files changed, 227 insertions(+), 227 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index c63b6ac19ee5..d3480562411d 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -1,9 +1,9 @@ #ifndef _ALPHA_DMA_MAPPING_H #define _ALPHA_DMA_MAPPING_H -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return dma_ops; } diff --git a/arch/alpha/kernel/pci-noop.c b/arch/alpha/kernel/pci-noop.c index bb152e21e5ae..ffbdb3fb672f 100644 --- a/arch/alpha/kernel/pci-noop.c +++ b/arch/alpha/kernel/pci-noop.c @@ -128,7 +128,7 @@ static int alpha_noop_supported(struct device *dev, u64 mask) return mask < 0x00ffffffUL ? 0 : 1; } -struct dma_map_ops alpha_noop_ops = { +const struct dma_map_ops alpha_noop_ops = { .alloc = alpha_noop_alloc_coherent, .free = dma_noop_free_coherent, .map_page = dma_noop_map_page, @@ -137,5 +137,5 @@ struct dma_map_ops alpha_noop_ops = { .dma_supported = alpha_noop_supported, }; -struct dma_map_ops *dma_ops = &alpha_noop_ops; +const struct dma_map_ops *dma_ops = &alpha_noop_ops; EXPORT_SYMBOL(dma_ops); diff --git a/arch/alpha/kernel/pci_iommu.c b/arch/alpha/kernel/pci_iommu.c index 451fc9cdd323..7fd2329038a3 100644 --- a/arch/alpha/kernel/pci_iommu.c +++ b/arch/alpha/kernel/pci_iommu.c @@ -939,7 +939,7 @@ static int alpha_pci_mapping_error(struct device *dev, dma_addr_t dma_addr) return dma_addr == 0; } -struct dma_map_ops alpha_pci_ops = { +const struct dma_map_ops alpha_pci_ops = { .alloc = alpha_pci_alloc_coherent, .free = alpha_pci_free_coherent, .map_page = alpha_pci_map_page, @@ -950,5 +950,5 @@ struct dma_map_ops alpha_pci_ops = { .dma_supported = alpha_pci_supported, }; -struct dma_map_ops *dma_ops = &alpha_pci_ops; +const struct dma_map_ops *dma_ops = &alpha_pci_ops; EXPORT_SYMBOL(dma_ops); diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h index 266f11c9bd59..fdff3aa60052 100644 --- a/arch/arc/include/asm/dma-mapping.h +++ b/arch/arc/include/asm/dma-mapping.h @@ -18,9 +18,9 @@ #include #endif -extern struct dma_map_ops arc_dma_ops; +extern const struct dma_map_ops arc_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &arc_dma_ops; } diff --git a/arch/arc/mm/dma.c b/arch/arc/mm/dma.c index 08450a1a5b5f..2a07e6ecafbd 100644 --- a/arch/arc/mm/dma.c +++ b/arch/arc/mm/dma.c @@ -218,7 +218,7 @@ static int arc_dma_supported(struct device *dev, u64 dma_mask) return dma_mask == DMA_BIT_MASK(32); } -struct dma_map_ops arc_dma_ops = { +const struct dma_map_ops arc_dma_ops = { .alloc = arc_dma_alloc, .free = arc_dma_free, .mmap = arc_dma_mmap, diff --git a/arch/arm/common/dmabounce.c b/arch/arm/common/dmabounce.c index 75055df1cda3..9b1b7be2ec0e 100644 --- a/arch/arm/common/dmabounce.c +++ b/arch/arm/common/dmabounce.c @@ -452,7 +452,7 @@ static int dmabounce_set_mask(struct device *dev, u64 dma_mask) return arm_dma_ops.set_dma_mask(dev, dma_mask); } -static struct dma_map_ops dmabounce_ops = { +static const struct dma_map_ops dmabounce_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, .mmap = arm_dma_mmap, diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index 4111592f0130..d8a572f9c187 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -7,7 +7,7 @@ #define ASMARM_DEVICE_H struct dev_archdata { - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; #ifdef CONFIG_DMABOUNCE struct dmabounce_device_info *dmabounce; #endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index bf02dbd9ccda..1aabd781306f 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -13,17 +13,17 @@ #include #define DMA_ERROR_CODE (~(dma_addr_t)0x0) -extern struct dma_map_ops arm_dma_ops; -extern struct dma_map_ops arm_coherent_dma_ops; +extern const struct dma_map_ops arm_dma_ops; +extern const struct dma_map_ops arm_coherent_dma_ops; -static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) +static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; return &arm_dma_ops; } -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (xen_initial_domain()) return xen_dma_ops; @@ -31,7 +31,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return __generic_dma_ops(dev); } -static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { BUG_ON(!dev); dev->archdata.dma_ops = ops; diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index ab7710002ba6..d26fe1a35687 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -180,7 +180,7 @@ static void arm_dma_sync_single_for_device(struct device *dev, __dma_page_cpu_to_dev(page, offset, size, dir); } -struct dma_map_ops arm_dma_ops = { +const struct dma_map_ops arm_dma_ops = { .alloc = arm_dma_alloc, .free = arm_dma_free, .mmap = arm_dma_mmap, @@ -204,7 +204,7 @@ static int arm_coherent_dma_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs); -struct dma_map_ops arm_coherent_dma_ops = { +const struct dma_map_ops arm_coherent_dma_ops = { .alloc = arm_coherent_dma_alloc, .free = arm_coherent_dma_free, .mmap = arm_coherent_dma_mmap, @@ -1067,7 +1067,7 @@ static void __dma_page_dev_to_cpu(struct page *page, unsigned long off, int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i, j; @@ -1101,7 +1101,7 @@ int arm_dma_map_sg(struct device *dev, struct scatterlist *sg, int nents, void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; @@ -1120,7 +1120,7 @@ void arm_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; @@ -1139,7 +1139,7 @@ void arm_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, void arm_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); struct scatterlist *s; int i; @@ -2099,7 +2099,7 @@ static void arm_iommu_sync_single_for_device(struct device *dev, __dma_page_cpu_to_dev(page, offset, size, dir); } -struct dma_map_ops iommu_ops = { +const struct dma_map_ops iommu_ops = { .alloc = arm_iommu_alloc_attrs, .free = arm_iommu_free_attrs, .mmap = arm_iommu_mmap_attrs, @@ -2119,7 +2119,7 @@ struct dma_map_ops iommu_ops = { .unmap_resource = arm_iommu_unmap_resource, }; -struct dma_map_ops iommu_coherent_ops = { +const struct dma_map_ops iommu_coherent_ops = { .alloc = arm_coherent_iommu_alloc_attrs, .free = arm_coherent_iommu_free_attrs, .mmap = arm_coherent_iommu_mmap_attrs, @@ -2319,7 +2319,7 @@ void arm_iommu_detach_device(struct device *dev) } EXPORT_SYMBOL_GPL(arm_iommu_detach_device); -static struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) +static const struct dma_map_ops *arm_get_iommu_dma_map_ops(bool coherent) { return coherent ? &iommu_coherent_ops : &iommu_ops; } @@ -2374,7 +2374,7 @@ static void arm_teardown_iommu_dma_ops(struct device *dev) { } #endif /* CONFIG_ARM_DMA_USE_IOMMU */ -static struct dma_map_ops *arm_get_dma_map_ops(bool coherent) +static const struct dma_map_ops *arm_get_dma_map_ops(bool coherent) { return coherent ? &arm_coherent_dma_ops : &arm_dma_ops; } @@ -2382,7 +2382,7 @@ static struct dma_map_ops *arm_get_dma_map_ops(bool coherent) void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; dev->archdata.dma_coherent = coherent; if (arm_setup_iommu_dma_ops(dev, dma_base, size, iommu)) diff --git a/arch/arm/xen/mm.c b/arch/arm/xen/mm.c index bd62d94f8ac5..ce18c91b50a1 100644 --- a/arch/arm/xen/mm.c +++ b/arch/arm/xen/mm.c @@ -182,10 +182,10 @@ void xen_destroy_contiguous_region(phys_addr_t pstart, unsigned int order) } EXPORT_SYMBOL_GPL(xen_destroy_contiguous_region); -struct dma_map_ops *xen_dma_ops; +const struct dma_map_ops *xen_dma_ops; EXPORT_SYMBOL(xen_dma_ops); -static struct dma_map_ops xen_swiotlb_dma_ops = { +static const struct dma_map_ops xen_swiotlb_dma_ops = { .alloc = xen_swiotlb_alloc_coherent, .free = xen_swiotlb_free_coherent, .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 243ef256b8c9..00c678cc31e1 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -17,7 +17,7 @@ #define __ASM_DEVICE_H struct dev_archdata { - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index ccea82c2b089..1fedb43be712 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -25,9 +25,9 @@ #include #define DMA_ERROR_CODE (~(dma_addr_t)0) -extern struct dma_map_ops dummy_dma_ops; +extern const struct dma_map_ops dummy_dma_ops; -static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) +static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; @@ -39,7 +39,7 @@ static inline struct dma_map_ops *__generic_dma_ops(struct device *dev) return &dummy_dma_ops; } -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (xen_initial_domain()) return xen_dma_ops; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index e04082700bb1..bcef6368d48f 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -352,7 +352,7 @@ static int __swiotlb_dma_supported(struct device *hwdev, u64 mask) return 1; } -static struct dma_map_ops swiotlb_dma_ops = { +static const struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_alloc, .free = __dma_free, .mmap = __swiotlb_mmap, @@ -505,7 +505,7 @@ static int __dummy_dma_supported(struct device *hwdev, u64 mask) return 0; } -struct dma_map_ops dummy_dma_ops = { +const struct dma_map_ops dummy_dma_ops = { .alloc = __dummy_alloc, .free = __dummy_free, .mmap = __dummy_mmap, @@ -784,7 +784,7 @@ static void __iommu_unmap_sg_attrs(struct device *dev, iommu_dma_unmap_sg(dev, sgl, nelems, dir, attrs); } -static struct dma_map_ops iommu_dma_ops = { +static const struct dma_map_ops iommu_dma_ops = { .alloc = __iommu_alloc_attrs, .free = __iommu_free_attrs, .mmap = __iommu_mmap_attrs, diff --git a/arch/avr32/include/asm/dma-mapping.h b/arch/avr32/include/asm/dma-mapping.h index 1115f2a645d1..b2b43c0e0774 100644 --- a/arch/avr32/include/asm/dma-mapping.h +++ b/arch/avr32/include/asm/dma-mapping.h @@ -4,9 +4,9 @@ extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, int direction); -extern struct dma_map_ops avr32_dma_ops; +extern const struct dma_map_ops avr32_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &avr32_dma_ops; } diff --git a/arch/avr32/mm/dma-coherent.c b/arch/avr32/mm/dma-coherent.c index 54534e5d0781..555222d4f414 100644 --- a/arch/avr32/mm/dma-coherent.c +++ b/arch/avr32/mm/dma-coherent.c @@ -191,7 +191,7 @@ static void avr32_dma_sync_sg_for_device(struct device *dev, dma_cache_sync(dev, sg_virt(sg), sg->length, direction); } -struct dma_map_ops avr32_dma_ops = { +const struct dma_map_ops avr32_dma_ops = { .alloc = avr32_dma_alloc, .free = avr32_dma_free, .map_page = avr32_dma_map_page, diff --git a/arch/blackfin/include/asm/dma-mapping.h b/arch/blackfin/include/asm/dma-mapping.h index 3490570aaa82..320fb50fbd41 100644 --- a/arch/blackfin/include/asm/dma-mapping.h +++ b/arch/blackfin/include/asm/dma-mapping.h @@ -36,9 +36,9 @@ _dma_sync(dma_addr_t addr, size_t size, enum dma_data_direction dir) __dma_sync(addr, size, dir); } -extern struct dma_map_ops bfin_dma_ops; +extern const struct dma_map_ops bfin_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &bfin_dma_ops; } diff --git a/arch/blackfin/kernel/dma-mapping.c b/arch/blackfin/kernel/dma-mapping.c index a27a74a18fb0..477bb29a7987 100644 --- a/arch/blackfin/kernel/dma-mapping.c +++ b/arch/blackfin/kernel/dma-mapping.c @@ -159,7 +159,7 @@ static inline void bfin_dma_sync_single_for_device(struct device *dev, _dma_sync(handle, size, dir); } -struct dma_map_ops bfin_dma_ops = { +const struct dma_map_ops bfin_dma_ops = { .alloc = bfin_dma_alloc, .free = bfin_dma_free, diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index 5717b1e52d96..88258b9ebc8e 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -17,9 +17,9 @@ */ #define DMA_ERROR_CODE ~0 -extern struct dma_map_ops c6x_dma_ops; +extern const struct dma_map_ops c6x_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &c6x_dma_ops; } diff --git a/arch/c6x/kernel/dma.c b/arch/c6x/kernel/dma.c index 6752df32ef06..9fff8be75f58 100644 --- a/arch/c6x/kernel/dma.c +++ b/arch/c6x/kernel/dma.c @@ -123,7 +123,7 @@ static void c6x_dma_sync_sg_for_device(struct device *dev, } -struct dma_map_ops c6x_dma_ops = { +const struct dma_map_ops c6x_dma_ops = { .alloc = c6x_dma_alloc, .free = c6x_dma_free, .map_page = c6x_dma_map_page, diff --git a/arch/cris/arch-v32/drivers/pci/dma.c b/arch/cris/arch-v32/drivers/pci/dma.c index 1f0636793f0c..7072341995ff 100644 --- a/arch/cris/arch-v32/drivers/pci/dma.c +++ b/arch/cris/arch-v32/drivers/pci/dma.c @@ -69,7 +69,7 @@ static inline int v32_dma_supported(struct device *dev, u64 mask) return 1; } -struct dma_map_ops v32_dma_ops = { +const struct dma_map_ops v32_dma_ops = { .alloc = v32_dma_alloc, .free = v32_dma_free, .map_page = v32_dma_map_page, diff --git a/arch/cris/include/asm/dma-mapping.h b/arch/cris/include/asm/dma-mapping.h index 5a370178a0e9..aae4fbc0a656 100644 --- a/arch/cris/include/asm/dma-mapping.h +++ b/arch/cris/include/asm/dma-mapping.h @@ -2,14 +2,14 @@ #define _ASM_CRIS_DMA_MAPPING_H #ifdef CONFIG_PCI -extern struct dma_map_ops v32_dma_ops; +extern const struct dma_map_ops v32_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &v32_dma_ops; } #else -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { BUG(); return NULL; diff --git a/arch/frv/include/asm/dma-mapping.h b/arch/frv/include/asm/dma-mapping.h index 9a82bfa4303b..150cc00544a8 100644 --- a/arch/frv/include/asm/dma-mapping.h +++ b/arch/frv/include/asm/dma-mapping.h @@ -7,9 +7,9 @@ extern unsigned long __nongprelbss dma_coherent_mem_start; extern unsigned long __nongprelbss dma_coherent_mem_end; -extern struct dma_map_ops frv_dma_ops; +extern const struct dma_map_ops frv_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &frv_dma_ops; } diff --git a/arch/frv/mb93090-mb00/pci-dma-nommu.c b/arch/frv/mb93090-mb00/pci-dma-nommu.c index 187688128c65..4a96de7f0af4 100644 --- a/arch/frv/mb93090-mb00/pci-dma-nommu.c +++ b/arch/frv/mb93090-mb00/pci-dma-nommu.c @@ -164,7 +164,7 @@ static int frv_dma_supported(struct device *dev, u64 mask) return 1; } -struct dma_map_ops frv_dma_ops = { +const struct dma_map_ops frv_dma_ops = { .alloc = frv_dma_alloc, .free = frv_dma_free, .map_page = frv_dma_map_page, diff --git a/arch/frv/mb93090-mb00/pci-dma.c b/arch/frv/mb93090-mb00/pci-dma.c index dba7df918144..e7130abc0dae 100644 --- a/arch/frv/mb93090-mb00/pci-dma.c +++ b/arch/frv/mb93090-mb00/pci-dma.c @@ -106,7 +106,7 @@ static int frv_dma_supported(struct device *dev, u64 mask) return 1; } -struct dma_map_ops frv_dma_ops = { +const struct dma_map_ops frv_dma_ops = { .alloc = frv_dma_alloc, .free = frv_dma_free, .map_page = frv_dma_map_page, diff --git a/arch/h8300/include/asm/dma-mapping.h b/arch/h8300/include/asm/dma-mapping.h index 7ac7fadffed0..f804bca4c13f 100644 --- a/arch/h8300/include/asm/dma-mapping.h +++ b/arch/h8300/include/asm/dma-mapping.h @@ -1,9 +1,9 @@ #ifndef _H8300_DMA_MAPPING_H #define _H8300_DMA_MAPPING_H -extern struct dma_map_ops h8300_dma_map_ops; +extern const struct dma_map_ops h8300_dma_map_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &h8300_dma_map_ops; } diff --git a/arch/h8300/kernel/dma.c b/arch/h8300/kernel/dma.c index 3651da045806..225dd0a188dc 100644 --- a/arch/h8300/kernel/dma.c +++ b/arch/h8300/kernel/dma.c @@ -60,7 +60,7 @@ static int map_sg(struct device *dev, struct scatterlist *sgl, return nents; } -struct dma_map_ops h8300_dma_map_ops = { +const struct dma_map_ops h8300_dma_map_ops = { .alloc = dma_alloc, .free = dma_free, .map_page = map_page, diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h index 7ef58df909fc..b812e917cd95 100644 --- a/arch/hexagon/include/asm/dma-mapping.h +++ b/arch/hexagon/include/asm/dma-mapping.h @@ -32,9 +32,9 @@ struct device; extern int bad_dma_address; #define DMA_ERROR_CODE bad_dma_address -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (unlikely(dev == NULL)) return NULL; diff --git a/arch/hexagon/kernel/dma.c b/arch/hexagon/kernel/dma.c index dbc4f1003da4..e74b65009587 100644 --- a/arch/hexagon/kernel/dma.c +++ b/arch/hexagon/kernel/dma.c @@ -25,7 +25,7 @@ #include #include -struct dma_map_ops *dma_ops; +const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); int bad_dma_address; /* globals are automatically initialized to zero */ @@ -203,7 +203,7 @@ static void hexagon_sync_single_for_device(struct device *dev, dma_sync(dma_addr_to_virt(dma_handle), size, dir); } -struct dma_map_ops hexagon_dma_ops = { +const struct dma_map_ops hexagon_dma_ops = { .alloc = hexagon_dma_alloc_coherent, .free = hexagon_free_coherent, .map_sg = hexagon_map_sg, diff --git a/arch/ia64/hp/common/hwsw_iommu.c b/arch/ia64/hp/common/hwsw_iommu.c index 1e4cae5ae053..0310078a95f8 100644 --- a/arch/ia64/hp/common/hwsw_iommu.c +++ b/arch/ia64/hp/common/hwsw_iommu.c @@ -18,7 +18,7 @@ #include #include -extern struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; +extern const struct dma_map_ops sba_dma_ops, swiotlb_dma_ops; /* swiotlb declarations & definitions: */ extern int swiotlb_late_init_with_default_size (size_t size); @@ -34,7 +34,7 @@ static inline int use_swiotlb(struct device *dev) !sba_dma_ops.dma_supported(dev, *dev->dma_mask); } -struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) +const struct dma_map_ops *hwsw_dma_get_ops(struct device *dev) { if (use_swiotlb(dev)) return &swiotlb_dma_ops; diff --git a/arch/ia64/hp/common/sba_iommu.c b/arch/ia64/hp/common/sba_iommu.c index 630ee8073899..aec4a3354abe 100644 --- a/arch/ia64/hp/common/sba_iommu.c +++ b/arch/ia64/hp/common/sba_iommu.c @@ -2096,7 +2096,7 @@ static int __init acpi_sba_ioc_init_acpi(void) /* This has to run before acpi_scan_init(). */ arch_initcall(acpi_sba_ioc_init_acpi); -extern struct dma_map_ops swiotlb_dma_ops; +extern const struct dma_map_ops swiotlb_dma_ops; static int __init sba_init(void) @@ -2216,7 +2216,7 @@ sba_page_override(char *str) __setup("sbapagesize=",sba_page_override); -struct dma_map_ops sba_dma_ops = { +const struct dma_map_ops sba_dma_ops = { .alloc = sba_alloc_coherent, .free = sba_free_coherent, .map_page = sba_map_page, diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index d472805edfa9..05e467d56d86 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -14,7 +14,7 @@ #define DMA_ERROR_CODE 0 -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; extern struct ia64_machine_vector ia64_mv; extern void set_iommu_machvec(void); diff --git a/arch/ia64/include/asm/machvec.h b/arch/ia64/include/asm/machvec.h index ed7f09089f12..af285c423e1e 100644 --- a/arch/ia64/include/asm/machvec.h +++ b/arch/ia64/include/asm/machvec.h @@ -44,7 +44,7 @@ typedef void ia64_mv_kernel_launch_event_t(void); /* DMA-mapping interface: */ typedef void ia64_mv_dma_init (void); typedef u64 ia64_mv_dma_get_required_mask (struct device *); -typedef struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); +typedef const struct dma_map_ops *ia64_mv_dma_get_ops(struct device *); /* * WARNING: The legacy I/O space is _architected_. Platforms are @@ -248,7 +248,7 @@ extern void machvec_init_from_cmdline(const char *cmdline); # endif /* CONFIG_IA64_GENERIC */ extern void swiotlb_dma_init(void); -extern struct dma_map_ops *dma_get_ops(struct device *); +extern const struct dma_map_ops *dma_get_ops(struct device *); /* * Define default versions so we can extend machvec for new platforms without having diff --git a/arch/ia64/kernel/dma-mapping.c b/arch/ia64/kernel/dma-mapping.c index 7f7916238208..e0dd97f4eb69 100644 --- a/arch/ia64/kernel/dma-mapping.c +++ b/arch/ia64/kernel/dma-mapping.c @@ -4,7 +4,7 @@ /* Set this to 1 if there is a HW IOMMU in the system */ int iommu_detected __read_mostly; -struct dma_map_ops *dma_ops; +const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) @@ -17,7 +17,7 @@ static int __init dma_init(void) } fs_initcall(dma_init); -struct dma_map_ops *dma_get_ops(struct device *dev) +const struct dma_map_ops *dma_get_ops(struct device *dev) { return dma_ops; } diff --git a/arch/ia64/kernel/pci-dma.c b/arch/ia64/kernel/pci-dma.c index 992c1098c522..9094a73f996f 100644 --- a/arch/ia64/kernel/pci-dma.c +++ b/arch/ia64/kernel/pci-dma.c @@ -90,11 +90,11 @@ void __init pci_iommu_alloc(void) { dma_ops = &intel_dma_ops; - dma_ops->sync_single_for_cpu = machvec_dma_sync_single; - dma_ops->sync_sg_for_cpu = machvec_dma_sync_sg; - dma_ops->sync_single_for_device = machvec_dma_sync_single; - dma_ops->sync_sg_for_device = machvec_dma_sync_sg; - dma_ops->dma_supported = iommu_dma_supported; + intel_dma_ops.sync_single_for_cpu = machvec_dma_sync_single; + intel_dma_ops.sync_sg_for_cpu = machvec_dma_sync_sg; + intel_dma_ops.sync_single_for_device = machvec_dma_sync_single; + intel_dma_ops.sync_sg_for_device = machvec_dma_sync_sg; + intel_dma_ops.dma_supported = iommu_dma_supported; /* * The order of these functions is important for diff --git a/arch/ia64/kernel/pci-swiotlb.c b/arch/ia64/kernel/pci-swiotlb.c index 2933208c0285..a14989dacded 100644 --- a/arch/ia64/kernel/pci-swiotlb.c +++ b/arch/ia64/kernel/pci-swiotlb.c @@ -30,7 +30,7 @@ static void ia64_swiotlb_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_addr); } -struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops swiotlb_dma_ops = { .alloc = ia64_swiotlb_alloc_coherent, .free = ia64_swiotlb_free_coherent, .map_page = swiotlb_map_page, diff --git a/arch/m32r/include/asm/device.h b/arch/m32r/include/asm/device.h index 4a9f35e0973f..7955a9799466 100644 --- a/arch/m32r/include/asm/device.h +++ b/arch/m32r/include/asm/device.h @@ -4,7 +4,7 @@ * This file is released under the GPLv2 */ struct dev_archdata { - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; }; struct pdev_archdata { diff --git a/arch/m32r/include/asm/dma-mapping.h b/arch/m32r/include/asm/dma-mapping.h index 2c43a77fe942..99c43d2f05dc 100644 --- a/arch/m32r/include/asm/dma-mapping.h +++ b/arch/m32r/include/asm/dma-mapping.h @@ -10,7 +10,7 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; diff --git a/arch/m68k/include/asm/dma-mapping.h b/arch/m68k/include/asm/dma-mapping.h index 96c536194287..863509939d5a 100644 --- a/arch/m68k/include/asm/dma-mapping.h +++ b/arch/m68k/include/asm/dma-mapping.h @@ -1,9 +1,9 @@ #ifndef _M68K_DMA_MAPPING_H #define _M68K_DMA_MAPPING_H -extern struct dma_map_ops m68k_dma_ops; +extern const struct dma_map_ops m68k_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &m68k_dma_ops; } diff --git a/arch/m68k/kernel/dma.c b/arch/m68k/kernel/dma.c index 07070065a425..0fc5dabb4a42 100644 --- a/arch/m68k/kernel/dma.c +++ b/arch/m68k/kernel/dma.c @@ -158,7 +158,7 @@ static int m68k_dma_map_sg(struct device *dev, struct scatterlist *sglist, return nents; } -struct dma_map_ops m68k_dma_ops = { +const struct dma_map_ops m68k_dma_ops = { .alloc = m68k_dma_alloc, .free = m68k_dma_free, .map_page = m68k_dma_map_page, diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h index 27af5d479ce6..c156a7ac732f 100644 --- a/arch/metag/include/asm/dma-mapping.h +++ b/arch/metag/include/asm/dma-mapping.h @@ -1,9 +1,9 @@ #ifndef _ASM_METAG_DMA_MAPPING_H #define _ASM_METAG_DMA_MAPPING_H -extern struct dma_map_ops metag_dma_ops; +extern const struct dma_map_ops metag_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &metag_dma_ops; } diff --git a/arch/metag/kernel/dma.c b/arch/metag/kernel/dma.c index 91968d92652b..f0ab3a498328 100644 --- a/arch/metag/kernel/dma.c +++ b/arch/metag/kernel/dma.c @@ -575,7 +575,7 @@ static void metag_dma_sync_sg_for_device(struct device *dev, dma_sync_for_device(sg_virt(sg), sg->length, direction); } -struct dma_map_ops metag_dma_ops = { +const struct dma_map_ops metag_dma_ops = { .alloc = metag_dma_alloc, .free = metag_dma_free, .map_page = metag_dma_map_page, diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index 1768d4bdc8d3..c7faf2fb51d6 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -36,9 +36,9 @@ /* * Available generic sets of operations */ -extern struct dma_map_ops dma_direct_ops; +extern const struct dma_map_ops dma_direct_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &dma_direct_ops; } diff --git a/arch/microblaze/kernel/dma.c b/arch/microblaze/kernel/dma.c index 818daf230eb4..12e093a03e60 100644 --- a/arch/microblaze/kernel/dma.c +++ b/arch/microblaze/kernel/dma.c @@ -187,7 +187,7 @@ int dma_direct_mmap_coherent(struct device *dev, struct vm_area_struct *vma, #endif } -struct dma_map_ops dma_direct_ops = { +const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, diff --git a/arch/mips/cavium-octeon/dma-octeon.c b/arch/mips/cavium-octeon/dma-octeon.c index fd69528b24fb..897d32c888ee 100644 --- a/arch/mips/cavium-octeon/dma-octeon.c +++ b/arch/mips/cavium-octeon/dma-octeon.c @@ -205,7 +205,7 @@ static phys_addr_t octeon_unity_dma_to_phys(struct device *dev, dma_addr_t daddr } struct octeon_dma_map_ops { - struct dma_map_ops dma_map_ops; + const struct dma_map_ops dma_map_ops; dma_addr_t (*phys_to_dma)(struct device *dev, phys_addr_t paddr); phys_addr_t (*dma_to_phys)(struct device *dev, dma_addr_t daddr); }; @@ -333,7 +333,7 @@ static struct octeon_dma_map_ops _octeon_pci_dma_map_ops = { }, }; -struct dma_map_ops *octeon_pci_dma_map_ops; +const struct dma_map_ops *octeon_pci_dma_map_ops; void __init octeon_pci_dma_init(void) { diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h index 21c2082a0dfb..ebc5c1265473 100644 --- a/arch/mips/include/asm/device.h +++ b/arch/mips/include/asm/device.h @@ -10,7 +10,7 @@ struct dma_map_ops; struct dev_archdata { /* DMA operations on that device */ - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; #ifdef CONFIG_DMA_PERDEV_COHERENT /* Non-zero if DMA is coherent with CPU caches */ diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index 7aa71b9b0258..b59b084a7569 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -9,9 +9,9 @@ #include #endif -extern struct dma_map_ops *mips_dma_map_ops; +extern const struct dma_map_ops *mips_dma_map_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; diff --git a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h index 460042ee5d6f..9110988b92a1 100644 --- a/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h +++ b/arch/mips/include/asm/mach-cavium-octeon/dma-coherence.h @@ -65,7 +65,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr); phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr); struct dma_map_ops; -extern struct dma_map_ops *octeon_pci_dma_map_ops; +extern const struct dma_map_ops *octeon_pci_dma_map_ops; extern char *octeon_swiotlb; #endif /* __ASM_MACH_CAVIUM_OCTEON_DMA_COHERENCE_H */ diff --git a/arch/mips/include/asm/netlogic/common.h b/arch/mips/include/asm/netlogic/common.h index be52c2125d71..e0717d10e650 100644 --- a/arch/mips/include/asm/netlogic/common.h +++ b/arch/mips/include/asm/netlogic/common.h @@ -88,7 +88,7 @@ extern struct plat_smp_ops nlm_smp_ops; extern char nlm_reset_entry[], nlm_reset_entry_end[]; /* SWIOTLB */ -extern struct dma_map_ops nlm_swiotlb_dma_ops; +extern const struct dma_map_ops nlm_swiotlb_dma_ops; extern unsigned int nlm_threads_per_core; extern cpumask_t nlm_cpumask; diff --git a/arch/mips/loongson64/common/dma-swiotlb.c b/arch/mips/loongson64/common/dma-swiotlb.c index aab4fd681e1f..7296df043d92 100644 --- a/arch/mips/loongson64/common/dma-swiotlb.c +++ b/arch/mips/loongson64/common/dma-swiotlb.c @@ -122,7 +122,7 @@ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) return daddr; } -static struct dma_map_ops loongson_dma_map_ops = { +static const struct dma_map_ops loongson_dma_map_ops = { .alloc = loongson_dma_alloc_coherent, .free = loongson_dma_free_coherent, .map_page = loongson_dma_map_page, diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index a39c36af97ad..1cb84472cb58 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -417,7 +417,7 @@ void dma_cache_sync(struct device *dev, void *vaddr, size_t size, EXPORT_SYMBOL(dma_cache_sync); -static struct dma_map_ops mips_default_dma_map_ops = { +static const struct dma_map_ops mips_default_dma_map_ops = { .alloc = mips_dma_alloc_coherent, .free = mips_dma_free_coherent, .mmap = mips_dma_mmap, @@ -433,7 +433,7 @@ static struct dma_map_ops mips_default_dma_map_ops = { .dma_supported = mips_dma_supported }; -struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops; +const struct dma_map_ops *mips_dma_map_ops = &mips_default_dma_map_ops; EXPORT_SYMBOL(mips_dma_map_ops); #define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) diff --git a/arch/mips/netlogic/common/nlm-dma.c b/arch/mips/netlogic/common/nlm-dma.c index 0630693bec2a..0ec9d9da6d51 100644 --- a/arch/mips/netlogic/common/nlm-dma.c +++ b/arch/mips/netlogic/common/nlm-dma.c @@ -67,7 +67,7 @@ static void nlm_dma_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_handle); } -struct dma_map_ops nlm_swiotlb_dma_ops = { +const struct dma_map_ops nlm_swiotlb_dma_ops = { .alloc = nlm_dma_alloc_coherent, .free = nlm_dma_free_coherent, .map_page = swiotlb_map_page, diff --git a/arch/mn10300/include/asm/dma-mapping.h b/arch/mn10300/include/asm/dma-mapping.h index 1dcd44757f32..564e3927e005 100644 --- a/arch/mn10300/include/asm/dma-mapping.h +++ b/arch/mn10300/include/asm/dma-mapping.h @@ -14,9 +14,9 @@ #include #include -extern struct dma_map_ops mn10300_dma_ops; +extern const struct dma_map_ops mn10300_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &mn10300_dma_ops; } diff --git a/arch/mn10300/mm/dma-alloc.c b/arch/mn10300/mm/dma-alloc.c index 4f4b9029f0ea..86108d2496b3 100644 --- a/arch/mn10300/mm/dma-alloc.c +++ b/arch/mn10300/mm/dma-alloc.c @@ -121,7 +121,7 @@ static int mn10300_dma_supported(struct device *dev, u64 mask) return 1; } -struct dma_map_ops mn10300_dma_ops = { +const struct dma_map_ops mn10300_dma_ops = { .alloc = mn10300_dma_alloc, .free = mn10300_dma_free, .map_page = mn10300_dma_map_page, diff --git a/arch/nios2/include/asm/dma-mapping.h b/arch/nios2/include/asm/dma-mapping.h index bec8ac8e6ad2..aa00d839a64b 100644 --- a/arch/nios2/include/asm/dma-mapping.h +++ b/arch/nios2/include/asm/dma-mapping.h @@ -10,9 +10,9 @@ #ifndef _ASM_NIOS2_DMA_MAPPING_H #define _ASM_NIOS2_DMA_MAPPING_H -extern struct dma_map_ops nios2_dma_ops; +extern const struct dma_map_ops nios2_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &nios2_dma_ops; } diff --git a/arch/nios2/mm/dma-mapping.c b/arch/nios2/mm/dma-mapping.c index f6a5dcf9d682..7040c1adbb5e 100644 --- a/arch/nios2/mm/dma-mapping.c +++ b/arch/nios2/mm/dma-mapping.c @@ -192,7 +192,7 @@ static void nios2_dma_sync_sg_for_device(struct device *dev, } -struct dma_map_ops nios2_dma_ops = { +const struct dma_map_ops nios2_dma_ops = { .alloc = nios2_dma_alloc, .free = nios2_dma_free, .map_page = nios2_dma_map_page, diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index 1f260bccb368..88acbedb4947 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h @@ -28,9 +28,9 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) -extern struct dma_map_ops or1k_dma_map_ops; +extern const struct dma_map_ops or1k_dma_map_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &or1k_dma_map_ops; } diff --git a/arch/openrisc/kernel/dma.c b/arch/openrisc/kernel/dma.c index 906998bac957..b10369b7e31b 100644 --- a/arch/openrisc/kernel/dma.c +++ b/arch/openrisc/kernel/dma.c @@ -232,7 +232,7 @@ or1k_sync_single_for_device(struct device *dev, mtspr(SPR_DCBFR, cl); } -struct dma_map_ops or1k_dma_map_ops = { +const struct dma_map_ops or1k_dma_map_ops = { .alloc = or1k_dma_alloc, .free = or1k_dma_free, .map_page = or1k_map_page, diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index 16e024602737..1749073e44fc 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -21,13 +21,13 @@ */ #ifdef CONFIG_PA11 -extern struct dma_map_ops pcxl_dma_ops; -extern struct dma_map_ops pcx_dma_ops; +extern const struct dma_map_ops pcxl_dma_ops; +extern const struct dma_map_ops pcx_dma_ops; #endif -extern struct dma_map_ops *hppa_dma_ops; +extern const struct dma_map_ops *hppa_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return hppa_dma_ops; } diff --git a/arch/parisc/kernel/drivers.c b/arch/parisc/kernel/drivers.c index 700e2d2da096..fa78419100c8 100644 --- a/arch/parisc/kernel/drivers.c +++ b/arch/parisc/kernel/drivers.c @@ -40,7 +40,7 @@ #include /* See comments in include/asm-parisc/pci.h */ -struct dma_map_ops *hppa_dma_ops __read_mostly; +const struct dma_map_ops *hppa_dma_ops __read_mostly; EXPORT_SYMBOL(hppa_dma_ops); static struct device root = { diff --git a/arch/parisc/kernel/pci-dma.c b/arch/parisc/kernel/pci-dma.c index 697c53543a4d..5f0067a62738 100644 --- a/arch/parisc/kernel/pci-dma.c +++ b/arch/parisc/kernel/pci-dma.c @@ -572,7 +572,7 @@ static void pa11_dma_sync_sg_for_device(struct device *dev, struct scatterlist * flush_kernel_vmap_range(sg_virt(sg), sg->length); } -struct dma_map_ops pcxl_dma_ops = { +const struct dma_map_ops pcxl_dma_ops = { .dma_supported = pa11_dma_supported, .alloc = pa11_dma_alloc, .free = pa11_dma_free, @@ -608,7 +608,7 @@ static void pcx_dma_free(struct device *dev, size_t size, void *vaddr, return; } -struct dma_map_ops pcx_dma_ops = { +const struct dma_map_ops pcx_dma_ops = { .dma_supported = pa11_dma_supported, .alloc = pcx_dma_alloc, .free = pcx_dma_free, diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 406c2b1ff82d..49cbb0fca233 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -21,7 +21,7 @@ struct iommu_table; */ struct dev_archdata { /* DMA operations on that device */ - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; /* * These two used to be a union. However, with the hybrid ops we need diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 84e3f8dd5e4f..2ec3eadf336f 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -76,9 +76,9 @@ static inline unsigned long device_to_mask(struct device *dev) #ifdef CONFIG_PPC64 extern struct dma_map_ops dma_iommu_ops; #endif -extern struct dma_map_ops dma_direct_ops; +extern const struct dma_map_ops dma_direct_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { /* We don't handle the NULL dev case for ISA for now. We could * do it via an out of line call but it is not needed for now. The @@ -91,7 +91,7 @@ static inline struct dma_map_ops *get_dma_ops(struct device *dev) return dev->archdata.dma_ops; } -static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { dev->archdata.dma_ops = ops; } diff --git a/arch/powerpc/include/asm/pci.h b/arch/powerpc/include/asm/pci.h index e9bd6cf0212f..93eded8d3843 100644 --- a/arch/powerpc/include/asm/pci.h +++ b/arch/powerpc/include/asm/pci.h @@ -53,8 +53,8 @@ static inline int pci_get_legacy_ide_irq(struct pci_dev *dev, int channel) } #ifdef CONFIG_PCI -extern void set_pci_dma_ops(struct dma_map_ops *dma_ops); -extern struct dma_map_ops *get_pci_dma_ops(void); +extern void set_pci_dma_ops(const struct dma_map_ops *dma_ops); +extern const struct dma_map_ops *get_pci_dma_ops(void); #else /* CONFIG_PCI */ #define set_pci_dma_ops(d) #define get_pci_dma_ops() NULL diff --git a/arch/powerpc/include/asm/swiotlb.h b/arch/powerpc/include/asm/swiotlb.h index de99d6e29430..01d45a5fd00b 100644 --- a/arch/powerpc/include/asm/swiotlb.h +++ b/arch/powerpc/include/asm/swiotlb.h @@ -13,7 +13,7 @@ #include -extern struct dma_map_ops swiotlb_dma_ops; +extern const struct dma_map_ops swiotlb_dma_ops; static inline void dma_mark_clean(void *addr, size_t size) {} diff --git a/arch/powerpc/kernel/dma-swiotlb.c b/arch/powerpc/kernel/dma-swiotlb.c index c6689f658b50..d0ea7860e02b 100644 --- a/arch/powerpc/kernel/dma-swiotlb.c +++ b/arch/powerpc/kernel/dma-swiotlb.c @@ -46,7 +46,7 @@ static u64 swiotlb_powerpc_get_required(struct device *dev) * map_page, and unmap_page on highmem, use normal dma_ops * for everything else. */ -struct dma_map_ops swiotlb_dma_ops = { +const struct dma_map_ops swiotlb_dma_ops = { .alloc = __dma_direct_alloc_coherent, .free = __dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 6877e3fa95bb..03b98f1f98ec 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -274,7 +274,7 @@ static inline void dma_direct_sync_single(struct device *dev, } #endif -struct dma_map_ops dma_direct_ops = { +const struct dma_map_ops dma_direct_ops = { .alloc = dma_direct_alloc_coherent, .free = dma_direct_free_coherent, .mmap = dma_direct_mmap_coherent, @@ -316,7 +316,7 @@ EXPORT_SYMBOL(dma_set_coherent_mask); int __dma_set_mask(struct device *dev, u64 dma_mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if ((dma_ops != NULL) && (dma_ops->set_dma_mask != NULL)) return dma_ops->set_dma_mask(dev, dma_mask); @@ -344,7 +344,7 @@ EXPORT_SYMBOL(dma_set_mask); u64 __dma_get_required_mask(struct device *dev) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); if (unlikely(dma_ops == NULL)) return 0; diff --git a/arch/powerpc/kernel/pci-common.c b/arch/powerpc/kernel/pci-common.c index 74bec5498972..09db4778435c 100644 --- a/arch/powerpc/kernel/pci-common.c +++ b/arch/powerpc/kernel/pci-common.c @@ -59,14 +59,14 @@ resource_size_t isa_mem_base; EXPORT_SYMBOL(isa_mem_base); -static struct dma_map_ops *pci_dma_ops = &dma_direct_ops; +static const struct dma_map_ops *pci_dma_ops = &dma_direct_ops; -void set_pci_dma_ops(struct dma_map_ops *dma_ops) +void set_pci_dma_ops(const struct dma_map_ops *dma_ops) { pci_dma_ops = dma_ops; } -struct dma_map_ops *get_pci_dma_ops(void) +const struct dma_map_ops *get_pci_dma_ops(void) { return pci_dma_ops; } diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index 7ff51f96a00e..e1413e69e5fe 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -651,7 +651,7 @@ static int dma_fixed_dma_supported(struct device *dev, u64 mask) static int dma_set_mask_and_switch(struct device *dev, u64 dma_mask); -static struct dma_map_ops dma_iommu_fixed_ops = { +static const struct dma_map_ops dma_iommu_fixed_ops = { .alloc = dma_fixed_alloc_coherent, .free = dma_fixed_free_coherent, .map_sg = dma_fixed_map_sg, @@ -1172,7 +1172,7 @@ __setup("iommu_fixed=", setup_iommu_fixed); static u64 cell_dma_get_required_mask(struct device *dev) { - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; if (!dev->dma_mask) return 0; diff --git a/arch/powerpc/platforms/powernv/npu-dma.c b/arch/powerpc/platforms/powernv/npu-dma.c index 73b155fd4481..1c383f38031d 100644 --- a/arch/powerpc/platforms/powernv/npu-dma.c +++ b/arch/powerpc/platforms/powernv/npu-dma.c @@ -115,7 +115,7 @@ static u64 dma_npu_get_required_mask(struct device *dev) return 0; } -static struct dma_map_ops dma_npu_ops = { +static const struct dma_map_ops dma_npu_ops = { .map_page = dma_npu_map_page, .map_sg = dma_npu_map_sg, .alloc = dma_npu_alloc, diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index 8af1c15aef85..c81450d98794 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -701,7 +701,7 @@ static u64 ps3_dma_get_required_mask(struct device *_dev) return DMA_BIT_MASK(32); } -static struct dma_map_ops ps3_sb_dma_ops = { +static const struct dma_map_ops ps3_sb_dma_ops = { .alloc = ps3_alloc_coherent, .free = ps3_free_coherent, .map_sg = ps3_sb_map_sg, @@ -712,7 +712,7 @@ static struct dma_map_ops ps3_sb_dma_ops = { .unmap_page = ps3_unmap_page, }; -static struct dma_map_ops ps3_ioc0_dma_ops = { +static const struct dma_map_ops ps3_ioc0_dma_ops = { .alloc = ps3_alloc_coherent, .free = ps3_free_coherent, .map_sg = ps3_ioc0_map_sg, diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 614c28537141..2e36a0b8944a 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -136,7 +136,7 @@ static u64 ibmebus_dma_get_required_mask(struct device *dev) return DMA_BIT_MASK(64); } -static struct dma_map_ops ibmebus_dma_ops = { +static const struct dma_map_ops ibmebus_dma_ops = { .alloc = ibmebus_alloc_coherent, .free = ibmebus_free_coherent, .map_sg = ibmebus_map_sg, diff --git a/arch/powerpc/platforms/pseries/vio.c b/arch/powerpc/platforms/pseries/vio.c index 2c8fb3ec989e..720493932486 100644 --- a/arch/powerpc/platforms/pseries/vio.c +++ b/arch/powerpc/platforms/pseries/vio.c @@ -615,7 +615,7 @@ static u64 vio_dma_get_required_mask(struct device *dev) return dma_iommu_ops.get_required_mask(dev); } -static struct dma_map_ops vio_dma_mapping_ops = { +static const struct dma_map_ops vio_dma_mapping_ops = { .alloc = vio_dma_iommu_alloc_coherent, .free = vio_dma_iommu_free_coherent, .mmap = dma_direct_mmap_coherent, diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h index 4a9f35e0973f..7955a9799466 100644 --- a/arch/s390/include/asm/device.h +++ b/arch/s390/include/asm/device.h @@ -4,7 +4,7 @@ * This file is released under the GPLv2 */ struct dev_archdata { - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; }; struct pdev_archdata { diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index ffaba07f50ab..2776d205b1ff 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -10,9 +10,9 @@ #define DMA_ERROR_CODE (~(dma_addr_t) 0x0) -extern struct dma_map_ops s390_pci_dma_ops; +extern const struct dma_map_ops s390_pci_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; diff --git a/arch/s390/pci/pci_dma.c b/arch/s390/pci/pci_dma.c index 1d7a9c71944a..9081a57fa340 100644 --- a/arch/s390/pci/pci_dma.c +++ b/arch/s390/pci/pci_dma.c @@ -650,7 +650,7 @@ static int __init dma_debug_do_init(void) } fs_initcall(dma_debug_do_init); -struct dma_map_ops s390_pci_dma_ops = { +const struct dma_map_ops s390_pci_dma_ops = { .alloc = s390_dma_alloc, .free = s390_dma_free, .map_sg = s390_dma_map_sg, diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index 0052ad40e86d..a7382c34c241 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -1,10 +1,10 @@ #ifndef __ASM_SH_DMA_MAPPING_H #define __ASM_SH_DMA_MAPPING_H -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; extern void no_iommu_init(void); -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return dma_ops; } diff --git a/arch/sh/kernel/dma-nommu.c b/arch/sh/kernel/dma-nommu.c index 47fee3b6e29c..d24c707b2181 100644 --- a/arch/sh/kernel/dma-nommu.c +++ b/arch/sh/kernel/dma-nommu.c @@ -65,7 +65,7 @@ static void nommu_sync_sg(struct device *dev, struct scatterlist *sg, } #endif -struct dma_map_ops nommu_dma_ops = { +const struct dma_map_ops nommu_dma_ops = { .alloc = dma_generic_alloc_coherent, .free = dma_generic_free_coherent, .map_page = nommu_map_page, diff --git a/arch/sh/mm/consistent.c b/arch/sh/mm/consistent.c index 92b6976fde59..d1275adfa0ef 100644 --- a/arch/sh/mm/consistent.c +++ b/arch/sh/mm/consistent.c @@ -22,7 +22,7 @@ #define PREALLOC_DMA_DEBUG_ENTRIES 4096 -struct dma_map_ops *dma_ops; +const struct dma_map_ops *dma_ops; EXPORT_SYMBOL(dma_ops); static int __init dma_init(void) diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 1180ae254154..3d2babc0c4c6 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -18,13 +18,13 @@ static inline void dma_cache_sync(struct device *dev, void *vaddr, size_t size, */ } -extern struct dma_map_ops *dma_ops; -extern struct dma_map_ops *leon_dma_ops; -extern struct dma_map_ops pci32_dma_ops; +extern const struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *leon_dma_ops; +extern const struct dma_map_ops pci32_dma_ops; extern struct bus_type pci_bus_type; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) diff --git a/arch/sparc/kernel/iommu.c b/arch/sparc/kernel/iommu.c index 9df997995f6b..c63ba99ca551 100644 --- a/arch/sparc/kernel/iommu.c +++ b/arch/sparc/kernel/iommu.c @@ -741,7 +741,7 @@ static void dma_4u_sync_sg_for_cpu(struct device *dev, spin_unlock_irqrestore(&iommu->lock, flags); } -static struct dma_map_ops sun4u_dma_ops = { +static const struct dma_map_ops sun4u_dma_ops = { .alloc = dma_4u_alloc_coherent, .free = dma_4u_free_coherent, .map_page = dma_4u_map_page, @@ -752,7 +752,7 @@ static struct dma_map_ops sun4u_dma_ops = { .sync_sg_for_cpu = dma_4u_sync_sg_for_cpu, }; -struct dma_map_ops *dma_ops = &sun4u_dma_ops; +const struct dma_map_ops *dma_ops = &sun4u_dma_ops; EXPORT_SYMBOL(dma_ops); int dma_supported(struct device *dev, u64 device_mask) diff --git a/arch/sparc/kernel/ioport.c b/arch/sparc/kernel/ioport.c index 6ffaec44931a..cf20033a1458 100644 --- a/arch/sparc/kernel/ioport.c +++ b/arch/sparc/kernel/ioport.c @@ -401,7 +401,7 @@ static void sbus_sync_sg_for_device(struct device *dev, struct scatterlist *sg, BUG(); } -static struct dma_map_ops sbus_dma_ops = { +static const struct dma_map_ops sbus_dma_ops = { .alloc = sbus_alloc_coherent, .free = sbus_free_coherent, .map_page = sbus_map_page, @@ -637,7 +637,7 @@ static void pci32_sync_sg_for_device(struct device *device, struct scatterlist * } } -struct dma_map_ops pci32_dma_ops = { +const struct dma_map_ops pci32_dma_ops = { .alloc = pci32_alloc_coherent, .free = pci32_free_coherent, .map_page = pci32_map_page, @@ -652,10 +652,10 @@ struct dma_map_ops pci32_dma_ops = { EXPORT_SYMBOL(pci32_dma_ops); /* leon re-uses pci32_dma_ops */ -struct dma_map_ops *leon_dma_ops = &pci32_dma_ops; +const struct dma_map_ops *leon_dma_ops = &pci32_dma_ops; EXPORT_SYMBOL(leon_dma_ops); -struct dma_map_ops *dma_ops = &sbus_dma_ops; +const struct dma_map_ops *dma_ops = &sbus_dma_ops; EXPORT_SYMBOL(dma_ops); diff --git a/arch/sparc/kernel/pci_sun4v.c b/arch/sparc/kernel/pci_sun4v.c index f4daccd12bf5..68bec7c97cb8 100644 --- a/arch/sparc/kernel/pci_sun4v.c +++ b/arch/sparc/kernel/pci_sun4v.c @@ -669,7 +669,7 @@ static void dma_4v_unmap_sg(struct device *dev, struct scatterlist *sglist, local_irq_restore(flags); } -static struct dma_map_ops sun4v_dma_ops = { +static const struct dma_map_ops sun4v_dma_ops = { .alloc = dma_4v_alloc_coherent, .free = dma_4v_free_coherent, .map_page = dma_4v_map_page, diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h index 6ab8bf146d4c..25f23ac7d361 100644 --- a/arch/tile/include/asm/device.h +++ b/arch/tile/include/asm/device.h @@ -18,7 +18,7 @@ struct dev_archdata { /* DMA operations on that device */ - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; /* Offset of the DMA address from the PA. */ dma_addr_t dma_offset; diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 01ceb4a895b0..4a06cc75b856 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -24,12 +24,12 @@ #define ARCH_HAS_DMA_GET_REQUIRED_MASK #endif -extern struct dma_map_ops *tile_dma_map_ops; -extern struct dma_map_ops *gx_pci_dma_map_ops; -extern struct dma_map_ops *gx_legacy_pci_dma_map_ops; -extern struct dma_map_ops *gx_hybrid_pci_dma_map_ops; +extern const struct dma_map_ops *tile_dma_map_ops; +extern const struct dma_map_ops *gx_pci_dma_map_ops; +extern const struct dma_map_ops *gx_legacy_pci_dma_map_ops; +extern const struct dma_map_ops *gx_hybrid_pci_dma_map_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; @@ -59,7 +59,7 @@ static inline phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) static inline void dma_mark_clean(void *addr, size_t size) {} -static inline void set_dma_ops(struct device *dev, struct dma_map_ops *ops) +static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { dev->archdata.dma_ops = ops; } diff --git a/arch/tile/kernel/pci-dma.c b/arch/tile/kernel/pci-dma.c index 24e0f8c21f2f..569bb6dd154a 100644 --- a/arch/tile/kernel/pci-dma.c +++ b/arch/tile/kernel/pci-dma.c @@ -329,7 +329,7 @@ tile_dma_supported(struct device *dev, u64 mask) return 1; } -static struct dma_map_ops tile_default_dma_map_ops = { +static const struct dma_map_ops tile_default_dma_map_ops = { .alloc = tile_dma_alloc_coherent, .free = tile_dma_free_coherent, .map_page = tile_dma_map_page, @@ -344,7 +344,7 @@ static struct dma_map_ops tile_default_dma_map_ops = { .dma_supported = tile_dma_supported }; -struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops; +const struct dma_map_ops *tile_dma_map_ops = &tile_default_dma_map_ops; EXPORT_SYMBOL(tile_dma_map_ops); /* Generic PCI DMA mapping functions */ @@ -516,7 +516,7 @@ tile_pci_dma_supported(struct device *dev, u64 mask) return 1; } -static struct dma_map_ops tile_pci_default_dma_map_ops = { +static const struct dma_map_ops tile_pci_default_dma_map_ops = { .alloc = tile_pci_dma_alloc_coherent, .free = tile_pci_dma_free_coherent, .map_page = tile_pci_dma_map_page, @@ -531,7 +531,7 @@ static struct dma_map_ops tile_pci_default_dma_map_ops = { .dma_supported = tile_pci_dma_supported }; -struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops; +const struct dma_map_ops *gx_pci_dma_map_ops = &tile_pci_default_dma_map_ops; EXPORT_SYMBOL(gx_pci_dma_map_ops); /* PCI DMA mapping functions for legacy PCI devices */ @@ -552,7 +552,7 @@ static void tile_swiotlb_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_addr); } -static struct dma_map_ops pci_swiotlb_dma_ops = { +static const struct dma_map_ops pci_swiotlb_dma_ops = { .alloc = tile_swiotlb_alloc_coherent, .free = tile_swiotlb_free_coherent, .map_page = swiotlb_map_page, @@ -567,7 +567,7 @@ static struct dma_map_ops pci_swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, }; -static struct dma_map_ops pci_hybrid_dma_ops = { +static const struct dma_map_ops pci_hybrid_dma_ops = { .alloc = tile_swiotlb_alloc_coherent, .free = tile_swiotlb_free_coherent, .map_page = tile_pci_dma_map_page, @@ -582,18 +582,18 @@ static struct dma_map_ops pci_hybrid_dma_ops = { .dma_supported = tile_pci_dma_supported }; -struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops; -struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops; +const struct dma_map_ops *gx_legacy_pci_dma_map_ops = &pci_swiotlb_dma_ops; +const struct dma_map_ops *gx_hybrid_pci_dma_map_ops = &pci_hybrid_dma_ops; #else -struct dma_map_ops *gx_legacy_pci_dma_map_ops; -struct dma_map_ops *gx_hybrid_pci_dma_map_ops; +const struct dma_map_ops *gx_legacy_pci_dma_map_ops; +const struct dma_map_ops *gx_hybrid_pci_dma_map_ops; #endif EXPORT_SYMBOL(gx_legacy_pci_dma_map_ops); EXPORT_SYMBOL(gx_hybrid_pci_dma_map_ops); int dma_set_mask(struct device *dev, u64 mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); /* * For PCI devices with 64-bit DMA addressing capability, promote @@ -623,7 +623,7 @@ EXPORT_SYMBOL(dma_set_mask); #ifdef CONFIG_ARCH_HAS_DMA_SET_COHERENT_MASK int dma_set_coherent_mask(struct device *dev, u64 mask) { - struct dma_map_ops *dma_ops = get_dma_ops(dev); + const struct dma_map_ops *dma_ops = get_dma_ops(dev); /* * For PCI devices with 64-bit DMA addressing capability, promote diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h index 4749854afd03..14d7729c7b73 100644 --- a/arch/unicore32/include/asm/dma-mapping.h +++ b/arch/unicore32/include/asm/dma-mapping.h @@ -21,9 +21,9 @@ #include #include -extern struct dma_map_ops swiotlb_dma_map_ops; +extern const struct dma_map_ops swiotlb_dma_map_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &swiotlb_dma_map_ops; } diff --git a/arch/unicore32/mm/dma-swiotlb.c b/arch/unicore32/mm/dma-swiotlb.c index 3e9f6489ba38..525413d6690e 100644 --- a/arch/unicore32/mm/dma-swiotlb.c +++ b/arch/unicore32/mm/dma-swiotlb.c @@ -31,7 +31,7 @@ static void unicore_swiotlb_free_coherent(struct device *dev, size_t size, swiotlb_free_coherent(dev, size, vaddr, dma_addr); } -struct dma_map_ops swiotlb_dma_map_ops = { +const struct dma_map_ops swiotlb_dma_map_ops = { .alloc = unicore_swiotlb_alloc_coherent, .free = unicore_swiotlb_free_coherent, .map_sg = swiotlb_map_sg_attrs, diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index 684ed6c3aa67..b2d0b4ced7e3 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -3,7 +3,7 @@ struct dev_archdata { #ifdef CONFIG_X86_DEV_DMA_OPS - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; #endif #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ @@ -13,7 +13,7 @@ struct dev_archdata { #if defined(CONFIG_X86_DEV_DMA_OPS) && defined(CONFIG_PCI_DOMAINS) struct dma_domain { struct list_head node; - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; int domain_nr; }; void add_dma_domain(struct dma_domain *domain); diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 44461626830e..5e4772886a1e 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -25,9 +25,9 @@ extern int iommu_merge; extern struct device x86_dma_fallback_dev; extern int panic_on_overflow; -extern struct dma_map_ops *dma_ops; +extern const struct dma_map_ops *dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { #ifndef CONFIG_X86_DEV_DMA_OPS return dma_ops; diff --git a/arch/x86/include/asm/iommu.h b/arch/x86/include/asm/iommu.h index 345c99cef152..793869879464 100644 --- a/arch/x86/include/asm/iommu.h +++ b/arch/x86/include/asm/iommu.h @@ -1,7 +1,7 @@ #ifndef _ASM_X86_IOMMU_H #define _ASM_X86_IOMMU_H -extern struct dma_map_ops nommu_dma_ops; +extern const struct dma_map_ops nommu_dma_ops; extern int force_iommu, no_iommu; extern int iommu_detected; extern int iommu_pass_through; diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 63ff468a7986..82dfe32faaf4 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -695,7 +695,7 @@ static __init int init_amd_gatt(struct agp_kern_info *info) return -1; } -static struct dma_map_ops gart_dma_ops = { +static const struct dma_map_ops gart_dma_ops = { .map_sg = gart_map_sg, .unmap_sg = gart_unmap_sg, .map_page = gart_map_page, diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 5d400ba1349d..17f180148c80 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -478,7 +478,7 @@ static void calgary_free_coherent(struct device *dev, size_t size, free_pages((unsigned long)vaddr, get_order(size)); } -static struct dma_map_ops calgary_dma_ops = { +static const struct dma_map_ops calgary_dma_ops = { .alloc = calgary_alloc_coherent, .free = calgary_free_coherent, .map_sg = calgary_map_sg, diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d30c37750765..76f4c039baae 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -17,7 +17,7 @@ static int forbid_dac __read_mostly; -struct dma_map_ops *dma_ops = &nommu_dma_ops; +const struct dma_map_ops *dma_ops = &nommu_dma_ops; EXPORT_SYMBOL(dma_ops); static int iommu_sac_force __read_mostly; @@ -214,7 +214,7 @@ early_param("iommu", iommu_setup); int dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); #ifdef CONFIG_PCI if (mask > 0xffffffff && forbid_dac > 0) { diff --git a/arch/x86/kernel/pci-nommu.c b/arch/x86/kernel/pci-nommu.c index 00e71ce396a8..a88952ef371c 100644 --- a/arch/x86/kernel/pci-nommu.c +++ b/arch/x86/kernel/pci-nommu.c @@ -88,7 +88,7 @@ static void nommu_sync_sg_for_device(struct device *dev, flush_write_buffers(); } -struct dma_map_ops nommu_dma_ops = { +const struct dma_map_ops nommu_dma_ops = { .alloc = dma_generic_alloc_coherent, .free = dma_generic_free_coherent, .map_sg = nommu_map_sg, diff --git a/arch/x86/kernel/pci-swiotlb.c b/arch/x86/kernel/pci-swiotlb.c index 410efb2c7b80..1e23577e17cf 100644 --- a/arch/x86/kernel/pci-swiotlb.c +++ b/arch/x86/kernel/pci-swiotlb.c @@ -45,7 +45,7 @@ void x86_swiotlb_free_coherent(struct device *dev, size_t size, dma_generic_free_coherent(dev, size, vaddr, dma_addr, attrs); } -static struct dma_map_ops swiotlb_dma_ops = { +static const struct dma_map_ops swiotlb_dma_ops = { .mapping_error = swiotlb_dma_mapping_error, .alloc = x86_swiotlb_alloc_coherent, .free = x86_swiotlb_free_coherent, diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index 052c1cb76305..aa3828823170 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -179,7 +179,7 @@ static void *sta2x11_swiotlb_alloc_coherent(struct device *dev, } /* We have our own dma_ops: the same as swiotlb but from alloc (above) */ -static struct dma_map_ops sta2x11_dma_ops = { +static const struct dma_map_ops sta2x11_dma_ops = { .alloc = sta2x11_swiotlb_alloc_coherent, .free = x86_swiotlb_free_coherent, .map_page = swiotlb_map_page, diff --git a/arch/x86/xen/pci-swiotlb-xen.c b/arch/x86/xen/pci-swiotlb-xen.c index a0b36a9d5df1..42b08f8fc2ca 100644 --- a/arch/x86/xen/pci-swiotlb-xen.c +++ b/arch/x86/xen/pci-swiotlb-xen.c @@ -18,7 +18,7 @@ int xen_swiotlb __read_mostly; -static struct dma_map_ops xen_swiotlb_dma_ops = { +static const struct dma_map_ops xen_swiotlb_dma_ops = { .alloc = xen_swiotlb_alloc_coherent, .free = xen_swiotlb_free_coherent, .sync_single_for_cpu = xen_swiotlb_sync_single_for_cpu, diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h index fe1f5c878493..a77d45d39f35 100644 --- a/arch/xtensa/include/asm/device.h +++ b/arch/xtensa/include/asm/device.h @@ -10,7 +10,7 @@ struct dma_map_ops; struct dev_archdata { /* DMA operations on that device */ - struct dma_map_ops *dma_ops; + const struct dma_map_ops *dma_ops; }; struct pdev_archdata { diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 3fc1170a6488..50d23106cce0 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -18,9 +18,9 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) -extern struct dma_map_ops xtensa_dma_map_ops; +extern const struct dma_map_ops xtensa_dma_map_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { if (dev && dev->archdata.dma_ops) return dev->archdata.dma_ops; diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 70e362e6038e..ecec5265a66d 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -249,7 +249,7 @@ int xtensa_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) return 0; } -struct dma_map_ops xtensa_dma_map_ops = { +const struct dma_map_ops xtensa_dma_map_ops = { .alloc = xtensa_dma_alloc, .free = xtensa_dma_free, .map_page = xtensa_map_page, diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 3ef0f42984f2..3703fb9db419 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -117,7 +117,7 @@ static const struct iommu_ops amd_iommu_ops; static ATOMIC_NOTIFIER_HEAD(ppr_notifier); int amd_iommu_max_glx_val = -1; -static struct dma_map_ops amd_iommu_dma_ops; +static const struct dma_map_ops amd_iommu_dma_ops; /* * This struct contains device specific data for the IOMMU @@ -2728,7 +2728,7 @@ static int amd_iommu_dma_supported(struct device *dev, u64 mask) return check_device(dev); } -static struct dma_map_ops amd_iommu_dma_ops = { +static const struct dma_map_ops amd_iommu_dma_ops = { .alloc = alloc_coherent, .free = free_coherent, .map_page = map_page, diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c index be37890abb93..c4b27a25662a 100644 --- a/drivers/misc/mic/bus/mic_bus.c +++ b/drivers/misc/mic/bus/mic_bus.c @@ -143,7 +143,7 @@ static void mbus_release_dev(struct device *d) } struct mbus_device * -mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, +mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, struct mbus_hw_ops *hw_ops, int index, void __iomem *mmio_va) { diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c index ff6e01c25810..e5d377e97c86 100644 --- a/drivers/misc/mic/bus/scif_bus.c +++ b/drivers/misc/mic/bus/scif_bus.c @@ -138,7 +138,7 @@ static void scif_release_dev(struct device *d) } struct scif_hw_dev * -scif_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, +scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, struct mic_mw *mmio, struct mic_mw *aper, void *dp, void __iomem *rdp, struct dma_chan **chan, int num_chan, diff --git a/drivers/misc/mic/bus/scif_bus.h b/drivers/misc/mic/bus/scif_bus.h index 94f29ac608b6..ff59568219ad 100644 --- a/drivers/misc/mic/bus/scif_bus.h +++ b/drivers/misc/mic/bus/scif_bus.h @@ -113,7 +113,7 @@ int scif_register_driver(struct scif_driver *driver); void scif_unregister_driver(struct scif_driver *driver); struct scif_hw_dev * scif_register_device(struct device *pdev, int id, - struct dma_map_ops *dma_ops, + const struct dma_map_ops *dma_ops, struct scif_hw_ops *hw_ops, u8 dnode, u8 snode, struct mic_mw *mmio, struct mic_mw *aper, void *dp, void __iomem *rdp, diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c index 303da222f5b6..e3caa6c53922 100644 --- a/drivers/misc/mic/bus/vop_bus.c +++ b/drivers/misc/mic/bus/vop_bus.c @@ -154,7 +154,7 @@ vop_register_device(struct device *pdev, int id, vdev->dev.parent = pdev; vdev->id.device = id; vdev->id.vendor = VOP_DEV_ANY_ID; - vdev->dev.archdata.dma_ops = (struct dma_map_ops *)dma_ops; + vdev->dev.archdata.dma_ops = dma_ops; vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask; dma_set_mask(&vdev->dev, DMA_BIT_MASK(64)); vdev->dev.release = vop_release_dev; diff --git a/drivers/misc/mic/host/mic_boot.c b/drivers/misc/mic/host/mic_boot.c index 9599d732aff3..c327985c9523 100644 --- a/drivers/misc/mic/host/mic_boot.c +++ b/drivers/misc/mic/host/mic_boot.c @@ -245,7 +245,7 @@ static void __mic_dma_unmap_sg(struct device *dev, dma_unmap_sg(&mdev->pdev->dev, sg, nents, dir); } -static struct dma_map_ops __mic_dma_ops = { +static const struct dma_map_ops __mic_dma_ops = { .alloc = __mic_dma_alloc, .free = __mic_dma_free, .map_page = __mic_dma_map_page, @@ -344,7 +344,7 @@ mic_dma_unmap_page(struct device *dev, dma_addr_t dma_addr, mic_unmap_single(mdev, dma_addr, size); } -static struct dma_map_ops mic_dma_ops = { +static const struct dma_map_ops mic_dma_ops = { .map_page = mic_dma_map_page, .unmap_page = mic_dma_unmap_page, }; diff --git a/drivers/parisc/ccio-dma.c b/drivers/parisc/ccio-dma.c index 553ef8a5d588..aeb073b5fe16 100644 --- a/drivers/parisc/ccio-dma.c +++ b/drivers/parisc/ccio-dma.c @@ -1011,7 +1011,7 @@ ccio_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, DBG_RUN_SG("%s() DONE (nents %d)\n", __func__, nents); } -static struct dma_map_ops ccio_ops = { +static const struct dma_map_ops ccio_ops = { .dma_supported = ccio_dma_supported, .alloc = ccio_alloc, .free = ccio_free, diff --git a/drivers/parisc/sba_iommu.c b/drivers/parisc/sba_iommu.c index 151b86b6d2e2..33385e574433 100644 --- a/drivers/parisc/sba_iommu.c +++ b/drivers/parisc/sba_iommu.c @@ -1069,7 +1069,7 @@ sba_unmap_sg(struct device *dev, struct scatterlist *sglist, int nents, } -static struct dma_map_ops sba_ops = { +static const struct dma_map_ops sba_ops = { .dma_supported = sba_dma_supported, .alloc = sba_alloc, .free = sba_free, diff --git a/drivers/pci/host/vmd.c b/drivers/pci/host/vmd.c index 18ef1a93c10a..e27ad2a3bd33 100644 --- a/drivers/pci/host/vmd.c +++ b/drivers/pci/host/vmd.c @@ -282,7 +282,7 @@ static struct device *to_vmd_dev(struct device *dev) return &vmd->dev->dev; } -static struct dma_map_ops *vmd_dma_ops(struct device *dev) +static const struct dma_map_ops *vmd_dma_ops(struct device *dev) { return get_dma_ops(to_vmd_dev(dev)); } diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 10c5a17b1f51..f1da68b82c63 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -127,7 +127,7 @@ struct dma_map_ops { int is_phys; }; -extern struct dma_map_ops dma_noop_ops; +extern const struct dma_map_ops dma_noop_ops; #define DMA_BIT_MASK(n) (((n) == 64) ? ~0ULL : ((1ULL<<(n))-1)) @@ -170,8 +170,8 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, * dma dependent code. Code that depends on the dma-mapping * API needs to set 'depends on HAS_DMA' in its Kconfig */ -extern struct dma_map_ops bad_dma_ops; -static inline struct dma_map_ops *get_dma_ops(struct device *dev) +extern const struct dma_map_ops bad_dma_ops; +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { return &bad_dma_ops; } @@ -182,7 +182,7 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; kmemcheck_mark_initialized(ptr, size); @@ -201,7 +201,7 @@ static inline void dma_unmap_single_attrs(struct device *dev, dma_addr_t addr, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) @@ -217,7 +217,7 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); int i, ents; struct scatterlist *s; @@ -235,7 +235,7 @@ static inline void dma_unmap_sg_attrs(struct device *dev, struct scatterlist *sg int nents, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); debug_dma_unmap_sg(dev, sg, nents, dir); @@ -249,7 +249,7 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; kmemcheck_mark_initialized(page_address(page) + offset, size); @@ -265,7 +265,7 @@ static inline void dma_unmap_page_attrs(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_page) @@ -279,7 +279,7 @@ static inline dma_addr_t dma_map_resource(struct device *dev, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; BUG_ON(!valid_dma_direction(dir)); @@ -300,7 +300,7 @@ static inline void dma_unmap_resource(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->unmap_resource) @@ -312,7 +312,7 @@ static inline void dma_sync_single_for_cpu(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_cpu) @@ -324,7 +324,7 @@ static inline void dma_sync_single_for_device(struct device *dev, dma_addr_t addr, size_t size, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_single_for_device) @@ -364,7 +364,7 @@ static inline void dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_cpu) @@ -376,7 +376,7 @@ static inline void dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, int nelems, enum dma_data_direction dir) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!valid_dma_direction(dir)); if (ops->sync_sg_for_device) @@ -421,7 +421,7 @@ static inline int dma_mmap_attrs(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); if (ops->mmap) return ops->mmap(dev, vma, cpu_addr, dma_addr, size, attrs); @@ -439,7 +439,7 @@ dma_get_sgtable_attrs(struct device *dev, struct sg_table *sgt, void *cpu_addr, dma_addr_t dma_addr, size_t size, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); if (ops->get_sgtable) return ops->get_sgtable(dev, sgt, cpu_addr, dma_addr, size, @@ -457,7 +457,7 @@ static inline void *dma_alloc_attrs(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t flag, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); void *cpu_addr; BUG_ON(!ops); @@ -479,7 +479,7 @@ static inline void dma_free_attrs(struct device *dev, size_t size, void *cpu_addr, dma_addr_t dma_handle, unsigned long attrs) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); BUG_ON(!ops); WARN_ON(irqs_disabled()); @@ -537,7 +537,7 @@ static inline int dma_mapping_error(struct device *dev, dma_addr_t dma_addr) #ifndef HAVE_ARCH_DMA_SUPPORTED static inline int dma_supported(struct device *dev, u64 mask) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); if (!ops) return 0; @@ -550,7 +550,7 @@ static inline int dma_supported(struct device *dev, u64 mask) #ifndef HAVE_ARCH_DMA_SET_MASK static inline int dma_set_mask(struct device *dev, u64 mask) { - struct dma_map_ops *ops = get_dma_ops(dev); + const struct dma_map_ops *ops = get_dma_ops(dev); if (ops->set_dma_mask) return ops->set_dma_mask(dev, mask); diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h index 27d7c95fd0da..504d54c71bdb 100644 --- a/include/linux/mic_bus.h +++ b/include/linux/mic_bus.h @@ -90,7 +90,7 @@ struct mbus_hw_ops { }; struct mbus_device * -mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, +mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ops, struct mbus_hw_ops *hw_ops, int index, void __iomem *mmio_va); void mbus_unregister_device(struct mbus_device *mbdev); diff --git a/include/xen/arm/hypervisor.h b/include/xen/arm/hypervisor.h index 95251512e2c4..44b587b49904 100644 --- a/include/xen/arm/hypervisor.h +++ b/include/xen/arm/hypervisor.h @@ -18,7 +18,7 @@ static inline enum paravirt_lazy_mode paravirt_get_lazy_mode(void) return PARAVIRT_LAZY_NONE; } -extern struct dma_map_ops *xen_dma_ops; +extern const struct dma_map_ops *xen_dma_ops; #ifdef CONFIG_XEN void __init xen_early_init(void); diff --git a/lib/dma-noop.c b/lib/dma-noop.c index 3d766e78fbe2..65e49dd35b7b 100644 --- a/lib/dma-noop.c +++ b/lib/dma-noop.c @@ -64,7 +64,7 @@ static int dma_noop_supported(struct device *dev, u64 mask) return 1; } -struct dma_map_ops dma_noop_ops = { +const struct dma_map_ops dma_noop_ops = { .alloc = dma_noop_alloc, .free = dma_noop_free, .map_page = dma_noop_map_page, -- cgit v1.2.3 From 5657933dbb6e25feaf5d8df8c88f96cdade693a3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:02 -0800 Subject: treewide: Move dma_ops from struct dev_archdata into struct device Some but not all architectures provide set_dma_ops(). Move dma_ops from struct dev_archdata into struct device such that it becomes possible on all architectures to configure dma_ops per device. Signed-off-by: Bart Van Assche Acked-by: Greg Kroah-Hartman Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: David Woodhouse Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Russell King Cc: x86@kernel.org Signed-off-by: Doug Ledford --- arch/arm/include/asm/device.h | 1 - arch/arm/include/asm/dma-mapping.h | 6 +++--- arch/arm64/include/asm/device.h | 1 - arch/arm64/include/asm/dma-mapping.h | 4 ++-- arch/arm64/mm/dma-mapping.c | 8 ++++---- arch/m32r/include/asm/device.h | 1 - arch/m32r/include/asm/dma-mapping.h | 4 ++-- arch/mips/include/asm/device.h | 5 ----- arch/mips/include/asm/dma-mapping.h | 4 ++-- arch/mips/pci/pci-octeon.c | 2 +- arch/powerpc/include/asm/device.h | 4 ---- arch/powerpc/include/asm/dma-mapping.h | 4 ++-- arch/powerpc/kernel/dma.c | 2 +- arch/powerpc/platforms/cell/iommu.c | 2 +- arch/powerpc/platforms/pasemi/iommu.c | 2 +- arch/powerpc/platforms/pasemi/setup.c | 2 +- arch/powerpc/platforms/ps3/system-bus.c | 4 ++-- arch/powerpc/platforms/pseries/ibmebus.c | 2 +- arch/s390/include/asm/device.h | 1 - arch/s390/include/asm/dma-mapping.h | 4 ++-- arch/s390/pci/pci.c | 2 +- arch/tile/include/asm/device.h | 3 --- arch/tile/include/asm/dma-mapping.h | 6 +++--- arch/x86/include/asm/device.h | 3 --- arch/x86/include/asm/dma-mapping.h | 4 ++-- arch/x86/kernel/pci-calgary_64.c | 4 ++-- arch/x86/pci/common.c | 2 +- arch/x86/pci/sta2x11-fixup.c | 8 ++++---- arch/xtensa/include/asm/device.h | 4 ---- arch/xtensa/include/asm/dma-mapping.h | 4 ++-- drivers/infiniband/ulp/srpt/ib_srpt.c | 2 +- drivers/iommu/amd_iommu.c | 6 +++--- drivers/misc/mic/bus/mic_bus.c | 2 +- drivers/misc/mic/bus/scif_bus.c | 2 +- drivers/misc/mic/bus/vop_bus.c | 2 +- include/linux/device.h | 1 + 36 files changed, 48 insertions(+), 70 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/include/asm/device.h b/arch/arm/include/asm/device.h index d8a572f9c187..220ba207be91 100644 --- a/arch/arm/include/asm/device.h +++ b/arch/arm/include/asm/device.h @@ -7,7 +7,6 @@ #define ASMARM_DEVICE_H struct dev_archdata { - const struct dma_map_ops *dma_ops; #ifdef CONFIG_DMABOUNCE struct dmabounce_device_info *dmabounce; #endif diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index 1aabd781306f..312f4d0564d6 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -18,8 +18,8 @@ extern const struct dma_map_ops arm_coherent_dma_ops; static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; return &arm_dma_ops; } @@ -34,7 +34,7 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { BUG_ON(!dev); - dev->archdata.dma_ops = ops; + dev->dma_ops = ops; } #define HAVE_ARCH_DMA_SUPPORTED 1 diff --git a/arch/arm64/include/asm/device.h b/arch/arm64/include/asm/device.h index 00c678cc31e1..73d5bab015eb 100644 --- a/arch/arm64/include/asm/device.h +++ b/arch/arm64/include/asm/device.h @@ -17,7 +17,6 @@ #define __ASM_DEVICE_H struct dev_archdata { - const struct dma_map_ops *dma_ops; #ifdef CONFIG_IOMMU_API void *iommu; /* private IOMMU data */ #endif diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 1fedb43be712..58ae36cc3b60 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -29,8 +29,8 @@ extern const struct dma_map_ops dummy_dma_ops; static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; /* * We expect no ISA devices, and all other DMA masters are expected to diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index bcef6368d48f..dbab4c6c084b 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -837,7 +837,7 @@ static bool do_iommu_attach(struct device *dev, const struct iommu_ops *ops, return false; } - dev->archdata.dma_ops = &iommu_dma_ops; + dev->dma_ops = &iommu_dma_ops; return true; } @@ -941,7 +941,7 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_teardown_dma_ops(struct device *dev) { - dev->archdata.dma_ops = NULL; + dev->dma_ops = NULL; } #else @@ -955,8 +955,8 @@ static void __iommu_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, const struct iommu_ops *iommu, bool coherent) { - if (!dev->archdata.dma_ops) - dev->archdata.dma_ops = &swiotlb_dma_ops; + if (!dev->dma_ops) + dev->dma_ops = &swiotlb_dma_ops; dev->archdata.dma_coherent = coherent; __iommu_setup_dma_ops(dev, dma_base, size, iommu); diff --git a/arch/m32r/include/asm/device.h b/arch/m32r/include/asm/device.h index 7955a9799466..5203fc87f080 100644 --- a/arch/m32r/include/asm/device.h +++ b/arch/m32r/include/asm/device.h @@ -4,7 +4,6 @@ * This file is released under the GPLv2 */ struct dev_archdata { - const struct dma_map_ops *dma_ops; }; struct pdev_archdata { diff --git a/arch/m32r/include/asm/dma-mapping.h b/arch/m32r/include/asm/dma-mapping.h index 99c43d2f05dc..27b1597ac563 100644 --- a/arch/m32r/include/asm/dma-mapping.h +++ b/arch/m32r/include/asm/dma-mapping.h @@ -12,8 +12,8 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; return &dma_noop_ops; } diff --git a/arch/mips/include/asm/device.h b/arch/mips/include/asm/device.h index ebc5c1265473..6aa796f1081a 100644 --- a/arch/mips/include/asm/device.h +++ b/arch/mips/include/asm/device.h @@ -6,12 +6,7 @@ #ifndef _ASM_MIPS_DEVICE_H #define _ASM_MIPS_DEVICE_H -struct dma_map_ops; - struct dev_archdata { - /* DMA operations on that device */ - const struct dma_map_ops *dma_ops; - #ifdef CONFIG_DMA_PERDEV_COHERENT /* Non-zero if DMA is coherent with CPU caches */ bool dma_coherent; diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index b59b084a7569..dad3b09fe993 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -13,8 +13,8 @@ extern const struct dma_map_ops *mips_dma_map_ops; static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; else return mips_dma_map_ops; } diff --git a/arch/mips/pci/pci-octeon.c b/arch/mips/pci/pci-octeon.c index 308d051fc45c..9ee01936862e 100644 --- a/arch/mips/pci/pci-octeon.c +++ b/arch/mips/pci/pci-octeon.c @@ -167,7 +167,7 @@ int pcibios_plat_dev_init(struct pci_dev *dev) pci_write_config_dword(dev, pos + PCI_ERR_ROOT_STATUS, dconfig); } - dev->dev.archdata.dma_ops = octeon_pci_dma_map_ops; + dev->dev.dma_ops = octeon_pci_dma_map_ops; return 0; } diff --git a/arch/powerpc/include/asm/device.h b/arch/powerpc/include/asm/device.h index 49cbb0fca233..0245bfcaac32 100644 --- a/arch/powerpc/include/asm/device.h +++ b/arch/powerpc/include/asm/device.h @@ -6,7 +6,6 @@ #ifndef _ASM_POWERPC_DEVICE_H #define _ASM_POWERPC_DEVICE_H -struct dma_map_ops; struct device_node; #ifdef CONFIG_PPC64 struct pci_dn; @@ -20,9 +19,6 @@ struct iommu_table; * drivers/macintosh/macio_asic.c */ struct dev_archdata { - /* DMA operations on that device */ - const struct dma_map_ops *dma_ops; - /* * These two used to be a union. However, with the hybrid ops we need * both so here we store both a DMA offset for direct mappings and diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 2ec3eadf336f..59fbd4abcbf8 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -88,12 +88,12 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) if (unlikely(dev == NULL)) return NULL; - return dev->archdata.dma_ops; + return dev->dma_ops; } static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { - dev->archdata.dma_ops = ops; + dev->dma_ops = ops; } /* diff --git a/arch/powerpc/kernel/dma.c b/arch/powerpc/kernel/dma.c index 03b98f1f98ec..41c749586bd2 100644 --- a/arch/powerpc/kernel/dma.c +++ b/arch/powerpc/kernel/dma.c @@ -33,7 +33,7 @@ static u64 __maybe_unused get_pfn_limit(struct device *dev) struct dev_archdata __maybe_unused *sd = &dev->archdata; #ifdef CONFIG_SWIOTLB - if (sd->max_direct_dma_addr && sd->dma_ops == &swiotlb_dma_ops) + if (sd->max_direct_dma_addr && dev->dma_ops == &swiotlb_dma_ops) pfn = min_t(u64, pfn, sd->max_direct_dma_addr >> PAGE_SHIFT); #endif diff --git a/arch/powerpc/platforms/cell/iommu.c b/arch/powerpc/platforms/cell/iommu.c index e1413e69e5fe..71b995bbcae0 100644 --- a/arch/powerpc/platforms/cell/iommu.c +++ b/arch/powerpc/platforms/cell/iommu.c @@ -692,7 +692,7 @@ static int cell_of_bus_notify(struct notifier_block *nb, unsigned long action, return 0; /* We use the PCI DMA ops */ - dev->archdata.dma_ops = get_pci_dma_ops(); + dev->dma_ops = get_pci_dma_ops(); cell_dma_dev_setup(dev); diff --git a/arch/powerpc/platforms/pasemi/iommu.c b/arch/powerpc/platforms/pasemi/iommu.c index e74adc4e7fd8..7fec04de27fc 100644 --- a/arch/powerpc/platforms/pasemi/iommu.c +++ b/arch/powerpc/platforms/pasemi/iommu.c @@ -186,7 +186,7 @@ static void pci_dma_dev_setup_pasemi(struct pci_dev *dev) */ if (dev->vendor == 0x1959 && dev->device == 0xa007 && !firmware_has_feature(FW_FEATURE_LPAR)) { - dev->dev.archdata.dma_ops = &dma_direct_ops; + dev->dev.dma_ops = &dma_direct_ops; /* * Set the coherent DMA mask to prevent the iommu * being used unnecessarily diff --git a/arch/powerpc/platforms/pasemi/setup.c b/arch/powerpc/platforms/pasemi/setup.c index 3182400cf48f..c4a3e93dc324 100644 --- a/arch/powerpc/platforms/pasemi/setup.c +++ b/arch/powerpc/platforms/pasemi/setup.c @@ -363,7 +363,7 @@ static int pcmcia_notify(struct notifier_block *nb, unsigned long action, return 0; /* We use the direct ops for localbus */ - dev->archdata.dma_ops = &dma_direct_ops; + dev->dma_ops = &dma_direct_ops; return 0; } diff --git a/arch/powerpc/platforms/ps3/system-bus.c b/arch/powerpc/platforms/ps3/system-bus.c index c81450d98794..2d2e5f80a3d3 100644 --- a/arch/powerpc/platforms/ps3/system-bus.c +++ b/arch/powerpc/platforms/ps3/system-bus.c @@ -756,11 +756,11 @@ int ps3_system_bus_device_register(struct ps3_system_bus_device *dev) switch (dev->dev_type) { case PS3_DEVICE_TYPE_IOC0: - dev->core.archdata.dma_ops = &ps3_ioc0_dma_ops; + dev->core.dma_ops = &ps3_ioc0_dma_ops; dev_set_name(&dev->core, "ioc0_%02x", ++dev_ioc0_count); break; case PS3_DEVICE_TYPE_SB: - dev->core.archdata.dma_ops = &ps3_sb_dma_ops; + dev->core.dma_ops = &ps3_sb_dma_ops; dev_set_name(&dev->core, "sb_%02x", ++dev_sb_count); break; diff --git a/arch/powerpc/platforms/pseries/ibmebus.c b/arch/powerpc/platforms/pseries/ibmebus.c index 2e36a0b8944a..99a6bf7f3bcf 100644 --- a/arch/powerpc/platforms/pseries/ibmebus.c +++ b/arch/powerpc/platforms/pseries/ibmebus.c @@ -169,7 +169,7 @@ static int ibmebus_create_device(struct device_node *dn) return -ENOMEM; dev->dev.bus = &ibmebus_bus_type; - dev->dev.archdata.dma_ops = &ibmebus_dma_ops; + dev->dev.dma_ops = &ibmebus_dma_ops; ret = of_device_add(dev); if (ret) diff --git a/arch/s390/include/asm/device.h b/arch/s390/include/asm/device.h index 7955a9799466..5203fc87f080 100644 --- a/arch/s390/include/asm/device.h +++ b/arch/s390/include/asm/device.h @@ -4,7 +4,6 @@ * This file is released under the GPLv2 */ struct dev_archdata { - const struct dma_map_ops *dma_ops; }; struct pdev_archdata { diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index 2776d205b1ff..a872027d0c1b 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -14,8 +14,8 @@ extern const struct dma_map_ops s390_pci_dma_ops; static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; return &dma_noop_ops; } diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 38e17d4d9884..82abef8b8574 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -641,7 +641,7 @@ int pcibios_add_device(struct pci_dev *pdev) int i; pdev->dev.groups = zpci_attr_groups; - pdev->dev.archdata.dma_ops = &s390_pci_dma_ops; + pdev->dev.dma_ops = &s390_pci_dma_ops; zpci_map_resources(pdev); for (i = 0; i < PCI_BAR_COUNT; i++) { diff --git a/arch/tile/include/asm/device.h b/arch/tile/include/asm/device.h index 25f23ac7d361..1cf45422a0df 100644 --- a/arch/tile/include/asm/device.h +++ b/arch/tile/include/asm/device.h @@ -17,9 +17,6 @@ #define _ASM_TILE_DEVICE_H struct dev_archdata { - /* DMA operations on that device */ - const struct dma_map_ops *dma_ops; - /* Offset of the DMA address from the PA. */ dma_addr_t dma_offset; diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 4a06cc75b856..c0620697eaad 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -31,8 +31,8 @@ extern const struct dma_map_ops *gx_hybrid_pci_dma_map_ops; static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; else return tile_dma_map_ops; } @@ -61,7 +61,7 @@ static inline void dma_mark_clean(void *addr, size_t size) {} static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *ops) { - dev->archdata.dma_ops = ops; + dev->dma_ops = ops; } static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) diff --git a/arch/x86/include/asm/device.h b/arch/x86/include/asm/device.h index b2d0b4ced7e3..1b3ef26e77df 100644 --- a/arch/x86/include/asm/device.h +++ b/arch/x86/include/asm/device.h @@ -2,9 +2,6 @@ #define _ASM_X86_DEVICE_H struct dev_archdata { -#ifdef CONFIG_X86_DEV_DMA_OPS - const struct dma_map_ops *dma_ops; -#endif #if defined(CONFIG_INTEL_IOMMU) || defined(CONFIG_AMD_IOMMU) void *iommu; /* hook for IOMMU specific extension */ #endif diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 5e4772886a1e..94b5b96966cb 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -32,10 +32,10 @@ static inline const struct dma_map_ops *get_dma_ops(struct device *dev) #ifndef CONFIG_X86_DEV_DMA_OPS return dma_ops; #else - if (unlikely(!dev) || !dev->archdata.dma_ops) + if (unlikely(!dev) || !dev->dma_ops) return dma_ops; else - return dev->archdata.dma_ops; + return dev->dma_ops; #endif } diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 17f180148c80..5070320780c6 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -1177,7 +1177,7 @@ static int __init calgary_init(void) tbl = find_iommu_table(&dev->dev); if (translation_enabled(tbl)) - dev->dev.archdata.dma_ops = &calgary_dma_ops; + dev->dev.dma_ops = &calgary_dma_ops; } return ret; @@ -1201,7 +1201,7 @@ error: calgary_disable_translation(dev); calgary_free_bus(dev); pci_dev_put(dev); /* Undo calgary_init_one()'s pci_dev_get() */ - dev->dev.archdata.dma_ops = NULL; + dev->dev.dma_ops = NULL; } while (1); return ret; diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index a4fdfa7dcc1b..0cb52ae0a8f0 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -667,7 +667,7 @@ static void set_dma_domain_ops(struct pci_dev *pdev) spin_lock(&dma_domain_list_lock); list_for_each_entry(domain, &dma_domain_list, node) { if (pci_domain_nr(pdev->bus) == domain->domain_nr) { - pdev->dev.archdata.dma_ops = domain->dma_ops; + pdev->dev.dma_ops = domain->dma_ops; break; } } diff --git a/arch/x86/pci/sta2x11-fixup.c b/arch/x86/pci/sta2x11-fixup.c index aa3828823170..ec008e800b45 100644 --- a/arch/x86/pci/sta2x11-fixup.c +++ b/arch/x86/pci/sta2x11-fixup.c @@ -203,7 +203,7 @@ static void sta2x11_setup_pdev(struct pci_dev *pdev) return; pci_set_consistent_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); pci_set_dma_mask(pdev, STA2X11_AMBA_SIZE - 1); - pdev->dev.archdata.dma_ops = &sta2x11_dma_ops; + pdev->dev.dma_ops = &sta2x11_dma_ops; /* We must enable all devices as master, for audio DMA to work */ pci_set_master(pdev); @@ -223,7 +223,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { struct sta2x11_mapping *map; - if (dev->archdata.dma_ops != &sta2x11_dma_ops) { + if (dev->dma_ops != &sta2x11_dma_ops) { if (!dev->dma_mask) return false; return addr + size - 1 <= *dev->dma_mask; @@ -247,7 +247,7 @@ bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) */ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) { - if (dev->archdata.dma_ops != &sta2x11_dma_ops) + if (dev->dma_ops != &sta2x11_dma_ops) return paddr; return p2a(paddr, to_pci_dev(dev)); } @@ -259,7 +259,7 @@ dma_addr_t phys_to_dma(struct device *dev, phys_addr_t paddr) */ phys_addr_t dma_to_phys(struct device *dev, dma_addr_t daddr) { - if (dev->archdata.dma_ops != &sta2x11_dma_ops) + if (dev->dma_ops != &sta2x11_dma_ops) return daddr; return a2p(daddr, to_pci_dev(dev)); } diff --git a/arch/xtensa/include/asm/device.h b/arch/xtensa/include/asm/device.h index a77d45d39f35..1deeb8ebbb1b 100644 --- a/arch/xtensa/include/asm/device.h +++ b/arch/xtensa/include/asm/device.h @@ -6,11 +6,7 @@ #ifndef _ASM_XTENSA_DEVICE_H #define _ASM_XTENSA_DEVICE_H -struct dma_map_ops; - struct dev_archdata { - /* DMA operations on that device */ - const struct dma_map_ops *dma_ops; }; struct pdev_archdata { diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 50d23106cce0..9eecfc3c5dc4 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -22,8 +22,8 @@ extern const struct dma_map_ops xtensa_dma_map_ops; static inline const struct dma_map_ops *get_dma_ops(struct device *dev) { - if (dev && dev->archdata.dma_ops) - return dev->archdata.dma_ops; + if (dev && dev->dma_ops) + return dev->dma_ops; else return &xtensa_dma_map_ops; } diff --git a/drivers/infiniband/ulp/srpt/ib_srpt.c b/drivers/infiniband/ulp/srpt/ib_srpt.c index d21ba9d857c3..dfc24f19178b 100644 --- a/drivers/infiniband/ulp/srpt/ib_srpt.c +++ b/drivers/infiniband/ulp/srpt/ib_srpt.c @@ -2465,7 +2465,7 @@ static void srpt_add_one(struct ib_device *device) int i; pr_debug("device = %p, device->dma_ops = %p\n", device, - device->dma_ops); + device->dma_device->dma_ops); sdev = kzalloc(sizeof(*sdev), GFP_KERNEL); if (!sdev) diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 3703fb9db419..f7b86679bafe 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -515,7 +515,7 @@ static void iommu_uninit_device(struct device *dev) iommu_group_remove_device(dev); /* Remove dma-ops */ - dev->archdata.dma_ops = NULL; + dev->dma_ops = NULL; /* * We keep dev_data around for unplugged devices and reuse it when the @@ -2164,7 +2164,7 @@ static int amd_iommu_add_device(struct device *dev) dev_name(dev)); iommu_ignore_device(dev); - dev->archdata.dma_ops = &nommu_dma_ops; + dev->dma_ops = &nommu_dma_ops; goto out; } init_iommu_group(dev); @@ -2181,7 +2181,7 @@ static int amd_iommu_add_device(struct device *dev) if (domain->type == IOMMU_DOMAIN_IDENTITY) dev_data->passthrough = true; else - dev->archdata.dma_ops = &amd_iommu_dma_ops; + dev->dma_ops = &amd_iommu_dma_ops; out: iommu_completion_wait(iommu); diff --git a/drivers/misc/mic/bus/mic_bus.c b/drivers/misc/mic/bus/mic_bus.c index c4b27a25662a..77b16ca66846 100644 --- a/drivers/misc/mic/bus/mic_bus.c +++ b/drivers/misc/mic/bus/mic_bus.c @@ -158,7 +158,7 @@ mbus_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ mbdev->dev.parent = pdev; mbdev->id.device = id; mbdev->id.vendor = MBUS_DEV_ANY_ID; - mbdev->dev.archdata.dma_ops = dma_ops; + mbdev->dev.dma_ops = dma_ops; mbdev->dev.dma_mask = &mbdev->dev.coherent_dma_mask; dma_set_mask(&mbdev->dev, DMA_BIT_MASK(64)); mbdev->dev.release = mbus_release_dev; diff --git a/drivers/misc/mic/bus/scif_bus.c b/drivers/misc/mic/bus/scif_bus.c index e5d377e97c86..a444db5f61fe 100644 --- a/drivers/misc/mic/bus/scif_bus.c +++ b/drivers/misc/mic/bus/scif_bus.c @@ -154,7 +154,7 @@ scif_register_device(struct device *pdev, int id, const struct dma_map_ops *dma_ sdev->dev.parent = pdev; sdev->id.device = id; sdev->id.vendor = SCIF_DEV_ANY_ID; - sdev->dev.archdata.dma_ops = dma_ops; + sdev->dev.dma_ops = dma_ops; sdev->dev.release = scif_release_dev; sdev->hw_ops = hw_ops; sdev->dnode = dnode; diff --git a/drivers/misc/mic/bus/vop_bus.c b/drivers/misc/mic/bus/vop_bus.c index e3caa6c53922..fd7f2a6049f8 100644 --- a/drivers/misc/mic/bus/vop_bus.c +++ b/drivers/misc/mic/bus/vop_bus.c @@ -154,7 +154,7 @@ vop_register_device(struct device *pdev, int id, vdev->dev.parent = pdev; vdev->id.device = id; vdev->id.vendor = VOP_DEV_ANY_ID; - vdev->dev.archdata.dma_ops = dma_ops; + vdev->dev.dma_ops = dma_ops; vdev->dev.dma_mask = &vdev->dev.coherent_dma_mask; dma_set_mask(&vdev->dev, DMA_BIT_MASK(64)); vdev->dev.release = vop_release_dev; diff --git a/include/linux/device.h b/include/linux/device.h index 491b4c0ca633..46a567261ccc 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -921,6 +921,7 @@ struct device { #ifdef CONFIG_NUMA int numa_node; /* NUMA node this device is close to */ #endif + const struct dma_map_ops *dma_ops; u64 *dma_mask; /* dma mask (if dma'able device) */ u64 coherent_dma_mask;/* Like dma_mask, but for alloc_coherent mappings as -- cgit v1.2.3 From 815dd18788fe0d41899f51b91d0560279cf16b0d Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 20 Jan 2017 13:04:04 -0800 Subject: treewide: Consolidate get_dma_ops() implementations Introduce a new architecture-specific get_arch_dma_ops() function that takes a struct bus_type * argument. Add get_dma_ops() in . Signed-off-by: Bart Van Assche Cc: Benjamin Herrenschmidt Cc: Boris Ostrovsky Cc: David Woodhouse Cc: Juergen Gross Cc: H. Peter Anvin Cc: Ingo Molnar Cc: linux-arch@vger.kernel.org Cc: linux-kernel@vger.kernel.org Cc: Russell King Cc: x86@kernel.org Signed-off-by: Doug Ledford --- arch/alpha/include/asm/dma-mapping.h | 2 +- arch/arc/include/asm/dma-mapping.h | 2 +- arch/arm/include/asm/dma-mapping.h | 4 ++-- arch/arm64/include/asm/dma-mapping.h | 4 ++-- arch/avr32/include/asm/dma-mapping.h | 2 +- arch/blackfin/include/asm/dma-mapping.h | 2 +- arch/c6x/include/asm/dma-mapping.h | 2 +- arch/cris/include/asm/dma-mapping.h | 4 ++-- arch/frv/include/asm/dma-mapping.h | 2 +- arch/h8300/include/asm/dma-mapping.h | 2 +- arch/hexagon/include/asm/dma-mapping.h | 5 +---- arch/ia64/include/asm/dma-mapping.h | 5 ++++- arch/m32r/include/asm/dma-mapping.h | 4 +--- arch/m68k/include/asm/dma-mapping.h | 2 +- arch/metag/include/asm/dma-mapping.h | 2 +- arch/microblaze/include/asm/dma-mapping.h | 2 +- arch/mips/include/asm/dma-mapping.h | 7 ++----- arch/mn10300/include/asm/dma-mapping.h | 2 +- arch/nios2/include/asm/dma-mapping.h | 2 +- arch/openrisc/include/asm/dma-mapping.h | 2 +- arch/parisc/include/asm/dma-mapping.h | 2 +- arch/powerpc/include/asm/dma-mapping.h | 7 ++----- arch/powerpc/include/asm/ps3.h | 2 +- arch/s390/include/asm/dma-mapping.h | 4 +--- arch/sh/include/asm/dma-mapping.h | 2 +- arch/sparc/include/asm/dma-mapping.h | 4 ++-- arch/tile/include/asm/dma-mapping.h | 7 ++----- arch/unicore32/include/asm/dma-mapping.h | 2 +- arch/x86/include/asm/dma-mapping.h | 9 +-------- arch/xtensa/include/asm/dma-mapping.h | 7 ++----- include/linux/dma-mapping.h | 7 +++++++ 31 files changed, 48 insertions(+), 64 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/dma-mapping.h b/arch/alpha/include/asm/dma-mapping.h index d3480562411d..5d53666935e6 100644 --- a/arch/alpha/include/asm/dma-mapping.h +++ b/arch/alpha/include/asm/dma-mapping.h @@ -3,7 +3,7 @@ extern const struct dma_map_ops *dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return dma_ops; } diff --git a/arch/arc/include/asm/dma-mapping.h b/arch/arc/include/asm/dma-mapping.h index fdff3aa60052..94285031c4fb 100644 --- a/arch/arc/include/asm/dma-mapping.h +++ b/arch/arc/include/asm/dma-mapping.h @@ -20,7 +20,7 @@ extern const struct dma_map_ops arc_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &arc_dma_ops; } diff --git a/arch/arm/include/asm/dma-mapping.h b/arch/arm/include/asm/dma-mapping.h index c7432d647e53..716656925975 100644 --- a/arch/arm/include/asm/dma-mapping.h +++ b/arch/arm/include/asm/dma-mapping.h @@ -23,12 +23,12 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) return &arm_dma_ops; } -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { if (xen_initial_domain()) return xen_dma_ops; else - return __generic_dma_ops(dev); + return __generic_dma_ops(NULL); } #define HAVE_ARCH_DMA_SUPPORTED 1 diff --git a/arch/arm64/include/asm/dma-mapping.h b/arch/arm64/include/asm/dma-mapping.h index 58ae36cc3b60..505756cdc67a 100644 --- a/arch/arm64/include/asm/dma-mapping.h +++ b/arch/arm64/include/asm/dma-mapping.h @@ -39,12 +39,12 @@ static inline const struct dma_map_ops *__generic_dma_ops(struct device *dev) return &dummy_dma_ops; } -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { if (xen_initial_domain()) return xen_dma_ops; else - return __generic_dma_ops(dev); + return __generic_dma_ops(NULL); } void arch_setup_dma_ops(struct device *dev, u64 dma_base, u64 size, diff --git a/arch/avr32/include/asm/dma-mapping.h b/arch/avr32/include/asm/dma-mapping.h index b2b43c0e0774..7388451f9905 100644 --- a/arch/avr32/include/asm/dma-mapping.h +++ b/arch/avr32/include/asm/dma-mapping.h @@ -6,7 +6,7 @@ extern void dma_cache_sync(struct device *dev, void *vaddr, size_t size, extern const struct dma_map_ops avr32_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &avr32_dma_ops; } diff --git a/arch/blackfin/include/asm/dma-mapping.h b/arch/blackfin/include/asm/dma-mapping.h index 320fb50fbd41..04254ac36bed 100644 --- a/arch/blackfin/include/asm/dma-mapping.h +++ b/arch/blackfin/include/asm/dma-mapping.h @@ -38,7 +38,7 @@ _dma_sync(dma_addr_t addr, size_t size, enum dma_data_direction dir) extern const struct dma_map_ops bfin_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &bfin_dma_ops; } diff --git a/arch/c6x/include/asm/dma-mapping.h b/arch/c6x/include/asm/dma-mapping.h index 88258b9ebc8e..aca9f755e4f8 100644 --- a/arch/c6x/include/asm/dma-mapping.h +++ b/arch/c6x/include/asm/dma-mapping.h @@ -19,7 +19,7 @@ extern const struct dma_map_ops c6x_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &c6x_dma_ops; } diff --git a/arch/cris/include/asm/dma-mapping.h b/arch/cris/include/asm/dma-mapping.h index aae4fbc0a656..256169de3743 100644 --- a/arch/cris/include/asm/dma-mapping.h +++ b/arch/cris/include/asm/dma-mapping.h @@ -4,12 +4,12 @@ #ifdef CONFIG_PCI extern const struct dma_map_ops v32_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &v32_dma_ops; } #else -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { BUG(); return NULL; diff --git a/arch/frv/include/asm/dma-mapping.h b/arch/frv/include/asm/dma-mapping.h index 150cc00544a8..354900917585 100644 --- a/arch/frv/include/asm/dma-mapping.h +++ b/arch/frv/include/asm/dma-mapping.h @@ -9,7 +9,7 @@ extern unsigned long __nongprelbss dma_coherent_mem_end; extern const struct dma_map_ops frv_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &frv_dma_ops; } diff --git a/arch/h8300/include/asm/dma-mapping.h b/arch/h8300/include/asm/dma-mapping.h index f804bca4c13f..847c7562e046 100644 --- a/arch/h8300/include/asm/dma-mapping.h +++ b/arch/h8300/include/asm/dma-mapping.h @@ -3,7 +3,7 @@ extern const struct dma_map_ops h8300_dma_map_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &h8300_dma_map_ops; } diff --git a/arch/hexagon/include/asm/dma-mapping.h b/arch/hexagon/include/asm/dma-mapping.h index b812e917cd95..d3a87bd9b686 100644 --- a/arch/hexagon/include/asm/dma-mapping.h +++ b/arch/hexagon/include/asm/dma-mapping.h @@ -34,11 +34,8 @@ extern int bad_dma_address; extern const struct dma_map_ops *dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (unlikely(dev == NULL)) - return NULL; - return dma_ops; } diff --git a/arch/ia64/include/asm/dma-mapping.h b/arch/ia64/include/asm/dma-mapping.h index 05e467d56d86..73ec3c6f4cfe 100644 --- a/arch/ia64/include/asm/dma-mapping.h +++ b/arch/ia64/include/asm/dma-mapping.h @@ -23,7 +23,10 @@ extern void machvec_dma_sync_single(struct device *, dma_addr_t, size_t, extern void machvec_dma_sync_sg(struct device *, struct scatterlist *, int, enum dma_data_direction); -#define get_dma_ops(dev) platform_dma_get_ops(dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) +{ + return platform_dma_get_ops(NULL); +} static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) { diff --git a/arch/m32r/include/asm/dma-mapping.h b/arch/m32r/include/asm/dma-mapping.h index 27b1597ac563..c01d9f52d228 100644 --- a/arch/m32r/include/asm/dma-mapping.h +++ b/arch/m32r/include/asm/dma-mapping.h @@ -10,10 +10,8 @@ #define DMA_ERROR_CODE (~(dma_addr_t)0x0) -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; return &dma_noop_ops; } diff --git a/arch/m68k/include/asm/dma-mapping.h b/arch/m68k/include/asm/dma-mapping.h index 863509939d5a..9210e470771b 100644 --- a/arch/m68k/include/asm/dma-mapping.h +++ b/arch/m68k/include/asm/dma-mapping.h @@ -3,7 +3,7 @@ extern const struct dma_map_ops m68k_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &m68k_dma_ops; } diff --git a/arch/metag/include/asm/dma-mapping.h b/arch/metag/include/asm/dma-mapping.h index c156a7ac732f..fad3dc3cb210 100644 --- a/arch/metag/include/asm/dma-mapping.h +++ b/arch/metag/include/asm/dma-mapping.h @@ -3,7 +3,7 @@ extern const struct dma_map_ops metag_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &metag_dma_ops; } diff --git a/arch/microblaze/include/asm/dma-mapping.h b/arch/microblaze/include/asm/dma-mapping.h index c7faf2fb51d6..3fad5e722a66 100644 --- a/arch/microblaze/include/asm/dma-mapping.h +++ b/arch/microblaze/include/asm/dma-mapping.h @@ -38,7 +38,7 @@ */ extern const struct dma_map_ops dma_direct_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &dma_direct_ops; } diff --git a/arch/mips/include/asm/dma-mapping.h b/arch/mips/include/asm/dma-mapping.h index dad3b09fe993..aba71385f9d1 100644 --- a/arch/mips/include/asm/dma-mapping.h +++ b/arch/mips/include/asm/dma-mapping.h @@ -11,12 +11,9 @@ extern const struct dma_map_ops *mips_dma_map_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; - else - return mips_dma_map_ops; + return mips_dma_map_ops; } static inline bool dma_capable(struct device *dev, dma_addr_t addr, size_t size) diff --git a/arch/mn10300/include/asm/dma-mapping.h b/arch/mn10300/include/asm/dma-mapping.h index 564e3927e005..737ef574b3ea 100644 --- a/arch/mn10300/include/asm/dma-mapping.h +++ b/arch/mn10300/include/asm/dma-mapping.h @@ -16,7 +16,7 @@ extern const struct dma_map_ops mn10300_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &mn10300_dma_ops; } diff --git a/arch/nios2/include/asm/dma-mapping.h b/arch/nios2/include/asm/dma-mapping.h index aa00d839a64b..7b3c6f280293 100644 --- a/arch/nios2/include/asm/dma-mapping.h +++ b/arch/nios2/include/asm/dma-mapping.h @@ -12,7 +12,7 @@ extern const struct dma_map_ops nios2_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &nios2_dma_ops; } diff --git a/arch/openrisc/include/asm/dma-mapping.h b/arch/openrisc/include/asm/dma-mapping.h index 88acbedb4947..0c0075f17145 100644 --- a/arch/openrisc/include/asm/dma-mapping.h +++ b/arch/openrisc/include/asm/dma-mapping.h @@ -30,7 +30,7 @@ extern const struct dma_map_ops or1k_dma_map_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &or1k_dma_map_ops; } diff --git a/arch/parisc/include/asm/dma-mapping.h b/arch/parisc/include/asm/dma-mapping.h index 1749073e44fc..5404c6a726b2 100644 --- a/arch/parisc/include/asm/dma-mapping.h +++ b/arch/parisc/include/asm/dma-mapping.h @@ -27,7 +27,7 @@ extern const struct dma_map_ops pcx_dma_ops; extern const struct dma_map_ops *hppa_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return hppa_dma_ops; } diff --git a/arch/powerpc/include/asm/dma-mapping.h b/arch/powerpc/include/asm/dma-mapping.h index 8275603ba4d5..181a095468e4 100644 --- a/arch/powerpc/include/asm/dma-mapping.h +++ b/arch/powerpc/include/asm/dma-mapping.h @@ -78,17 +78,14 @@ extern struct dma_map_ops dma_iommu_ops; #endif extern const struct dma_map_ops dma_direct_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { /* We don't handle the NULL dev case for ISA for now. We could * do it via an out of line call but it is not needed for now. The * only ISA DMA device we support is the floppy and we have a hack * in the floppy driver directly to get a device for us. */ - if (unlikely(dev == NULL)) - return NULL; - - return dev->dma_ops; + return NULL; } /* diff --git a/arch/powerpc/include/asm/ps3.h b/arch/powerpc/include/asm/ps3.h index a19f831a4cc9..17ee719e799f 100644 --- a/arch/powerpc/include/asm/ps3.h +++ b/arch/powerpc/include/asm/ps3.h @@ -435,7 +435,7 @@ static inline void *ps3_system_bus_get_drvdata( return dev_get_drvdata(&dev->core); } -/* These two need global scope for get_dma_ops(). */ +/* These two need global scope for get_arch_dma_ops(). */ extern struct bus_type ps3_system_bus_type; diff --git a/arch/s390/include/asm/dma-mapping.h b/arch/s390/include/asm/dma-mapping.h index a872027d0c1b..3108b8dbe266 100644 --- a/arch/s390/include/asm/dma-mapping.h +++ b/arch/s390/include/asm/dma-mapping.h @@ -12,10 +12,8 @@ extern const struct dma_map_ops s390_pci_dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; return &dma_noop_ops; } diff --git a/arch/sh/include/asm/dma-mapping.h b/arch/sh/include/asm/dma-mapping.h index a7382c34c241..d99008af5f73 100644 --- a/arch/sh/include/asm/dma-mapping.h +++ b/arch/sh/include/asm/dma-mapping.h @@ -4,7 +4,7 @@ extern const struct dma_map_ops *dma_ops; extern void no_iommu_init(void); -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return dma_ops; } diff --git a/arch/sparc/include/asm/dma-mapping.h b/arch/sparc/include/asm/dma-mapping.h index 3d2babc0c4c6..69cc627779f2 100644 --- a/arch/sparc/include/asm/dma-mapping.h +++ b/arch/sparc/include/asm/dma-mapping.h @@ -24,14 +24,14 @@ extern const struct dma_map_ops pci32_dma_ops; extern struct bus_type pci_bus_type; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { #ifdef CONFIG_SPARC_LEON if (sparc_cpu_model == sparc_leon) return leon_dma_ops; #endif #if defined(CONFIG_SPARC32) && defined(CONFIG_PCI) - if (dev->bus == &pci_bus_type) + if (bus == &pci_bus_type) return &pci32_dma_ops; #endif return dma_ops; diff --git a/arch/tile/include/asm/dma-mapping.h b/arch/tile/include/asm/dma-mapping.h index 2562995a6ac9..bbc71a29b2c6 100644 --- a/arch/tile/include/asm/dma-mapping.h +++ b/arch/tile/include/asm/dma-mapping.h @@ -29,12 +29,9 @@ extern const struct dma_map_ops *gx_pci_dma_map_ops; extern const struct dma_map_ops *gx_legacy_pci_dma_map_ops; extern const struct dma_map_ops *gx_hybrid_pci_dma_map_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; - else - return tile_dma_map_ops; + return tile_dma_map_ops; } static inline dma_addr_t get_dma_offset(struct device *dev) diff --git a/arch/unicore32/include/asm/dma-mapping.h b/arch/unicore32/include/asm/dma-mapping.h index 14d7729c7b73..518ba5848dd6 100644 --- a/arch/unicore32/include/asm/dma-mapping.h +++ b/arch/unicore32/include/asm/dma-mapping.h @@ -23,7 +23,7 @@ extern const struct dma_map_ops swiotlb_dma_map_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { return &swiotlb_dma_map_ops; } diff --git a/arch/x86/include/asm/dma-mapping.h b/arch/x86/include/asm/dma-mapping.h index 94b5b96966cb..08a0838b83fb 100644 --- a/arch/x86/include/asm/dma-mapping.h +++ b/arch/x86/include/asm/dma-mapping.h @@ -27,16 +27,9 @@ extern int panic_on_overflow; extern const struct dma_map_ops *dma_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { -#ifndef CONFIG_X86_DEV_DMA_OPS return dma_ops; -#else - if (unlikely(!dev) || !dev->dma_ops) - return dma_ops; - else - return dev->dma_ops; -#endif } bool arch_dma_alloc_attrs(struct device **dev, gfp_t *gfp); diff --git a/arch/xtensa/include/asm/dma-mapping.h b/arch/xtensa/include/asm/dma-mapping.h index 9eecfc3c5dc4..c6140fa8c0be 100644 --- a/arch/xtensa/include/asm/dma-mapping.h +++ b/arch/xtensa/include/asm/dma-mapping.h @@ -20,12 +20,9 @@ extern const struct dma_map_ops xtensa_dma_map_ops; -static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +static inline const struct dma_map_ops *get_arch_dma_ops(struct bus_type *bus) { - if (dev && dev->dma_ops) - return dev->dma_ops; - else - return &xtensa_dma_map_ops; + return &xtensa_dma_map_ops; } void dma_cache_sync(struct device *dev, void *vaddr, size_t size, diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index e97f23e8b2d9..ab8710888ddf 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -164,6 +164,13 @@ int dma_mmap_from_coherent(struct device *dev, struct vm_area_struct *vma, #ifdef CONFIG_HAS_DMA #include +static inline const struct dma_map_ops *get_dma_ops(struct device *dev) +{ + if (dev && dev->dma_ops) + return dev->dma_ops; + return get_arch_dma_ops(dev ? dev->bus : NULL); +} + static inline void set_dma_ops(struct device *dev, const struct dma_map_ops *dma_ops) { -- cgit v1.2.3 From a5828ed3d03d38399159dc17a98cefde3109a66b Mon Sep 17 00:00:00 2001 From: Yu-cheng Yu Date: Tue, 24 Jan 2017 10:25:46 -0800 Subject: x86/fpu/xstate: Move XSAVES state init to a function Make XSTATE init similar to existing code; move it to a separate function. There is no functionality change. Signed-off-by: Yu-cheng Yu Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: Peter Zijlstra Cc: Ravi V. Shankar Cc: Rik van Riel Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1485282346-15437-1-git-send-email-yu-cheng.yu@intel.com [ Minor cleanliness edits. ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/fpu/internal.h | 10 ++++++++++ arch/x86/kernel/fpu/core.c | 9 +-------- 2 files changed, 11 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/fpu/internal.h b/arch/x86/include/asm/fpu/internal.h index d4a684997497..255645f60ca2 100644 --- a/arch/x86/include/asm/fpu/internal.h +++ b/arch/x86/include/asm/fpu/internal.h @@ -87,6 +87,16 @@ extern void fpstate_init_soft(struct swregs_state *soft); #else static inline void fpstate_init_soft(struct swregs_state *soft) {} #endif + +static inline void fpstate_init_xstate(struct xregs_state *xsave) +{ + /* + * XRSTORS requires these bits set in xcomp_bv, or it will + * trigger #GP: + */ + xsave->header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | xfeatures_mask; +} + static inline void fpstate_init_fxstate(struct fxregs_state *fx) { fx->cwd = 0x37f; diff --git a/arch/x86/kernel/fpu/core.c b/arch/x86/kernel/fpu/core.c index de7234401275..e1114f070c2d 100644 --- a/arch/x86/kernel/fpu/core.c +++ b/arch/x86/kernel/fpu/core.c @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -179,14 +178,8 @@ void fpstate_init(union fpregs_state *state) memset(state, 0, fpu_kernel_xstate_size); - /* - * XRSTORS requires that this bit is set in xcomp_bv, or - * it will #GP. Make sure it is replaced after the memset(). - */ if (static_cpu_has(X86_FEATURE_XSAVES)) - state->xsave.header.xcomp_bv = XCOMP_BV_COMPACTED_FORMAT | - xfeatures_mask; - + fpstate_init_xstate(&state->xsave); if (static_cpu_has(X86_FEATURE_FXSR)) fpstate_init_fxstate(&state->fxsave); else -- cgit v1.2.3 From 78d1b296843a719935277b0d24d1358416ed0204 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Jan 2017 11:15:37 -0800 Subject: x86/cpu: Add X86_FEATURE_CPUID Add a synthetic CPUID flag denoting whether the CPU sports the CPUID instruction or not. This will come useful later when accomodating CPUID-less CPUs. Signed-off-by: Borislav Petkov [ Slightly prettified. ] Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/dcb355adae3ab812c79397056a61c212f1a0c7cc.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/cpufeatures.h | 2 +- arch/x86/kernel/cpu/common.c | 7 ++++--- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index eafee3161d1c..a0f4ff25669e 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_CPUID ( 3*32+25) /* CPU has CPUID instruction itself */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bab7a8a4293..c3328d0e13f0 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -801,14 +801,12 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) memset(&c->x86_capability, 0, sizeof c->x86_capability); c->extended_cpuid_level = 0; - if (!have_cpuid_p()) - identify_cpu_without_cpuid(c); - /* cyrix could have cpuid enabled via c_identify()*/ if (have_cpuid_p()) { cpu_detect(c); get_cpu_vendor(c); get_cpu_cap(c); + setup_force_cpu_cap(X86_FEATURE_CPUID); if (this_cpu->c_early_init) this_cpu->c_early_init(c); @@ -818,6 +816,9 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c) if (this_cpu->c_bsp_init) this_cpu->c_bsp_init(c); + } else { + identify_cpu_without_cpuid(c); + setup_clear_cpu_cap(X86_FEATURE_CPUID); } setup_force_cpu_cap(X86_FEATURE_ALWAYS); -- cgit v1.2.3 From 8bf1ebca215c262e48c15a4a15f175991776f57f Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:38 -0800 Subject: x86/cpu: Factor out application of forced CPU caps There are multiple call sites that apply forced CPU caps. Factor them into a helper. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/623ff7555488122143e4417de09b18be2085ad06.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 20 ++++++++++++-------- 1 file changed, 12 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c3328d0e13f0..2ea16e05de43 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -655,6 +655,16 @@ void cpu_detect(struct cpuinfo_x86 *c) } } +static void apply_forced_caps(struct cpuinfo_x86 *c) +{ + int i; + + for (i = 0; i < NCAPINTS; i++) { + c->x86_capability[i] &= ~cpu_caps_cleared[i]; + c->x86_capability[i] |= cpu_caps_set[i]; + } +} + void get_cpu_cap(struct cpuinfo_x86 *c) { u32 eax, ebx, ecx, edx; @@ -1035,10 +1045,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) this_cpu->c_identify(c); /* Clear/Set all flags overridden by options, after probe */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); #ifdef CONFIG_X86_64 c->apicid = apic->phys_pkg_id(c->initial_apicid, 0); @@ -1097,10 +1104,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) * Clear/Set all flags overridden by options, need do it * before following smp all cpus cap AND. */ - for (i = 0; i < NCAPINTS; i++) { - c->x86_capability[i] &= ~cpu_caps_cleared[i]; - c->x86_capability[i] |= cpu_caps_set[i]; - } + apply_forced_caps(c); /* * On SMP, boot_cpu_data holds the common feature set between -- cgit v1.2.3 From 60d3450167433f2d099ce2869dc52dd9e7dc9b29 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:39 -0800 Subject: x86/cpu: Re-apply forced caps every time CPU caps are re-read Calling get_cpu_cap() will reset a bunch of CPU features. This will cause the system to lose track of force-set and force-cleared features in the words that are reset until the end of CPU initialization. This can cause X86_FEATURE_FPU, for example, to change back and forth during boot and potentially confuse CPU setup. To minimize the chance of confusion, re-apply forced caps every time get_cpu_cap() is called. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/c817eb373d2c67c2c81413a70fc9b845fa34a37e.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/common.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 2ea16e05de43..d09b5eefaddf 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -758,6 +758,13 @@ void get_cpu_cap(struct cpuinfo_x86 *c) c->x86_capability[CPUID_8000_000A_EDX] = cpuid_edx(0x8000000a); init_scattered_cpuid_features(c); + + /* + * Clear/Set all flags overridden by options, after probe. + * This needs to happen each time we re-probe, which may happen + * several times during CPU initialization. + */ + apply_forced_caps(c); } static void identify_cpu_without_cpuid(struct cpuinfo_x86 *c) -- cgit v1.2.3 From 9170fb409437b246078bcad3f1481d32dfe2ca28 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:40 -0800 Subject: x86/fpu: Fix "x86/fpu: Legacy x87 FPU detected" message That message isn't at all clear -- what does "Legacy x87" even mean? Clarify it. If there's no FPU, say: x86/fpu: No FPU detected If there's an FPU that doesn't have XSAVE, say: x86/fpu: x87 FPU will use FSAVE|FXSAVE Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/bb839385e18e27bca23fe8666dfdad8170473045.1484705016.git.luto@kernel.org [ Small tweaks to the messages. ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/xstate.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/xstate.c b/arch/x86/kernel/fpu/xstate.c index 1d7770447b3e..96002f19c113 100644 --- a/arch/x86/kernel/fpu/xstate.c +++ b/arch/x86/kernel/fpu/xstate.c @@ -705,8 +705,14 @@ void __init fpu__init_system_xstate(void) WARN_ON_FPU(!on_boot_cpu); on_boot_cpu = 0; + if (!boot_cpu_has(X86_FEATURE_FPU)) { + pr_info("x86/fpu: No FPU detected\n"); + return; + } + if (!boot_cpu_has(X86_FEATURE_XSAVE)) { - pr_info("x86/fpu: Legacy x87 FPU detected.\n"); + pr_info("x86/fpu: x87 FPU will use %s\n", + boot_cpu_has(X86_FEATURE_FXSR) ? "FXSAVE" : "FSAVE"); return; } -- cgit v1.2.3 From 37ac78b67b3384d1ced5424d5a13ee146041bda3 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:41 -0800 Subject: x86/fpu: Fix CPUID-less FPU detection The old code didn't work at all because it adjusted the current caps instead of the forced caps. Anything it did would be undone later during CPU identification. Fix that and, while we're at it, improve the logging and don't bother running it if CPUID is available. Reported-by: Matthew Whitehead Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/f1134e30cafa73c4e2e68119e9741793622cfd15.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 27 ++++++++++++++------------- 1 file changed, 14 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 60dece392b3a..8b526c5fc306 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -48,13 +48,7 @@ void fpu__init_cpu(void) fpu__init_cpu_xstate(); } -/* - * The earliest FPU detection code. - * - * Set the X86_FEATURE_FPU CPU-capability bit based on - * trying to execute an actual sequence of FPU instructions: - */ -static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +static bool fpu__probe_without_cpuid(void) { unsigned long cr0; u16 fsw, fcw; @@ -65,14 +59,21 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) cr0 &= ~(X86_CR0_TS | X86_CR0_EM); write_cr0(cr0); - if (!test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { - asm volatile("fninit ; fnstsw %0 ; fnstcw %1" - : "+m" (fsw), "+m" (fcw)); + asm volatile("fninit ; fnstsw %0 ; fnstcw %1" : "+m" (fsw), "+m" (fcw)); + + pr_info("x86/fpu: Probing for FPU: FSW=0x%04hx FCW=0x%04hx\n", fsw, fcw); - if (fsw == 0 && (fcw & 0x103f) == 0x003f) - set_cpu_cap(c, X86_FEATURE_FPU); + return fsw == 0 && (fcw & 0x103f) == 0x003f; +} + +static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) +{ + if (!boot_cpu_has(X86_FEATURE_CPUID) && + !test_bit(X86_FEATURE_FPU, (unsigned long *)cpu_caps_cleared)) { + if (fpu__probe_without_cpuid()) + setup_force_cpu_cap(X86_FEATURE_FPU); else - clear_cpu_cap(c, X86_FEATURE_FPU); + setup_clear_cpu_cap(X86_FEATURE_FPU); } #ifndef CONFIG_MATH_EMULATION -- cgit v1.2.3 From 9729017f844431ab2800519297d8d1b0ecbc420d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 18 Jan 2017 11:15:42 -0800 Subject: x86/fpu: Fix the "Giving up, no FPU found" test We would never print "Giving up, no FPU found" because X86_FEATURE_FPU was in REQUIRED_MASK on non-FPU-emulating builds, so the boot_cpu_has() test didn't do anything. Signed-off-by: Andy Lutomirski Reviewed-by: Borislav Petkov Cc: Borislav Petkov Cc: Brian Gerst Cc: Dave Hansen Cc: Fenghua Yu Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Matthew Whitehead Cc: Oleg Nesterov Cc: One Thousand Gnomes Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Yu-cheng Yu Link: http://lkml.kernel.org/r/1499077fa76f0f84b8ea28e37d3fa70beca4e310.1484705016.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/fpu/init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 8b526c5fc306..19bdd1bf8160 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -77,7 +77,7 @@ static void fpu__init_system_early_generic(struct cpuinfo_x86 *c) } #ifndef CONFIG_MATH_EMULATION - if (!boot_cpu_has(X86_FEATURE_FPU)) { + if (!test_cpu_cap(&boot_cpu_data, X86_FEATURE_FPU)) { pr_emerg("x86/fpu: Giving up, no FPU found and no math emulation present\n"); for (;;) asm volatile("hlt"); -- cgit v1.2.3 From f28442497b5caf7bf573ade22a7f8d3559e3ef56 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 11 Jan 2017 16:20:01 -0700 Subject: x86/boot: Fix KASLR and memmap= collision CONFIG_RANDOMIZE_BASE=y relocates the kernel to a random base address. However it does not take into account the memmap= parameter passed in from the kernel command line. This results in the kernel sometimes being put in the middle of memmap. Teach KASLR to not insert the kernel in memmap defined regions. We support up to 4 memmap regions: any additional regions will cause KASLR to disable. The mem_avoid set has been augmented to add up to 4 unusable regions of memmaps provided by the user to exclude those regions from the set of valid address range to insert the uncompressed kernel image. The nn@ss ranges will be skipped by the mem_avoid set since it indicates that memory is useable. Signed-off-by: Dave Jiang Reviewed-by: Thomas Gleixner Acked-by: Kees Cook Acked-by: Baoquan He Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: dan.j.williams@intel.com Cc: david@fromorbit.com Cc: linux-nvdimm@lists.01.org Link: http://lkml.kernel.org/r/148417664156.131935.2248592164852799738.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Ingo Molnar --- arch/x86/boot/boot.h | 1 + arch/x86/boot/compressed/kaslr.c | 140 ++++++++++++++++++++++++++++++++++++++- arch/x86/boot/string.c | 13 ++++ 3 files changed, 151 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/boot.h b/arch/x86/boot/boot.h index e5612f3e3b57..9b42b6d1e902 100644 --- a/arch/x86/boot/boot.h +++ b/arch/x86/boot/boot.h @@ -333,6 +333,7 @@ size_t strnlen(const char *s, size_t maxlen); unsigned int atou(const char *s); unsigned long long simple_strtoull(const char *cp, char **endp, unsigned int base); size_t strlen(const char *s); +char *strchr(const char *s, int c); /* tty.c */ void puts(const char *); diff --git a/arch/x86/boot/compressed/kaslr.c b/arch/x86/boot/compressed/kaslr.c index a66854d99ee1..8b7c9e75edcb 100644 --- a/arch/x86/boot/compressed/kaslr.c +++ b/arch/x86/boot/compressed/kaslr.c @@ -11,6 +11,7 @@ */ #include "misc.h" #include "error.h" +#include "../boot.h" #include #include @@ -52,15 +53,22 @@ static unsigned long get_boot_seed(void) #include "../../lib/kaslr.c" struct mem_vector { - unsigned long start; - unsigned long size; + unsigned long long start; + unsigned long long size; }; +/* Only supporting at most 4 unusable memmap regions with kaslr */ +#define MAX_MEMMAP_REGIONS 4 + +static bool memmap_too_large; + enum mem_avoid_index { MEM_AVOID_ZO_RANGE = 0, MEM_AVOID_INITRD, MEM_AVOID_CMDLINE, MEM_AVOID_BOOTPARAMS, + MEM_AVOID_MEMMAP_BEGIN, + MEM_AVOID_MEMMAP_END = MEM_AVOID_MEMMAP_BEGIN + MAX_MEMMAP_REGIONS - 1, MEM_AVOID_MAX, }; @@ -77,6 +85,123 @@ static bool mem_overlaps(struct mem_vector *one, struct mem_vector *two) return true; } +/** + * _memparse - Parse a string with mem suffixes into a number + * @ptr: Where parse begins + * @retptr: (output) Optional pointer to next char after parse completes + * + * Parses a string into a number. The number stored at @ptr is + * potentially suffixed with K, M, G, T, P, E. + */ +static unsigned long long _memparse(const char *ptr, char **retptr) +{ + char *endptr; /* Local pointer to end of parsed string */ + + unsigned long long ret = simple_strtoull(ptr, &endptr, 0); + + switch (*endptr) { + case 'E': + case 'e': + ret <<= 10; + case 'P': + case 'p': + ret <<= 10; + case 'T': + case 't': + ret <<= 10; + case 'G': + case 'g': + ret <<= 10; + case 'M': + case 'm': + ret <<= 10; + case 'K': + case 'k': + ret <<= 10; + endptr++; + default: + break; + } + + if (retptr) + *retptr = endptr; + + return ret; +} + +static int +parse_memmap(char *p, unsigned long long *start, unsigned long long *size) +{ + char *oldp; + + if (!p) + return -EINVAL; + + /* We don't care about this option here */ + if (!strncmp(p, "exactmap", 8)) + return -EINVAL; + + oldp = p; + *size = _memparse(p, &p); + if (p == oldp) + return -EINVAL; + + switch (*p) { + case '@': + /* Skip this region, usable */ + *start = 0; + *size = 0; + return 0; + case '#': + case '$': + case '!': + *start = _memparse(p + 1, &p); + return 0; + } + + return -EINVAL; +} + +static void mem_avoid_memmap(void) +{ + char arg[128]; + int rc; + int i; + char *str; + + /* See if we have any memmap areas */ + rc = cmdline_find_option("memmap", arg, sizeof(arg)); + if (rc <= 0) + return; + + i = 0; + str = arg; + while (str && (i < MAX_MEMMAP_REGIONS)) { + int rc; + unsigned long long start, size; + char *k = strchr(str, ','); + + if (k) + *k++ = 0; + + rc = parse_memmap(str, &start, &size); + if (rc < 0) + break; + str = k; + /* A usable region that should not be skipped */ + if (size == 0) + continue; + + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].start = start; + mem_avoid[MEM_AVOID_MEMMAP_BEGIN + i].size = size; + i++; + } + + /* More than 4 memmaps, fail kaslr */ + if ((i >= MAX_MEMMAP_REGIONS) && str) + memmap_too_large = true; +} + /* * In theory, KASLR can put the kernel anywhere in the range of [16M, 64T). * The mem_avoid array is used to store the ranges that need to be avoided @@ -197,6 +322,9 @@ static void mem_avoid_init(unsigned long input, unsigned long input_size, /* We don't need to set a mapping for setup_data. */ + /* Mark the memmap regions we need to avoid */ + mem_avoid_memmap(); + #ifdef CONFIG_X86_VERBOSE_BOOTUP /* Make sure video RAM can be used. */ add_identity_map(0, PMD_SIZE); @@ -379,6 +507,12 @@ static unsigned long find_random_phys_addr(unsigned long minimum, int i; unsigned long addr; + /* Check if we had too many memmaps. */ + if (memmap_too_large) { + debug_putstr("Aborted e820 scan (more than 4 memmap= args)!\n"); + return 0; + } + /* Make sure minimum is aligned. */ minimum = ALIGN(minimum, CONFIG_PHYSICAL_ALIGN); @@ -456,7 +590,7 @@ void choose_random_location(unsigned long input, /* Walk e820 and find a random address. */ random_addr = find_random_phys_addr(min_addr, output_size); if (!random_addr) { - warn("KASLR disabled: could not find suitable E820 region!"); + warn("Physical KASLR disabled: no suitable memory region!"); } else { /* Update the new physical address location. */ if (*output != random_addr) { diff --git a/arch/x86/boot/string.c b/arch/x86/boot/string.c index cc3bd583dce1..93d9b991e6b1 100644 --- a/arch/x86/boot/string.c +++ b/arch/x86/boot/string.c @@ -155,3 +155,16 @@ char *strstr(const char *s1, const char *s2) } return NULL; } + +/** + * strchr - Find the first occurrence of the character c in the string s. + * @s: the string to be searched + * @c: the character to search for + */ +char *strchr(const char *s, int c) +{ + while (*s != (char)c) + if (*s++ == '\0') + return NULL; + return (char *)s; +} -- cgit v1.2.3 From 80a7581f38c0b2e83dc883a2125340b90b5635ec Mon Sep 17 00:00:00 2001 From: Irina Tirdea Date: Mon, 23 Jan 2017 12:07:43 -0600 Subject: arch/x86/platform/atom: Move pmc_atom to drivers/platform/x86 The pmc_atom driver does not contain any architecture specific code. It only enables the SoC Power Management Controller driver for BayTrail and CherryTrail platforms. Move the pmc_atom driver from arch/x86/platform/atom to drivers/platform/x86. Also clean-up and reorder include files by alphabetical order in pmc_atom.h Signed-off-by: Irina Tirdea Signed-off-by: Pierre-Louis Bossart Acked-by: Thomas Gleixner Acked-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- arch/x86/Kconfig | 4 - arch/x86/include/asm/pmc_atom.h | 158 ---------- arch/x86/platform/atom/Makefile | 1 - arch/x86/platform/atom/pmc_atom.c | 460 ----------------------------- drivers/acpi/acpi_lpss.c | 2 +- drivers/platform/x86/Kconfig | 4 + drivers/platform/x86/Makefile | 1 + drivers/platform/x86/pmc_atom.c | 459 ++++++++++++++++++++++++++++ include/linux/platform_data/x86/pmc_atom.h | 158 ++++++++++ 9 files changed, 623 insertions(+), 624 deletions(-) delete mode 100644 arch/x86/include/asm/pmc_atom.h delete mode 100644 arch/x86/platform/atom/pmc_atom.c create mode 100644 drivers/platform/x86/pmc_atom.c create mode 100644 include/linux/platform_data/x86/pmc_atom.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..7b4f1789f386 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -2789,10 +2789,6 @@ config X86_DMA_REMAP bool depends on STA2X11 -config PMC_ATOM - def_bool y - depends on PCI - source "net/Kconfig" source "drivers/Kconfig" diff --git a/arch/x86/include/asm/pmc_atom.h b/arch/x86/include/asm/pmc_atom.h deleted file mode 100644 index aa8744c77c6d..000000000000 --- a/arch/x86/include/asm/pmc_atom.h +++ /dev/null @@ -1,158 +0,0 @@ -/* - * Intel Atom SOC Power Management Controller Header File - * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#ifndef PMC_ATOM_H -#define PMC_ATOM_H - -/* ValleyView Power Control Unit PCI Device ID */ -#define PCI_DEVICE_ID_VLV_PMC 0x0F1C -/* CherryTrail Power Control Unit PCI Device ID */ -#define PCI_DEVICE_ID_CHT_PMC 0x229C - -/* PMC Memory mapped IO registers */ -#define PMC_BASE_ADDR_OFFSET 0x44 -#define PMC_BASE_ADDR_MASK 0xFFFFFE00 -#define PMC_MMIO_REG_LEN 0x100 -#define PMC_REG_BIT_WIDTH 32 - -/* BIOS uses FUNC_DIS to disable specific function */ -#define PMC_FUNC_DIS 0x34 -#define PMC_FUNC_DIS_2 0x38 - -/* CHT specific bits in FUNC_DIS2 register */ -#define BIT_FD_GMM BIT(3) -#define BIT_FD_ISH BIT(4) - -/* S0ix wake event control */ -#define PMC_S0IX_WAKE_EN 0x3C - -#define BIT_LPC_CLOCK_RUN BIT(4) -#define BIT_SHARED_IRQ_GPSC BIT(5) -#define BIT_ORED_DEDICATED_IRQ_GPSS BIT(18) -#define BIT_ORED_DEDICATED_IRQ_GPSC BIT(19) -#define BIT_SHARED_IRQ_GPSS BIT(20) - -#define PMC_WAKE_EN_SETTING ~(BIT_LPC_CLOCK_RUN | \ - BIT_SHARED_IRQ_GPSC | \ - BIT_ORED_DEDICATED_IRQ_GPSS | \ - BIT_ORED_DEDICATED_IRQ_GPSC | \ - BIT_SHARED_IRQ_GPSS) - -/* The timers acumulate time spent in sleep state */ -#define PMC_S0IR_TMR 0x80 -#define PMC_S0I1_TMR 0x84 -#define PMC_S0I2_TMR 0x88 -#define PMC_S0I3_TMR 0x8C -#define PMC_S0_TMR 0x90 -/* Sleep state counter is in units of of 32us */ -#define PMC_TMR_SHIFT 5 - -/* Power status of power islands */ -#define PMC_PSS 0x98 - -#define PMC_PSS_BIT_GBE BIT(0) -#define PMC_PSS_BIT_SATA BIT(1) -#define PMC_PSS_BIT_HDA BIT(2) -#define PMC_PSS_BIT_SEC BIT(3) -#define PMC_PSS_BIT_PCIE BIT(4) -#define PMC_PSS_BIT_LPSS BIT(5) -#define PMC_PSS_BIT_LPE BIT(6) -#define PMC_PSS_BIT_DFX BIT(7) -#define PMC_PSS_BIT_USH_CTRL BIT(8) -#define PMC_PSS_BIT_USH_SUS BIT(9) -#define PMC_PSS_BIT_USH_VCCS BIT(10) -#define PMC_PSS_BIT_USH_VCCA BIT(11) -#define PMC_PSS_BIT_OTG_CTRL BIT(12) -#define PMC_PSS_BIT_OTG_VCCS BIT(13) -#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14) -#define PMC_PSS_BIT_OTG_VCCA BIT(15) -#define PMC_PSS_BIT_USB BIT(16) -#define PMC_PSS_BIT_USB_SUS BIT(17) - -/* CHT specific bits in PSS register */ -#define PMC_PSS_BIT_CHT_UFS BIT(7) -#define PMC_PSS_BIT_CHT_UXD BIT(11) -#define PMC_PSS_BIT_CHT_UXD_FD BIT(12) -#define PMC_PSS_BIT_CHT_UX_ENG BIT(15) -#define PMC_PSS_BIT_CHT_USB_SUS BIT(16) -#define PMC_PSS_BIT_CHT_GMM BIT(17) -#define PMC_PSS_BIT_CHT_ISH BIT(18) -#define PMC_PSS_BIT_CHT_DFX_MASTER BIT(26) -#define PMC_PSS_BIT_CHT_DFX_CLUSTER1 BIT(27) -#define PMC_PSS_BIT_CHT_DFX_CLUSTER2 BIT(28) -#define PMC_PSS_BIT_CHT_DFX_CLUSTER3 BIT(29) -#define PMC_PSS_BIT_CHT_DFX_CLUSTER4 BIT(30) -#define PMC_PSS_BIT_CHT_DFX_CLUSTER5 BIT(31) - -/* These registers reflect D3 status of functions */ -#define PMC_D3_STS_0 0xA0 - -#define BIT_LPSS1_F0_DMA BIT(0) -#define BIT_LPSS1_F1_PWM1 BIT(1) -#define BIT_LPSS1_F2_PWM2 BIT(2) -#define BIT_LPSS1_F3_HSUART1 BIT(3) -#define BIT_LPSS1_F4_HSUART2 BIT(4) -#define BIT_LPSS1_F5_SPI BIT(5) -#define BIT_LPSS1_F6_XXX BIT(6) -#define BIT_LPSS1_F7_XXX BIT(7) -#define BIT_SCC_EMMC BIT(8) -#define BIT_SCC_SDIO BIT(9) -#define BIT_SCC_SDCARD BIT(10) -#define BIT_SCC_MIPI BIT(11) -#define BIT_HDA BIT(12) -#define BIT_LPE BIT(13) -#define BIT_OTG BIT(14) -#define BIT_USH BIT(15) -#define BIT_GBE BIT(16) -#define BIT_SATA BIT(17) -#define BIT_USB_EHCI BIT(18) -#define BIT_SEC BIT(19) -#define BIT_PCIE_PORT0 BIT(20) -#define BIT_PCIE_PORT1 BIT(21) -#define BIT_PCIE_PORT2 BIT(22) -#define BIT_PCIE_PORT3 BIT(23) -#define BIT_LPSS2_F0_DMA BIT(24) -#define BIT_LPSS2_F1_I2C1 BIT(25) -#define BIT_LPSS2_F2_I2C2 BIT(26) -#define BIT_LPSS2_F3_I2C3 BIT(27) -#define BIT_LPSS2_F4_I2C4 BIT(28) -#define BIT_LPSS2_F5_I2C5 BIT(29) -#define BIT_LPSS2_F6_I2C6 BIT(30) -#define BIT_LPSS2_F7_I2C7 BIT(31) - -#define PMC_D3_STS_1 0xA4 -#define BIT_SMB BIT(0) -#define BIT_OTG_SS_PHY BIT(1) -#define BIT_USH_SS_PHY BIT(2) -#define BIT_DFX BIT(3) - -/* CHT specific bits in PMC_D3_STS_1 register */ -#define BIT_STS_GMM BIT(1) -#define BIT_STS_ISH BIT(2) - -/* PMC I/O Registers */ -#define ACPI_BASE_ADDR_OFFSET 0x40 -#define ACPI_BASE_ADDR_MASK 0xFFFFFE00 -#define ACPI_MMIO_REG_LEN 0x100 - -#define PM1_CNT 0x4 -#define SLEEP_TYPE_MASK 0xFFFFECFF -#define SLEEP_TYPE_S5 0x1C00 -#define SLEEP_ENABLE 0x2000 - -extern int pmc_atom_read(int offset, u32 *value); -extern int pmc_atom_write(int offset, u32 value); - -#endif /* PMC_ATOM_H */ diff --git a/arch/x86/platform/atom/Makefile b/arch/x86/platform/atom/Makefile index 40983f5b0858..57be88fa34bb 100644 --- a/arch/x86/platform/atom/Makefile +++ b/arch/x86/platform/atom/Makefile @@ -1,2 +1 @@ -obj-$(CONFIG_PMC_ATOM) += pmc_atom.o obj-$(CONFIG_PUNIT_ATOM_DEBUG) += punit_atom_debug.o diff --git a/arch/x86/platform/atom/pmc_atom.c b/arch/x86/platform/atom/pmc_atom.c deleted file mode 100644 index 964ff4fc61f9..000000000000 --- a/arch/x86/platform/atom/pmc_atom.c +++ /dev/null @@ -1,460 +0,0 @@ -/* - * Intel Atom SOC Power Management Controller Driver - * Copyright (c) 2014, Intel Corporation. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. - * - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include -#include -#include -#include - -#include - -struct pmc_bit_map { - const char *name; - u32 bit_mask; -}; - -struct pmc_reg_map { - const struct pmc_bit_map *d3_sts_0; - const struct pmc_bit_map *d3_sts_1; - const struct pmc_bit_map *func_dis; - const struct pmc_bit_map *func_dis_2; - const struct pmc_bit_map *pss; -}; - -struct pmc_dev { - u32 base_addr; - void __iomem *regmap; - const struct pmc_reg_map *map; -#ifdef CONFIG_DEBUG_FS - struct dentry *dbgfs_dir; -#endif /* CONFIG_DEBUG_FS */ - bool init; -}; - -static struct pmc_dev pmc_device; -static u32 acpi_base_addr; - -static const struct pmc_bit_map d3_sts_0_map[] = { - {"LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, - {"LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, - {"LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, - {"LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1}, - {"LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2}, - {"LPSS1_F5_SPI", BIT_LPSS1_F5_SPI}, - {"LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX}, - {"LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX}, - {"SCC_EMMC", BIT_SCC_EMMC}, - {"SCC_SDIO", BIT_SCC_SDIO}, - {"SCC_SDCARD", BIT_SCC_SDCARD}, - {"SCC_MIPI", BIT_SCC_MIPI}, - {"HDA", BIT_HDA}, - {"LPE", BIT_LPE}, - {"OTG", BIT_OTG}, - {"USH", BIT_USH}, - {"GBE", BIT_GBE}, - {"SATA", BIT_SATA}, - {"USB_EHCI", BIT_USB_EHCI}, - {"SEC", BIT_SEC}, - {"PCIE_PORT0", BIT_PCIE_PORT0}, - {"PCIE_PORT1", BIT_PCIE_PORT1}, - {"PCIE_PORT2", BIT_PCIE_PORT2}, - {"PCIE_PORT3", BIT_PCIE_PORT3}, - {"LPSS2_F0_DMA", BIT_LPSS2_F0_DMA}, - {"LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1}, - {"LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2}, - {"LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3}, - {"LPSS2_F3_I2C4", BIT_LPSS2_F4_I2C4}, - {"LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5}, - {"LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6}, - {"LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7}, - {}, -}; - -static struct pmc_bit_map byt_d3_sts_1_map[] = { - {"SMB", BIT_SMB}, - {"OTG_SS_PHY", BIT_OTG_SS_PHY}, - {"USH_SS_PHY", BIT_USH_SS_PHY}, - {"DFX", BIT_DFX}, - {}, -}; - -static struct pmc_bit_map cht_d3_sts_1_map[] = { - {"SMB", BIT_SMB}, - {"GMM", BIT_STS_GMM}, - {"ISH", BIT_STS_ISH}, - {}, -}; - -static struct pmc_bit_map cht_func_dis_2_map[] = { - {"SMB", BIT_SMB}, - {"GMM", BIT_FD_GMM}, - {"ISH", BIT_FD_ISH}, - {}, -}; - -static const struct pmc_bit_map byt_pss_map[] = { - {"GBE", PMC_PSS_BIT_GBE}, - {"SATA", PMC_PSS_BIT_SATA}, - {"HDA", PMC_PSS_BIT_HDA}, - {"SEC", PMC_PSS_BIT_SEC}, - {"PCIE", PMC_PSS_BIT_PCIE}, - {"LPSS", PMC_PSS_BIT_LPSS}, - {"LPE", PMC_PSS_BIT_LPE}, - {"DFX", PMC_PSS_BIT_DFX}, - {"USH_CTRL", PMC_PSS_BIT_USH_CTRL}, - {"USH_SUS", PMC_PSS_BIT_USH_SUS}, - {"USH_VCCS", PMC_PSS_BIT_USH_VCCS}, - {"USH_VCCA", PMC_PSS_BIT_USH_VCCA}, - {"OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, - {"OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, - {"OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, - {"OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, - {"USB", PMC_PSS_BIT_USB}, - {"USB_SUS", PMC_PSS_BIT_USB_SUS}, - {}, -}; - -static const struct pmc_bit_map cht_pss_map[] = { - {"SATA", PMC_PSS_BIT_SATA}, - {"HDA", PMC_PSS_BIT_HDA}, - {"SEC", PMC_PSS_BIT_SEC}, - {"PCIE", PMC_PSS_BIT_PCIE}, - {"LPSS", PMC_PSS_BIT_LPSS}, - {"LPE", PMC_PSS_BIT_LPE}, - {"UFS", PMC_PSS_BIT_CHT_UFS}, - {"UXD", PMC_PSS_BIT_CHT_UXD}, - {"UXD_FD", PMC_PSS_BIT_CHT_UXD_FD}, - {"UX_ENG", PMC_PSS_BIT_CHT_UX_ENG}, - {"USB_SUS", PMC_PSS_BIT_CHT_USB_SUS}, - {"GMM", PMC_PSS_BIT_CHT_GMM}, - {"ISH", PMC_PSS_BIT_CHT_ISH}, - {"DFX_MASTER", PMC_PSS_BIT_CHT_DFX_MASTER}, - {"DFX_CLUSTER1", PMC_PSS_BIT_CHT_DFX_CLUSTER1}, - {"DFX_CLUSTER2", PMC_PSS_BIT_CHT_DFX_CLUSTER2}, - {"DFX_CLUSTER3", PMC_PSS_BIT_CHT_DFX_CLUSTER3}, - {"DFX_CLUSTER4", PMC_PSS_BIT_CHT_DFX_CLUSTER4}, - {"DFX_CLUSTER5", PMC_PSS_BIT_CHT_DFX_CLUSTER5}, - {}, -}; - -static const struct pmc_reg_map byt_reg_map = { - .d3_sts_0 = d3_sts_0_map, - .d3_sts_1 = byt_d3_sts_1_map, - .func_dis = d3_sts_0_map, - .func_dis_2 = byt_d3_sts_1_map, - .pss = byt_pss_map, -}; - -static const struct pmc_reg_map cht_reg_map = { - .d3_sts_0 = d3_sts_0_map, - .d3_sts_1 = cht_d3_sts_1_map, - .func_dis = d3_sts_0_map, - .func_dis_2 = cht_func_dis_2_map, - .pss = cht_pss_map, -}; - -static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) -{ - return readl(pmc->regmap + reg_offset); -} - -static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val) -{ - writel(val, pmc->regmap + reg_offset); -} - -int pmc_atom_read(int offset, u32 *value) -{ - struct pmc_dev *pmc = &pmc_device; - - if (!pmc->init) - return -ENODEV; - - *value = pmc_reg_read(pmc, offset); - return 0; -} -EXPORT_SYMBOL_GPL(pmc_atom_read); - -int pmc_atom_write(int offset, u32 value) -{ - struct pmc_dev *pmc = &pmc_device; - - if (!pmc->init) - return -ENODEV; - - pmc_reg_write(pmc, offset, value); - return 0; -} -EXPORT_SYMBOL_GPL(pmc_atom_write); - -static void pmc_power_off(void) -{ - u16 pm1_cnt_port; - u32 pm1_cnt_value; - - pr_info("Preparing to enter system sleep state S5\n"); - - pm1_cnt_port = acpi_base_addr + PM1_CNT; - - pm1_cnt_value = inl(pm1_cnt_port); - pm1_cnt_value &= SLEEP_TYPE_MASK; - pm1_cnt_value |= SLEEP_TYPE_S5; - pm1_cnt_value |= SLEEP_ENABLE; - - outl(pm1_cnt_value, pm1_cnt_port); -} - -static void pmc_hw_reg_setup(struct pmc_dev *pmc) -{ - /* - * Disable PMC S0IX_WAKE_EN events coming from: - * - LPC clock run - * - GPIO_SUS ored dedicated IRQs - * - GPIO_SCORE ored dedicated IRQs - * - GPIO_SUS shared IRQ - * - GPIO_SCORE shared IRQ - */ - pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING); -} - -#ifdef CONFIG_DEBUG_FS -static void pmc_dev_state_print(struct seq_file *s, int reg_index, - u32 sts, const struct pmc_bit_map *sts_map, - u32 fd, const struct pmc_bit_map *fd_map) -{ - int offset = PMC_REG_BIT_WIDTH * reg_index; - int index; - - for (index = 0; sts_map[index].name; index++) { - seq_printf(s, "Dev: %-2d - %-32s\tState: %s [%s]\n", - offset + index, sts_map[index].name, - fd_map[index].bit_mask & fd ? "Disabled" : "Enabled ", - sts_map[index].bit_mask & sts ? "D3" : "D0"); - } -} - -static int pmc_dev_state_show(struct seq_file *s, void *unused) -{ - struct pmc_dev *pmc = s->private; - const struct pmc_reg_map *m = pmc->map; - u32 func_dis, func_dis_2; - u32 d3_sts_0, d3_sts_1; - - func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS); - func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2); - d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0); - d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1); - - /* Low part */ - pmc_dev_state_print(s, 0, d3_sts_0, m->d3_sts_0, func_dis, m->func_dis); - - /* High part */ - pmc_dev_state_print(s, 1, d3_sts_1, m->d3_sts_1, func_dis_2, m->func_dis_2); - - return 0; -} - -static int pmc_dev_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmc_dev_state_show, inode->i_private); -} - -static const struct file_operations pmc_dev_state_ops = { - .open = pmc_dev_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pmc_pss_state_show(struct seq_file *s, void *unused) -{ - struct pmc_dev *pmc = s->private; - const struct pmc_bit_map *map = pmc->map->pss; - u32 pss = pmc_reg_read(pmc, PMC_PSS); - int index; - - for (index = 0; map[index].name; index++) { - seq_printf(s, "Island: %-2d - %-32s\tState: %s\n", - index, map[index].name, - map[index].bit_mask & pss ? "Off" : "On"); - } - return 0; -} - -static int pmc_pss_state_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmc_pss_state_show, inode->i_private); -} - -static const struct file_operations pmc_pss_state_ops = { - .open = pmc_pss_state_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) -{ - struct pmc_dev *pmc = s->private; - u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr; - - s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT; - s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT; - s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT; - s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT; - s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT; - - seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr); - seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr); - seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr); - seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr); - seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr); - return 0; -} - -static int pmc_sleep_tmr_open(struct inode *inode, struct file *file) -{ - return single_open(file, pmc_sleep_tmr_show, inode->i_private); -} - -static const struct file_operations pmc_sleep_tmr_ops = { - .open = pmc_sleep_tmr_open, - .read = seq_read, - .llseek = seq_lseek, - .release = single_release, -}; - -static void pmc_dbgfs_unregister(struct pmc_dev *pmc) -{ - debugfs_remove_recursive(pmc->dbgfs_dir); -} - -static int pmc_dbgfs_register(struct pmc_dev *pmc) -{ - struct dentry *dir, *f; - - dir = debugfs_create_dir("pmc_atom", NULL); - if (!dir) - return -ENOMEM; - - pmc->dbgfs_dir = dir; - - f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, - dir, pmc, &pmc_dev_state_ops); - if (!f) - goto err; - - f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, - dir, pmc, &pmc_pss_state_ops); - if (!f) - goto err; - - f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, - dir, pmc, &pmc_sleep_tmr_ops); - if (!f) - goto err; - - return 0; -err: - pmc_dbgfs_unregister(pmc); - return -ENODEV; -} -#else -static int pmc_dbgfs_register(struct pmc_dev *pmc) -{ - return 0; -} -#endif /* CONFIG_DEBUG_FS */ - -static int pmc_setup_dev(struct pci_dev *pdev, const struct pci_device_id *ent) -{ - struct pmc_dev *pmc = &pmc_device; - const struct pmc_reg_map *map = (struct pmc_reg_map *)ent->driver_data; - int ret; - - /* Obtain ACPI base address */ - pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr); - acpi_base_addr &= ACPI_BASE_ADDR_MASK; - - /* Install power off function */ - if (acpi_base_addr != 0 && pm_power_off == NULL) - pm_power_off = pmc_power_off; - - pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr); - pmc->base_addr &= PMC_BASE_ADDR_MASK; - - pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN); - if (!pmc->regmap) { - dev_err(&pdev->dev, "error: ioremap failed\n"); - return -ENOMEM; - } - - pmc->map = map; - - /* PMC hardware registers setup */ - pmc_hw_reg_setup(pmc); - - ret = pmc_dbgfs_register(pmc); - if (ret) - dev_warn(&pdev->dev, "debugfs register failed\n"); - - pmc->init = true; - return ret; -} - -/* - * Data for PCI driver interface - * - * used by pci_match_id() call below. - */ -static const struct pci_device_id pmc_pci_ids[] = { - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_VLV_PMC), (kernel_ulong_t)&byt_reg_map }, - { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_CHT_PMC), (kernel_ulong_t)&cht_reg_map }, - { 0, }, -}; - -static int __init pmc_atom_init(void) -{ - struct pci_dev *pdev = NULL; - const struct pci_device_id *ent; - - /* We look for our device - PCU PMC - * we assume that there is max. one device. - * - * We can't use plain pci_driver mechanism, - * as the device is really a multiple function device, - * main driver that binds to the pci_device is lpc_ich - * and have to find & bind to the device this way. - */ - for_each_pci_dev(pdev) { - ent = pci_match_id(pmc_pci_ids, pdev); - if (ent) - return pmc_setup_dev(pdev, ent); - } - /* Device not found. */ - return -ENODEV; -} - -device_initcall(pmc_atom_init); - -/* -MODULE_AUTHOR("Aubrey Li "); -MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface"); -MODULE_LICENSE("GPL v2"); -*/ diff --git a/drivers/acpi/acpi_lpss.c b/drivers/acpi/acpi_lpss.c index 8ea836c046f8..90d112a3063a 100644 --- a/drivers/acpi/acpi_lpss.c +++ b/drivers/acpi/acpi_lpss.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include @@ -31,7 +32,6 @@ ACPI_MODULE_NAME("acpi_lpss"); #include #include #include -#include #define LPSS_ADDR(desc) ((unsigned long)&desc) diff --git a/drivers/platform/x86/Kconfig b/drivers/platform/x86/Kconfig index 5fe8be089b8b..659b33fc2e18 100644 --- a/drivers/platform/x86/Kconfig +++ b/drivers/platform/x86/Kconfig @@ -1077,3 +1077,7 @@ config MLX_CPLD_PLATFORM cables and fans on the wide range Mellanox IB and Ethernet systems. endif # X86_PLATFORM_DEVICES + +config PMC_ATOM + def_bool y + depends on PCI diff --git a/drivers/platform/x86/Makefile b/drivers/platform/x86/Makefile index d4111f0f8a78..49ee7ef283bb 100644 --- a/drivers/platform/x86/Makefile +++ b/drivers/platform/x86/Makefile @@ -74,5 +74,6 @@ obj-$(CONFIG_INTEL_TELEMETRY) += intel_telemetry_core.o \ intel_telemetry_pltdrv.o \ intel_telemetry_debugfs.o obj-$(CONFIG_INTEL_PMC_CORE) += intel_pmc_core.o +obj-$(CONFIG_PMC_ATOM) += pmc_atom.o obj-$(CONFIG_MLX_PLATFORM) += mlx-platform.o obj-$(CONFIG_MLX_CPLD_PLATFORM) += mlxcpld-hotplug.o diff --git a/drivers/platform/x86/pmc_atom.c b/drivers/platform/x86/pmc_atom.c new file mode 100644 index 000000000000..e1dfb1b3632f --- /dev/null +++ b/drivers/platform/x86/pmc_atom.c @@ -0,0 +1,459 @@ +/* + * Intel Atom SOC Power Management Controller Driver + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include +#include +#include +#include +#include + +struct pmc_bit_map { + const char *name; + u32 bit_mask; +}; + +struct pmc_reg_map { + const struct pmc_bit_map *d3_sts_0; + const struct pmc_bit_map *d3_sts_1; + const struct pmc_bit_map *func_dis; + const struct pmc_bit_map *func_dis_2; + const struct pmc_bit_map *pss; +}; + +struct pmc_dev { + u32 base_addr; + void __iomem *regmap; + const struct pmc_reg_map *map; +#ifdef CONFIG_DEBUG_FS + struct dentry *dbgfs_dir; +#endif /* CONFIG_DEBUG_FS */ + bool init; +}; + +static struct pmc_dev pmc_device; +static u32 acpi_base_addr; + +static const struct pmc_bit_map d3_sts_0_map[] = { + {"LPSS1_F0_DMA", BIT_LPSS1_F0_DMA}, + {"LPSS1_F1_PWM1", BIT_LPSS1_F1_PWM1}, + {"LPSS1_F2_PWM2", BIT_LPSS1_F2_PWM2}, + {"LPSS1_F3_HSUART1", BIT_LPSS1_F3_HSUART1}, + {"LPSS1_F4_HSUART2", BIT_LPSS1_F4_HSUART2}, + {"LPSS1_F5_SPI", BIT_LPSS1_F5_SPI}, + {"LPSS1_F6_Reserved", BIT_LPSS1_F6_XXX}, + {"LPSS1_F7_Reserved", BIT_LPSS1_F7_XXX}, + {"SCC_EMMC", BIT_SCC_EMMC}, + {"SCC_SDIO", BIT_SCC_SDIO}, + {"SCC_SDCARD", BIT_SCC_SDCARD}, + {"SCC_MIPI", BIT_SCC_MIPI}, + {"HDA", BIT_HDA}, + {"LPE", BIT_LPE}, + {"OTG", BIT_OTG}, + {"USH", BIT_USH}, + {"GBE", BIT_GBE}, + {"SATA", BIT_SATA}, + {"USB_EHCI", BIT_USB_EHCI}, + {"SEC", BIT_SEC}, + {"PCIE_PORT0", BIT_PCIE_PORT0}, + {"PCIE_PORT1", BIT_PCIE_PORT1}, + {"PCIE_PORT2", BIT_PCIE_PORT2}, + {"PCIE_PORT3", BIT_PCIE_PORT3}, + {"LPSS2_F0_DMA", BIT_LPSS2_F0_DMA}, + {"LPSS2_F1_I2C1", BIT_LPSS2_F1_I2C1}, + {"LPSS2_F2_I2C2", BIT_LPSS2_F2_I2C2}, + {"LPSS2_F3_I2C3", BIT_LPSS2_F3_I2C3}, + {"LPSS2_F3_I2C4", BIT_LPSS2_F4_I2C4}, + {"LPSS2_F5_I2C5", BIT_LPSS2_F5_I2C5}, + {"LPSS2_F6_I2C6", BIT_LPSS2_F6_I2C6}, + {"LPSS2_F7_I2C7", BIT_LPSS2_F7_I2C7}, + {}, +}; + +static struct pmc_bit_map byt_d3_sts_1_map[] = { + {"SMB", BIT_SMB}, + {"OTG_SS_PHY", BIT_OTG_SS_PHY}, + {"USH_SS_PHY", BIT_USH_SS_PHY}, + {"DFX", BIT_DFX}, + {}, +}; + +static struct pmc_bit_map cht_d3_sts_1_map[] = { + {"SMB", BIT_SMB}, + {"GMM", BIT_STS_GMM}, + {"ISH", BIT_STS_ISH}, + {}, +}; + +static struct pmc_bit_map cht_func_dis_2_map[] = { + {"SMB", BIT_SMB}, + {"GMM", BIT_FD_GMM}, + {"ISH", BIT_FD_ISH}, + {}, +}; + +static const struct pmc_bit_map byt_pss_map[] = { + {"GBE", PMC_PSS_BIT_GBE}, + {"SATA", PMC_PSS_BIT_SATA}, + {"HDA", PMC_PSS_BIT_HDA}, + {"SEC", PMC_PSS_BIT_SEC}, + {"PCIE", PMC_PSS_BIT_PCIE}, + {"LPSS", PMC_PSS_BIT_LPSS}, + {"LPE", PMC_PSS_BIT_LPE}, + {"DFX", PMC_PSS_BIT_DFX}, + {"USH_CTRL", PMC_PSS_BIT_USH_CTRL}, + {"USH_SUS", PMC_PSS_BIT_USH_SUS}, + {"USH_VCCS", PMC_PSS_BIT_USH_VCCS}, + {"USH_VCCA", PMC_PSS_BIT_USH_VCCA}, + {"OTG_CTRL", PMC_PSS_BIT_OTG_CTRL}, + {"OTG_VCCS", PMC_PSS_BIT_OTG_VCCS}, + {"OTG_VCCA_CLK", PMC_PSS_BIT_OTG_VCCA_CLK}, + {"OTG_VCCA", PMC_PSS_BIT_OTG_VCCA}, + {"USB", PMC_PSS_BIT_USB}, + {"USB_SUS", PMC_PSS_BIT_USB_SUS}, + {}, +}; + +static const struct pmc_bit_map cht_pss_map[] = { + {"SATA", PMC_PSS_BIT_SATA}, + {"HDA", PMC_PSS_BIT_HDA}, + {"SEC", PMC_PSS_BIT_SEC}, + {"PCIE", PMC_PSS_BIT_PCIE}, + {"LPSS", PMC_PSS_BIT_LPSS}, + {"LPE", PMC_PSS_BIT_LPE}, + {"UFS", PMC_PSS_BIT_CHT_UFS}, + {"UXD", PMC_PSS_BIT_CHT_UXD}, + {"UXD_FD", PMC_PSS_BIT_CHT_UXD_FD}, + {"UX_ENG", PMC_PSS_BIT_CHT_UX_ENG}, + {"USB_SUS", PMC_PSS_BIT_CHT_USB_SUS}, + {"GMM", PMC_PSS_BIT_CHT_GMM}, + {"ISH", PMC_PSS_BIT_CHT_ISH}, + {"DFX_MASTER", PMC_PSS_BIT_CHT_DFX_MASTER}, + {"DFX_CLUSTER1", PMC_PSS_BIT_CHT_DFX_CLUSTER1}, + {"DFX_CLUSTER2", PMC_PSS_BIT_CHT_DFX_CLUSTER2}, + {"DFX_CLUSTER3", PMC_PSS_BIT_CHT_DFX_CLUSTER3}, + {"DFX_CLUSTER4", PMC_PSS_BIT_CHT_DFX_CLUSTER4}, + {"DFX_CLUSTER5", PMC_PSS_BIT_CHT_DFX_CLUSTER5}, + {}, +}; + +static const struct pmc_reg_map byt_reg_map = { + .d3_sts_0 = d3_sts_0_map, + .d3_sts_1 = byt_d3_sts_1_map, + .func_dis = d3_sts_0_map, + .func_dis_2 = byt_d3_sts_1_map, + .pss = byt_pss_map, +}; + +static const struct pmc_reg_map cht_reg_map = { + .d3_sts_0 = d3_sts_0_map, + .d3_sts_1 = cht_d3_sts_1_map, + .func_dis = d3_sts_0_map, + .func_dis_2 = cht_func_dis_2_map, + .pss = cht_pss_map, +}; + +static inline u32 pmc_reg_read(struct pmc_dev *pmc, int reg_offset) +{ + return readl(pmc->regmap + reg_offset); +} + +static inline void pmc_reg_write(struct pmc_dev *pmc, int reg_offset, u32 val) +{ + writel(val, pmc->regmap + reg_offset); +} + +int pmc_atom_read(int offset, u32 *value) +{ + struct pmc_dev *pmc = &pmc_device; + + if (!pmc->init) + return -ENODEV; + + *value = pmc_reg_read(pmc, offset); + return 0; +} +EXPORT_SYMBOL_GPL(pmc_atom_read); + +int pmc_atom_write(int offset, u32 value) +{ + struct pmc_dev *pmc = &pmc_device; + + if (!pmc->init) + return -ENODEV; + + pmc_reg_write(pmc, offset, value); + return 0; +} +EXPORT_SYMBOL_GPL(pmc_atom_write); + +static void pmc_power_off(void) +{ + u16 pm1_cnt_port; + u32 pm1_cnt_value; + + pr_info("Preparing to enter system sleep state S5\n"); + + pm1_cnt_port = acpi_base_addr + PM1_CNT; + + pm1_cnt_value = inl(pm1_cnt_port); + pm1_cnt_value &= SLEEP_TYPE_MASK; + pm1_cnt_value |= SLEEP_TYPE_S5; + pm1_cnt_value |= SLEEP_ENABLE; + + outl(pm1_cnt_value, pm1_cnt_port); +} + +static void pmc_hw_reg_setup(struct pmc_dev *pmc) +{ + /* + * Disable PMC S0IX_WAKE_EN events coming from: + * - LPC clock run + * - GPIO_SUS ored dedicated IRQs + * - GPIO_SCORE ored dedicated IRQs + * - GPIO_SUS shared IRQ + * - GPIO_SCORE shared IRQ + */ + pmc_reg_write(pmc, PMC_S0IX_WAKE_EN, (u32)PMC_WAKE_EN_SETTING); +} + +#ifdef CONFIG_DEBUG_FS +static void pmc_dev_state_print(struct seq_file *s, int reg_index, + u32 sts, const struct pmc_bit_map *sts_map, + u32 fd, const struct pmc_bit_map *fd_map) +{ + int offset = PMC_REG_BIT_WIDTH * reg_index; + int index; + + for (index = 0; sts_map[index].name; index++) { + seq_printf(s, "Dev: %-2d - %-32s\tState: %s [%s]\n", + offset + index, sts_map[index].name, + fd_map[index].bit_mask & fd ? "Disabled" : "Enabled ", + sts_map[index].bit_mask & sts ? "D3" : "D0"); + } +} + +static int pmc_dev_state_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + const struct pmc_reg_map *m = pmc->map; + u32 func_dis, func_dis_2; + u32 d3_sts_0, d3_sts_1; + + func_dis = pmc_reg_read(pmc, PMC_FUNC_DIS); + func_dis_2 = pmc_reg_read(pmc, PMC_FUNC_DIS_2); + d3_sts_0 = pmc_reg_read(pmc, PMC_D3_STS_0); + d3_sts_1 = pmc_reg_read(pmc, PMC_D3_STS_1); + + /* Low part */ + pmc_dev_state_print(s, 0, d3_sts_0, m->d3_sts_0, func_dis, m->func_dis); + + /* High part */ + pmc_dev_state_print(s, 1, d3_sts_1, m->d3_sts_1, func_dis_2, m->func_dis_2); + + return 0; +} + +static int pmc_dev_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_dev_state_show, inode->i_private); +} + +static const struct file_operations pmc_dev_state_ops = { + .open = pmc_dev_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pmc_pss_state_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + const struct pmc_bit_map *map = pmc->map->pss; + u32 pss = pmc_reg_read(pmc, PMC_PSS); + int index; + + for (index = 0; map[index].name; index++) { + seq_printf(s, "Island: %-2d - %-32s\tState: %s\n", + index, map[index].name, + map[index].bit_mask & pss ? "Off" : "On"); + } + return 0; +} + +static int pmc_pss_state_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_pss_state_show, inode->i_private); +} + +static const struct file_operations pmc_pss_state_ops = { + .open = pmc_pss_state_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static int pmc_sleep_tmr_show(struct seq_file *s, void *unused) +{ + struct pmc_dev *pmc = s->private; + u64 s0ir_tmr, s0i1_tmr, s0i2_tmr, s0i3_tmr, s0_tmr; + + s0ir_tmr = (u64)pmc_reg_read(pmc, PMC_S0IR_TMR) << PMC_TMR_SHIFT; + s0i1_tmr = (u64)pmc_reg_read(pmc, PMC_S0I1_TMR) << PMC_TMR_SHIFT; + s0i2_tmr = (u64)pmc_reg_read(pmc, PMC_S0I2_TMR) << PMC_TMR_SHIFT; + s0i3_tmr = (u64)pmc_reg_read(pmc, PMC_S0I3_TMR) << PMC_TMR_SHIFT; + s0_tmr = (u64)pmc_reg_read(pmc, PMC_S0_TMR) << PMC_TMR_SHIFT; + + seq_printf(s, "S0IR Residency:\t%lldus\n", s0ir_tmr); + seq_printf(s, "S0I1 Residency:\t%lldus\n", s0i1_tmr); + seq_printf(s, "S0I2 Residency:\t%lldus\n", s0i2_tmr); + seq_printf(s, "S0I3 Residency:\t%lldus\n", s0i3_tmr); + seq_printf(s, "S0 Residency:\t%lldus\n", s0_tmr); + return 0; +} + +static int pmc_sleep_tmr_open(struct inode *inode, struct file *file) +{ + return single_open(file, pmc_sleep_tmr_show, inode->i_private); +} + +static const struct file_operations pmc_sleep_tmr_ops = { + .open = pmc_sleep_tmr_open, + .read = seq_read, + .llseek = seq_lseek, + .release = single_release, +}; + +static void pmc_dbgfs_unregister(struct pmc_dev *pmc) +{ + debugfs_remove_recursive(pmc->dbgfs_dir); +} + +static int pmc_dbgfs_register(struct pmc_dev *pmc) +{ + struct dentry *dir, *f; + + dir = debugfs_create_dir("pmc_atom", NULL); + if (!dir) + return -ENOMEM; + + pmc->dbgfs_dir = dir; + + f = debugfs_create_file("dev_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_dev_state_ops); + if (!f) + goto err; + + f = debugfs_create_file("pss_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_pss_state_ops); + if (!f) + goto err; + + f = debugfs_create_file("sleep_state", S_IFREG | S_IRUGO, + dir, pmc, &pmc_sleep_tmr_ops); + if (!f) + goto err; + + return 0; +err: + pmc_dbgfs_unregister(pmc); + return -ENODEV; +} +#else +static int pmc_dbgfs_register(struct pmc_dev *pmc) +{ + return 0; +} +#endif /* CONFIG_DEBUG_FS */ + +static int pmc_setup_dev(struct pci_dev *pdev, const struct pci_device_id *ent) +{ + struct pmc_dev *pmc = &pmc_device; + const struct pmc_reg_map *map = (struct pmc_reg_map *)ent->driver_data; + int ret; + + /* Obtain ACPI base address */ + pci_read_config_dword(pdev, ACPI_BASE_ADDR_OFFSET, &acpi_base_addr); + acpi_base_addr &= ACPI_BASE_ADDR_MASK; + + /* Install power off function */ + if (acpi_base_addr != 0 && pm_power_off == NULL) + pm_power_off = pmc_power_off; + + pci_read_config_dword(pdev, PMC_BASE_ADDR_OFFSET, &pmc->base_addr); + pmc->base_addr &= PMC_BASE_ADDR_MASK; + + pmc->regmap = ioremap_nocache(pmc->base_addr, PMC_MMIO_REG_LEN); + if (!pmc->regmap) { + dev_err(&pdev->dev, "error: ioremap failed\n"); + return -ENOMEM; + } + + pmc->map = map; + + /* PMC hardware registers setup */ + pmc_hw_reg_setup(pmc); + + ret = pmc_dbgfs_register(pmc); + if (ret) + dev_warn(&pdev->dev, "debugfs register failed\n"); + + pmc->init = true; + return ret; +} + +/* + * Data for PCI driver interface + * + * used by pci_match_id() call below. + */ +static const struct pci_device_id pmc_pci_ids[] = { + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_VLV_PMC), (kernel_ulong_t)&byt_reg_map }, + { PCI_VDEVICE(INTEL, PCI_DEVICE_ID_CHT_PMC), (kernel_ulong_t)&cht_reg_map }, + { 0, }, +}; + +static int __init pmc_atom_init(void) +{ + struct pci_dev *pdev = NULL; + const struct pci_device_id *ent; + + /* We look for our device - PCU PMC + * we assume that there is max. one device. + * + * We can't use plain pci_driver mechanism, + * as the device is really a multiple function device, + * main driver that binds to the pci_device is lpc_ich + * and have to find & bind to the device this way. + */ + for_each_pci_dev(pdev) { + ent = pci_match_id(pmc_pci_ids, pdev); + if (ent) + return pmc_setup_dev(pdev, ent); + } + /* Device not found. */ + return -ENODEV; +} + +device_initcall(pmc_atom_init); + +/* +MODULE_AUTHOR("Aubrey Li "); +MODULE_DESCRIPTION("Intel Atom SOC Power Management Controller Interface"); +MODULE_LICENSE("GPL v2"); +*/ diff --git a/include/linux/platform_data/x86/pmc_atom.h b/include/linux/platform_data/x86/pmc_atom.h new file mode 100644 index 000000000000..aa8744c77c6d --- /dev/null +++ b/include/linux/platform_data/x86/pmc_atom.h @@ -0,0 +1,158 @@ +/* + * Intel Atom SOC Power Management Controller Header File + * Copyright (c) 2014, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef PMC_ATOM_H +#define PMC_ATOM_H + +/* ValleyView Power Control Unit PCI Device ID */ +#define PCI_DEVICE_ID_VLV_PMC 0x0F1C +/* CherryTrail Power Control Unit PCI Device ID */ +#define PCI_DEVICE_ID_CHT_PMC 0x229C + +/* PMC Memory mapped IO registers */ +#define PMC_BASE_ADDR_OFFSET 0x44 +#define PMC_BASE_ADDR_MASK 0xFFFFFE00 +#define PMC_MMIO_REG_LEN 0x100 +#define PMC_REG_BIT_WIDTH 32 + +/* BIOS uses FUNC_DIS to disable specific function */ +#define PMC_FUNC_DIS 0x34 +#define PMC_FUNC_DIS_2 0x38 + +/* CHT specific bits in FUNC_DIS2 register */ +#define BIT_FD_GMM BIT(3) +#define BIT_FD_ISH BIT(4) + +/* S0ix wake event control */ +#define PMC_S0IX_WAKE_EN 0x3C + +#define BIT_LPC_CLOCK_RUN BIT(4) +#define BIT_SHARED_IRQ_GPSC BIT(5) +#define BIT_ORED_DEDICATED_IRQ_GPSS BIT(18) +#define BIT_ORED_DEDICATED_IRQ_GPSC BIT(19) +#define BIT_SHARED_IRQ_GPSS BIT(20) + +#define PMC_WAKE_EN_SETTING ~(BIT_LPC_CLOCK_RUN | \ + BIT_SHARED_IRQ_GPSC | \ + BIT_ORED_DEDICATED_IRQ_GPSS | \ + BIT_ORED_DEDICATED_IRQ_GPSC | \ + BIT_SHARED_IRQ_GPSS) + +/* The timers acumulate time spent in sleep state */ +#define PMC_S0IR_TMR 0x80 +#define PMC_S0I1_TMR 0x84 +#define PMC_S0I2_TMR 0x88 +#define PMC_S0I3_TMR 0x8C +#define PMC_S0_TMR 0x90 +/* Sleep state counter is in units of of 32us */ +#define PMC_TMR_SHIFT 5 + +/* Power status of power islands */ +#define PMC_PSS 0x98 + +#define PMC_PSS_BIT_GBE BIT(0) +#define PMC_PSS_BIT_SATA BIT(1) +#define PMC_PSS_BIT_HDA BIT(2) +#define PMC_PSS_BIT_SEC BIT(3) +#define PMC_PSS_BIT_PCIE BIT(4) +#define PMC_PSS_BIT_LPSS BIT(5) +#define PMC_PSS_BIT_LPE BIT(6) +#define PMC_PSS_BIT_DFX BIT(7) +#define PMC_PSS_BIT_USH_CTRL BIT(8) +#define PMC_PSS_BIT_USH_SUS BIT(9) +#define PMC_PSS_BIT_USH_VCCS BIT(10) +#define PMC_PSS_BIT_USH_VCCA BIT(11) +#define PMC_PSS_BIT_OTG_CTRL BIT(12) +#define PMC_PSS_BIT_OTG_VCCS BIT(13) +#define PMC_PSS_BIT_OTG_VCCA_CLK BIT(14) +#define PMC_PSS_BIT_OTG_VCCA BIT(15) +#define PMC_PSS_BIT_USB BIT(16) +#define PMC_PSS_BIT_USB_SUS BIT(17) + +/* CHT specific bits in PSS register */ +#define PMC_PSS_BIT_CHT_UFS BIT(7) +#define PMC_PSS_BIT_CHT_UXD BIT(11) +#define PMC_PSS_BIT_CHT_UXD_FD BIT(12) +#define PMC_PSS_BIT_CHT_UX_ENG BIT(15) +#define PMC_PSS_BIT_CHT_USB_SUS BIT(16) +#define PMC_PSS_BIT_CHT_GMM BIT(17) +#define PMC_PSS_BIT_CHT_ISH BIT(18) +#define PMC_PSS_BIT_CHT_DFX_MASTER BIT(26) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER1 BIT(27) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER2 BIT(28) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER3 BIT(29) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER4 BIT(30) +#define PMC_PSS_BIT_CHT_DFX_CLUSTER5 BIT(31) + +/* These registers reflect D3 status of functions */ +#define PMC_D3_STS_0 0xA0 + +#define BIT_LPSS1_F0_DMA BIT(0) +#define BIT_LPSS1_F1_PWM1 BIT(1) +#define BIT_LPSS1_F2_PWM2 BIT(2) +#define BIT_LPSS1_F3_HSUART1 BIT(3) +#define BIT_LPSS1_F4_HSUART2 BIT(4) +#define BIT_LPSS1_F5_SPI BIT(5) +#define BIT_LPSS1_F6_XXX BIT(6) +#define BIT_LPSS1_F7_XXX BIT(7) +#define BIT_SCC_EMMC BIT(8) +#define BIT_SCC_SDIO BIT(9) +#define BIT_SCC_SDCARD BIT(10) +#define BIT_SCC_MIPI BIT(11) +#define BIT_HDA BIT(12) +#define BIT_LPE BIT(13) +#define BIT_OTG BIT(14) +#define BIT_USH BIT(15) +#define BIT_GBE BIT(16) +#define BIT_SATA BIT(17) +#define BIT_USB_EHCI BIT(18) +#define BIT_SEC BIT(19) +#define BIT_PCIE_PORT0 BIT(20) +#define BIT_PCIE_PORT1 BIT(21) +#define BIT_PCIE_PORT2 BIT(22) +#define BIT_PCIE_PORT3 BIT(23) +#define BIT_LPSS2_F0_DMA BIT(24) +#define BIT_LPSS2_F1_I2C1 BIT(25) +#define BIT_LPSS2_F2_I2C2 BIT(26) +#define BIT_LPSS2_F3_I2C3 BIT(27) +#define BIT_LPSS2_F4_I2C4 BIT(28) +#define BIT_LPSS2_F5_I2C5 BIT(29) +#define BIT_LPSS2_F6_I2C6 BIT(30) +#define BIT_LPSS2_F7_I2C7 BIT(31) + +#define PMC_D3_STS_1 0xA4 +#define BIT_SMB BIT(0) +#define BIT_OTG_SS_PHY BIT(1) +#define BIT_USH_SS_PHY BIT(2) +#define BIT_DFX BIT(3) + +/* CHT specific bits in PMC_D3_STS_1 register */ +#define BIT_STS_GMM BIT(1) +#define BIT_STS_ISH BIT(2) + +/* PMC I/O Registers */ +#define ACPI_BASE_ADDR_OFFSET 0x40 +#define ACPI_BASE_ADDR_MASK 0xFFFFFE00 +#define ACPI_MMIO_REG_LEN 0x100 + +#define PM1_CNT 0x4 +#define SLEEP_TYPE_MASK 0xFFFFECFF +#define SLEEP_TYPE_S5 0x1C00 +#define SLEEP_ENABLE 0x2000 + +extern int pmc_atom_read(int offset, u32 *value); +extern int pmc_atom_write(int offset, u32 value); + +#endif /* PMC_ATOM_H */ -- cgit v1.2.3 From 2dc8ffad8c53e65f85d1a9ece2721463d729054a Mon Sep 17 00:00:00 2001 From: Nick Desaulniers Date: Mon, 12 Dec 2016 15:28:05 -0800 Subject: ACPI / idle: small formatting fixes A quick cleanup with scripts/checkpatch.pl -f . Signed-off-by: Nick Desaulniers Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- arch/x86/kernel/acpi/cstate.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/cstate.c b/arch/x86/kernel/acpi/cstate.c index af15f4444330..8233a630280f 100644 --- a/arch/x86/kernel/acpi/cstate.c +++ b/arch/x86/kernel/acpi/cstate.c @@ -12,7 +12,6 @@ #include #include -#include #include #include @@ -89,7 +88,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) retval = 0; /* If the HW does not support any sub-states in this C-state */ if (num_cstate_subtype == 0) { - pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", cx->address, edx_part); + pr_warn(FW_BUG "ACPI MWAIT C-state 0x%x not supported by HW (0x%x)\n", + cx->address, edx_part); retval = -1; goto out; } @@ -104,8 +104,8 @@ static long acpi_processor_ffh_cstate_probe_cpu(void *_cx) if (!mwait_supported[cstate_type]) { mwait_supported[cstate_type] = 1; printk(KERN_DEBUG - "Monitor-Mwait will be used to enter C-%d " - "state\n", cx->type); + "Monitor-Mwait will be used to enter C-%d state\n", + cx->type); } snprintf(cx->desc, ACPI_CX_DESC_LEN, "ACPI FFH INTEL MWAIT 0x%x", @@ -166,6 +166,7 @@ EXPORT_SYMBOL_GPL(acpi_processor_ffh_cstate_enter); static int __init ffh_cstate_init(void) { struct cpuinfo_x86 *c = &boot_cpu_data; + if (c->x86_vendor != X86_VENDOR_INTEL) return -1; -- cgit v1.2.3 From ab22a4733fe919d22bc2957680506ed17e40941e Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 21 Dec 2016 20:29:28 -0800 Subject: kvm: x86: mmu: Rename EPT_VIOLATION_READ/WRITE/INSTR constants Rename the EPT_VIOLATION_READ/WRITE/INSTR constants to EPT_VIOLATION_ACC_READ/WRITE/INSTR to more clearly indicate that these signify the type of the memory access as opposed to the permissions granted by the PTE. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/vmx.h | 12 ++++++------ arch/x86/kvm/vmx.c | 6 +++--- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/vmx.h b/arch/x86/include/asm/vmx.h index a22a4790f1ac..cc54b7026567 100644 --- a/arch/x86/include/asm/vmx.h +++ b/arch/x86/include/asm/vmx.h @@ -510,15 +510,15 @@ struct vmx_msr_entry { /* * Exit Qualifications for EPT Violations */ -#define EPT_VIOLATION_READ_BIT 0 -#define EPT_VIOLATION_WRITE_BIT 1 -#define EPT_VIOLATION_INSTR_BIT 2 +#define EPT_VIOLATION_ACC_READ_BIT 0 +#define EPT_VIOLATION_ACC_WRITE_BIT 1 +#define EPT_VIOLATION_ACC_INSTR_BIT 2 #define EPT_VIOLATION_READABLE_BIT 3 #define EPT_VIOLATION_WRITABLE_BIT 4 #define EPT_VIOLATION_EXECUTABLE_BIT 5 -#define EPT_VIOLATION_READ (1 << EPT_VIOLATION_READ_BIT) -#define EPT_VIOLATION_WRITE (1 << EPT_VIOLATION_WRITE_BIT) -#define EPT_VIOLATION_INSTR (1 << EPT_VIOLATION_INSTR_BIT) +#define EPT_VIOLATION_ACC_READ (1 << EPT_VIOLATION_ACC_READ_BIT) +#define EPT_VIOLATION_ACC_WRITE (1 << EPT_VIOLATION_ACC_WRITE_BIT) +#define EPT_VIOLATION_ACC_INSTR (1 << EPT_VIOLATION_ACC_INSTR_BIT) #define EPT_VIOLATION_READABLE (1 << EPT_VIOLATION_READABLE_BIT) #define EPT_VIOLATION_WRITABLE (1 << EPT_VIOLATION_WRITABLE_BIT) #define EPT_VIOLATION_EXECUTABLE (1 << EPT_VIOLATION_EXECUTABLE_BIT) diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c7bafa1457e2..81c301def1af 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6375,13 +6375,13 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) trace_kvm_page_fault(gpa, exit_qualification); /* Is it a read fault? */ - error_code = (exit_qualification & EPT_VIOLATION_READ) + error_code = (exit_qualification & EPT_VIOLATION_ACC_READ) ? PFERR_USER_MASK : 0; /* Is it a write fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_WRITE) + error_code |= (exit_qualification & EPT_VIOLATION_ACC_WRITE) ? PFERR_WRITE_MASK : 0; /* Is it a fetch fault? */ - error_code |= (exit_qualification & EPT_VIOLATION_INSTR) + error_code |= (exit_qualification & EPT_VIOLATION_ACC_INSTR) ? PFERR_FETCH_MASK : 0; /* ept page table entry is present? */ error_code |= (exit_qualification & -- cgit v1.2.3 From 312b616b30d87581b88d3db54c14ed89610cc97b Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 21 Dec 2016 20:29:29 -0800 Subject: kvm: x86: mmu: Set SPTE_SPECIAL_MASK within mmu.c Instead of the caller including the SPTE_SPECIAL_MASK in the masks being supplied to kvm_mmu_set_mmio_spte_mask() and kvm_mmu_set_mask_ptes(), those functions now themselves include the SPTE_SPECIAL_MASK. Note that bit 63 is now reset in the default MMIO mask. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 5 ++++- arch/x86/kvm/vmx.c | 6 ++---- arch/x86/kvm/x86.c | 3 --- 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 64821ca3a7c3..e3312e22e8db 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -208,7 +208,7 @@ static void mmu_free_roots(struct kvm_vcpu *vcpu); void kvm_mmu_set_mmio_spte_mask(u64 mmio_mask) { - shadow_mmio_mask = mmio_mask; + shadow_mmio_mask = mmio_mask | SPTE_SPECIAL_MASK; } EXPORT_SYMBOL_GPL(kvm_mmu_set_mmio_spte_mask); @@ -318,6 +318,9 @@ void kvm_mmu_set_mask_ptes(u64 user_mask, u64 accessed_mask, u64 dirty_mask, u64 nx_mask, u64 x_mask, u64 p_mask, u64 acc_track_mask) { + if (acc_track_mask != 0) + acc_track_mask |= SPTE_SPECIAL_MASK; + shadow_user_mask = user_mask; shadow_accessed_mask = accessed_mask; shadow_dirty_mask = dirty_mask; diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 81c301def1af..d850d5d36182 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5236,10 +5236,8 @@ static void ept_set_mmio_spte_mask(void) /* * EPT Misconfigurations can be generated if the value of bits 2:0 * of an EPT paging-structure entry is 110b (write/execute). - * Also, special bit (62) is set to quickly identify mmio spte. */ - kvm_mmu_set_mmio_spte_mask(SPTE_SPECIAL_MASK | - VMX_EPT_MISCONFIG_WX_VALUE); + kvm_mmu_set_mmio_spte_mask(VMX_EPT_MISCONFIG_WX_VALUE); } #define VMX_XSS_EXIT_BITMAP 0 @@ -6585,7 +6583,7 @@ void vmx_enable_tdp(void) enable_ept_ad_bits ? VMX_EPT_DIRTY_BIT : 0ull, 0ull, VMX_EPT_EXECUTABLE_MASK, cpu_has_vmx_ept_execute_only() ? 0ull : VMX_EPT_READABLE_MASK, - enable_ept_ad_bits ? 0ull : SPTE_SPECIAL_MASK | VMX_EPT_RWX_MASK); + enable_ept_ad_bits ? 0ull : VMX_EPT_RWX_MASK); ept_set_mmio_spte_mask(); kvm_enable_tdp(); diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 6e2c71ea0627..4fd4d4f35caf 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -5952,9 +5952,6 @@ static void kvm_set_mmio_spte_mask(void) /* Mask the reserved physical address bits. */ mask = rsvd_bits(maxphyaddr, 51); - /* Bit 62 is always reserved for 32bit host. */ - mask |= 0x3ull << 62; - /* Set the present bit. */ mask |= 1ull; -- cgit v1.2.3 From 20d65236d01cdbe14a88f0e2c0f985669f8c41fc Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 21 Dec 2016 20:29:31 -0800 Subject: kvm: x86: mmu: Update comment in mark_spte_for_access_track Reword the comment to hopefully make it more clear. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e3312e22e8db..e13041ac7cdf 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -708,9 +708,9 @@ static u64 mark_spte_for_access_track(u64 spte) return spte; /* - * Verify that the write-protection that we do below will be fixable - * via the fast page fault path. Currently, that is always the case, at - * least when using EPT (which is when access tracking would be used). + * Making an Access Tracking PTE will result in removal of write access + * from the PTE. So, verify that we will be able to restore the write + * access in the fast page fault path later on. */ WARN_ONCE((spte & PT_WRITABLE_MASK) && !spte_can_locklessly_be_made_writable(spte), -- cgit v1.2.3 From d162f30a7cebe9731fd331419b3a14089d0b41e3 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 21 Dec 2016 20:29:30 -0800 Subject: kvm: x86: mmu: Move pgtbl walk inside retry loop in fast_page_fault Redo the page table walk in fast_page_fault when retrying so that we are working on the latest PTE even if the hierarchy changes. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index e13041ac7cdf..437d16274701 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -3088,14 +3088,16 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, return false; walk_shadow_page_lockless_begin(vcpu); - for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) - if (!is_shadow_present_pte(spte) || iterator.level < level) - break; do { bool remove_write_prot = false; bool remove_acc_track; + for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) + if (!is_shadow_present_pte(spte) || + iterator.level < level) + break; + sp = page_header(__pa(iterator.sptep)); if (!is_last_spte(spte, sp->role.level)) break; @@ -3176,8 +3178,6 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, break; } - spte = mmu_spte_get_lockless(iterator.sptep); - } while (true); trace_fast_page_fault(vcpu, gva, error_code, iterator.sptep, -- cgit v1.2.3 From d3e328f2cb01f6f09259a5810baae3edf5416076 Mon Sep 17 00:00:00 2001 From: Junaid Shahid Date: Wed, 21 Dec 2016 20:29:32 -0800 Subject: kvm: x86: mmu: Verify that restored PTE has needed perms in fast page fault Before fast page fault restores an access track PTE back to a regular PTE, it now also verifies that the restored PTE would grant the necessary permissions for the faulting access to succeed. If not, it falls back to the slow page fault path. Signed-off-by: Junaid Shahid Signed-off-by: Paolo Bonzini --- arch/x86/kvm/mmu.c | 127 ++++++++++++++++++++++++++++------------------------- 1 file changed, 68 insertions(+), 59 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 437d16274701..2fd7586aad4d 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -373,6 +373,11 @@ static int is_last_spte(u64 pte, int level) return 0; } +static bool is_executable_pte(u64 spte) +{ + return (spte & (shadow_x_mask | shadow_nx_mask)) == shadow_x_mask; +} + static kvm_pfn_t spte_to_pfn(u64 pte) { return (pte & PT64_BASE_ADDR_MASK) >> PAGE_SHIFT; @@ -728,6 +733,23 @@ static u64 mark_spte_for_access_track(u64 spte) return spte; } +/* Restore an acc-track PTE back to a regular PTE */ +static u64 restore_acc_track_spte(u64 spte) +{ + u64 new_spte = spte; + u64 saved_bits = (spte >> shadow_acc_track_saved_bits_shift) + & shadow_acc_track_saved_bits_mask; + + WARN_ON_ONCE(!is_access_track_spte(spte)); + + new_spte &= ~shadow_acc_track_mask; + new_spte &= ~(shadow_acc_track_saved_bits_mask << + shadow_acc_track_saved_bits_shift); + new_spte |= saved_bits; + + return new_spte; +} + /* Returns the Accessed status of the PTE and resets it at the same time. */ static bool mmu_spte_age(u64 *sptep) { @@ -3019,27 +3041,12 @@ static bool page_fault_can_be_fast(u32 error_code) */ static bool fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, - u64 *sptep, u64 old_spte, - bool remove_write_prot, bool remove_acc_track) + u64 *sptep, u64 old_spte, u64 new_spte) { gfn_t gfn; - u64 new_spte = old_spte; WARN_ON(!sp->role.direct); - if (remove_acc_track) { - u64 saved_bits = (old_spte >> shadow_acc_track_saved_bits_shift) - & shadow_acc_track_saved_bits_mask; - - new_spte &= ~shadow_acc_track_mask; - new_spte &= ~(shadow_acc_track_saved_bits_mask << - shadow_acc_track_saved_bits_shift); - new_spte |= saved_bits; - } - - if (remove_write_prot) - new_spte |= PT_WRITABLE_MASK; - /* * Theoretically we could also set dirty bit (and flush TLB) here in * order to eliminate unnecessary PML logging. See comments in @@ -3055,7 +3062,7 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, if (cmpxchg64(sptep, old_spte, new_spte) != old_spte) return false; - if (remove_write_prot) { + if (is_writable_pte(new_spte) && !is_writable_pte(old_spte)) { /* * The gfn of direct spte is stable since it is * calculated by sp->gfn. @@ -3067,6 +3074,18 @@ fast_pf_fix_direct_spte(struct kvm_vcpu *vcpu, struct kvm_mmu_page *sp, return true; } +static bool is_access_allowed(u32 fault_err_code, u64 spte) +{ + if (fault_err_code & PFERR_FETCH_MASK) + return is_executable_pte(spte); + + if (fault_err_code & PFERR_WRITE_MASK) + return is_writable_pte(spte); + + /* Fault was on Read access */ + return spte & PT_PRESENT_MASK; +} + /* * Return value: * - true: let the vcpu to access on the same address again. @@ -3090,8 +3109,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, walk_shadow_page_lockless_begin(vcpu); do { - bool remove_write_prot = false; - bool remove_acc_track; + u64 new_spte; for_each_shadow_entry_lockless(vcpu, gva, iterator, spte) if (!is_shadow_present_pte(spte) || @@ -3112,52 +3130,44 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, * Need not check the access of upper level table entries since * they are always ACC_ALL. */ + if (is_access_allowed(error_code, spte)) { + fault_handled = true; + break; + } - if (error_code & PFERR_FETCH_MASK) { - if ((spte & (shadow_x_mask | shadow_nx_mask)) - == shadow_x_mask) { - fault_handled = true; - break; - } - } else if (error_code & PFERR_WRITE_MASK) { - if (is_writable_pte(spte)) { - fault_handled = true; - break; - } + new_spte = spte; + + if (is_access_track_spte(spte)) + new_spte = restore_acc_track_spte(new_spte); + + /* + * Currently, to simplify the code, write-protection can + * be removed in the fast path only if the SPTE was + * write-protected for dirty-logging or access tracking. + */ + if ((error_code & PFERR_WRITE_MASK) && + spte_can_locklessly_be_made_writable(spte)) + { + new_spte |= PT_WRITABLE_MASK; /* - * Currently, to simplify the code, write-protection can - * be removed in the fast path only if the SPTE was - * write-protected for dirty-logging. + * Do not fix write-permission on the large spte. Since + * we only dirty the first page into the dirty-bitmap in + * fast_pf_fix_direct_spte(), other pages are missed + * if its slot has dirty logging enabled. + * + * Instead, we let the slow page fault path create a + * normal spte to fix the access. + * + * See the comments in kvm_arch_commit_memory_region(). */ - remove_write_prot = - spte_can_locklessly_be_made_writable(spte); - } else { - /* Fault was on Read access */ - if (spte & PT_PRESENT_MASK) { - fault_handled = true; + if (sp->role.level > PT_PAGE_TABLE_LEVEL) break; - } } - remove_acc_track = is_access_track_spte(spte); - /* Verify that the fault can be handled in the fast path */ - if (!remove_acc_track && !remove_write_prot) - break; - - /* - * Do not fix write-permission on the large spte since we only - * dirty the first page into the dirty-bitmap in - * fast_pf_fix_direct_spte() that means other pages are missed - * if its slot is dirty-logged. - * - * Instead, we let the slow page fault path create a normal spte - * to fix the access. - * - * See the comments in kvm_arch_commit_memory_region(). - */ - if (sp->role.level > PT_PAGE_TABLE_LEVEL && remove_write_prot) + if (new_spte == spte || + !is_access_allowed(error_code, new_spte)) break; /* @@ -3167,8 +3177,7 @@ static bool fast_page_fault(struct kvm_vcpu *vcpu, gva_t gva, int level, */ fault_handled = fast_pf_fix_direct_spte(vcpu, sp, iterator.sptep, spte, - remove_write_prot, - remove_acc_track); + new_spte); if (fault_handled) break; -- cgit v1.2.3 From bf29bddf0417a4783da3b24e8c9e017ac649326f Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Fri, 27 Jan 2017 22:25:52 +0000 Subject: x86/efi: Always map the first physical page into the EFI pagetables Commit: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode") stopped creating 1:1 mappings for all RAM, when running in native 64-bit mode. It turns out though that there are 64-bit EFI implementations in the wild (this particular problem has been reported on a Lenovo Yoga 710-11IKB), which still make use of the first physical page for their own private use, even though they explicitly mark it EFI_CONVENTIONAL_MEMORY in the memory map. In case there is no mapping for this particular frame in the EFI pagetables, as soon as firmware tries to make use of it, a triple fault occurs and the system reboots (in case of the Yoga 710-11IKB this is very early during bootup). Fix that by always mapping the first page of physical memory into the EFI pagetables. We're free to hand this page to the BIOS, as trim_bios_range() will reserve the first page and isolate it away from memory allocators anyway. Note that just reverting 129766708 alone is not enough on v4.9-rc1+ to fix the regression on affected hardware, as this commit: ab72a27da ("x86/efi: Consolidate region mapping logic") later made the first physical frame not to be mapped anyway. Reported-by: Hanka Pavlikova Signed-off-by: Jiri Kosina Signed-off-by: Matt Fleming Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Borislav Petkov Cc: Laura Abbott Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Vojtech Pavlik Cc: Waiman Long Cc: linux-efi@vger.kernel.org Cc: stable@kernel.org # v4.8+ Fixes: 129766708 ("x86/efi: Only map RAM into EFI page tables if in mixed-mode") Link: http://lkml.kernel.org/r/20170127222552.22336-1-matt@codeblueprint.co.uk [ Tidied up the changelog and the comment. ] Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 319148bd4b05..2f25a363068c 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -268,6 +268,22 @@ int __init efi_setup_page_tables(unsigned long pa_memmap, unsigned num_pages) efi_scratch.use_pgd = true; + /* + * Certain firmware versions are way too sentimential and still believe + * they are exclusive and unquestionable owners of the first physical page, + * even though they explicitly mark it as EFI_CONVENTIONAL_MEMORY + * (but then write-access it later during SetVirtualAddressMap()). + * + * Create a 1:1 mapping for this page, to avoid triple faults during early + * boot with such firmware. We are free to hand this page to the BIOS, + * as trim_bios_range() will reserve the first page and isolate it away + * from memory allocators anyway. + */ + if (kernel_map_pages_in_pgd(pgd, 0x0, 0x0, 1, _PAGE_RW)) { + pr_err("Failed to create 1:1 mapping for the first page!\n"); + return 1; + } + /* * When making calls to the firmware everything needs to be 1:1 * mapped and addressable with 32-bit pointers. Map the kernel -- cgit v1.2.3 From f58576666ccdcfb9cf7cae8669dffe1eed844f88 Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Fri, 27 Jan 2017 16:17:52 -0700 Subject: x86/mm: Improve documentation for low-level device I/O functions Add kerneldoc comments for memcpy_{to,from}io() and memset_io(). The existing documentation for ioremap() was distant from the definition, causing kernel-doc to miss it; move it appropriately. Signed-off-by: Jonathan Corbet Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170127161752.0b95e95b@lwn.net Signed-off-by: Ingo Molnar --- arch/x86/include/asm/io.h | 46 +++++++++++++++++++++++++++++++++++----------- 1 file changed, 35 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/io.h b/arch/x86/include/asm/io.h index d34bd370074b..7afb0e2f07f4 100644 --- a/arch/x86/include/asm/io.h +++ b/arch/x86/include/asm/io.h @@ -164,6 +164,17 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) #define virt_to_bus virt_to_phys #define bus_to_virt phys_to_virt +/* + * The default ioremap() behavior is non-cached; if you need something + * else, you probably want one of the following. + */ +extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); +#define ioremap_uc ioremap_uc + +extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); +extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, unsigned long prot_val); + /** * ioremap - map bus memory into CPU space * @offset: bus address of the memory @@ -178,17 +189,6 @@ static inline unsigned int isa_virt_to_bus(volatile void *address) * If the area you are trying to map is a PCI BAR you should have a * look at pci_iomap(). */ -extern void __iomem *ioremap_nocache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_uc(resource_size_t offset, unsigned long size); -#define ioremap_uc ioremap_uc - -extern void __iomem *ioremap_cache(resource_size_t offset, unsigned long size); -extern void __iomem *ioremap_prot(resource_size_t offset, unsigned long size, - unsigned long prot_val); - -/* - * The default ioremap() behavior is non-cached: - */ static inline void __iomem *ioremap(resource_size_t offset, unsigned long size) { return ioremap_nocache(offset, size); @@ -207,18 +207,42 @@ extern void set_iounmap_nonlazy(void); */ #define xlate_dev_kmem_ptr(p) p +/** + * memset_io Set a range of I/O memory to a constant value + * @addr: The beginning of the I/O-memory range to set + * @val: The value to set the memory to + * @count: The number of bytes to set + * + * Set a range of I/O memory to a given value. + */ static inline void memset_io(volatile void __iomem *addr, unsigned char val, size_t count) { memset((void __force *)addr, val, count); } +/** + * memcpy_fromio Copy a block of data from I/O memory + * @dst: The (RAM) destination for the copy + * @src: The (I/O memory) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data from I/O memory. + */ static inline void memcpy_fromio(void *dst, const volatile void __iomem *src, size_t count) { memcpy(dst, (const void __force *)src, count); } +/** + * memcpy_toio Copy a block of data into I/O memory + * @dst: The (I/O memory) destination for the copy + * @src: The (RAM) source for the data + * @count: The number of bytes to copy + * + * Copy a block of data to I/O memory. + */ static inline void memcpy_toio(volatile void __iomem *dst, const void *src, size_t count) { -- cgit v1.2.3 From cc272163ea554a97dac180fa8dd6cd54c2810bd1 Mon Sep 17 00:00:00 2001 From: Mohit Gambhir Date: Thu, 26 Jan 2017 13:12:27 -0500 Subject: x86/xen: Fix APIC id mismatch warning on Intel This patch fixes the following warning message seen when booting the kernel as Dom0 with Xen on Intel machines. [0.003000] [Firmware Bug]: CPU1: APIC id mismatch. Firmware: 0 APIC: 1] The code generating the warning in validate_apic_and_package_id() matches cpu_data(cpu).apicid (initialized in init_intel()-> detect_extended_topology() using cpuid) against the apicid returned from xen_apic_read(). Now, xen_apic_read() makes a hypercall to retrieve apicid for the boot cpu but returns 0 otherwise. Hence the warning gets thrown for all but the boot cpu. The idea behind xen_apic_read() returning 0 for apicid is that the guests (even Dom0) should not need to know what physical processor their vcpus are running on. This is because we currently do not have topology information in Xen and also because xen allows more vcpus than physical processors. However, boot cpu's apicid is required for loading xen-acpi-processor driver on AMD machines. Look at following patch for details: commit 558daa289a40 ("xen/apic: Return the APIC ID (and version) for CPU 0.") So to get rid of the warning, this patch modifies xen_cpu_present_to_apicid() to return cpu_data(cpu).apicid instead of calling xen_apic_read(). The warning is not seen on AMD machines because init_amd() populates cpu_data(cpu).apicid by calling hard_smp_processor_id()->xen_apic_read() as opposed to using apicid from cpuid as is done on Intel machines. Signed-off-by: Mohit Gambhir Reviewed-by: Juergen Gross Signed-off-by: Boris Ostrovsky --- arch/x86/xen/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/apic.c b/arch/x86/xen/apic.c index 44c88ad1841a..bcea81f36fc5 100644 --- a/arch/x86/xen/apic.c +++ b/arch/x86/xen/apic.c @@ -145,7 +145,7 @@ static void xen_silent_inquire(int apicid) static int xen_cpu_present_to_apicid(int cpu) { if (cpu_present(cpu)) - return xen_get_apic_id(xen_apic_read(APIC_ID)); + return cpu_data(cpu).apicid; else return BAD_APICID; } -- cgit v1.2.3 From 24c2503255d35c269b67162c397a1a1c1e02f6ce Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Jan 2017 21:00:48 +0100 Subject: x86/microcode: Do not access the initrd after it has been freed When we look for microcode blobs, we first try builtin and if that doesn't succeed, we fallback to the initrd supplied to the kernel. However, at some point doing boot, that initrd gets jettisoned and we shouldn't access it anymore. But we do, as the below KASAN report shows. That's because find_microcode_in_initrd() doesn't check whether the initrd is still valid or not. So do that. ================================================================== BUG: KASAN: use-after-free in find_cpio_data Read of size 1 by task swapper/1/0 page:ffffea0000db9d40 count:0 mapcount:0 mapping: (null) index:0x1 flags: 0x100000000000000() raw: 0100000000000000 0000000000000000 0000000000000001 00000000ffffffff raw: dead000000000100 dead000000000200 0000000000000000 0000000000000000 page dumped because: kasan: bad access detected CPU: 1 PID: 0 Comm: swapper/1 Tainted: G W 4.10.0-rc5-debug-00075-g2dbde22 #3 Hardware name: Dell Inc. XPS 13 9360/0839Y6, BIOS 1.2.3 12/01/2016 Call Trace: dump_stack ? _atomic_dec_and_lock ? __dump_page kasan_report_error ? pointer ? find_cpio_data __asan_report_load1_noabort ? find_cpio_data find_cpio_data ? vsprintf ? dump_stack ? get_ucode_user ? print_usage_bug find_microcode_in_initrd __load_ucode_intel ? collect_cpu_info_early ? debug_check_no_locks_freed load_ucode_intel_ap ? collect_cpu_info ? trace_hardirqs_on ? flat_send_IPI_mask_allbutself load_ucode_ap ? get_builtin_firmware ? flush_tlb_func ? do_raw_spin_trylock ? cpumask_weight cpu_init ? trace_hardirqs_off ? play_dead_common ? native_play_dead ? hlt_play_dead ? syscall_init ? arch_cpu_idle_dead ? do_idle start_secondary start_cpu Memory state around the buggy address: ffff880036e74f00: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff880036e74f80: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff >ffff880036e75000: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ^ ffff880036e75080: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ffff880036e75100: ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ff ================================================================== Reported-by: Andrey Ryabinin Tested-by: Andrey Ryabinin Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170126165833.evjemhbqzaepirxo@pd.tnic Signed-off-by: Ingo Molnar --- arch/x86/include/asm/microcode.h | 1 + arch/x86/kernel/cpu/microcode/amd.c | 5 +++-- arch/x86/kernel/cpu/microcode/core.c | 22 +++++++++++++++++----- 3 files changed, 21 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/microcode.h b/arch/x86/include/asm/microcode.h index 38711df3bcb5..2266f864b747 100644 --- a/arch/x86/include/asm/microcode.h +++ b/arch/x86/include/asm/microcode.h @@ -140,6 +140,7 @@ extern void __init load_ucode_bsp(void); extern void load_ucode_ap(void); void reload_early_microcode(void); extern bool get_builtin_firmware(struct cpio_data *cd, const char *name); +extern bool initrd_gone; #else static inline int __init microcode_init(void) { return 0; }; static inline void __init load_ucode_bsp(void) { } diff --git a/arch/x86/kernel/cpu/microcode/amd.c b/arch/x86/kernel/cpu/microcode/amd.c index 6a31e2691f3a..079e81733a58 100644 --- a/arch/x86/kernel/cpu/microcode/amd.c +++ b/arch/x86/kernel/cpu/microcode/amd.c @@ -384,8 +384,9 @@ void load_ucode_amd_ap(unsigned int family) reget: if (!get_builtin_microcode(&cp, family)) { #ifdef CONFIG_BLK_DEV_INITRD - cp = find_cpio_data(ucode_path, (void *)initrd_start, - initrd_end - initrd_start, NULL); + if (!initrd_gone) + cp = find_cpio_data(ucode_path, (void *)initrd_start, + initrd_end - initrd_start, NULL); #endif if (!(cp.data && cp.size)) { /* diff --git a/arch/x86/kernel/cpu/microcode/core.c b/arch/x86/kernel/cpu/microcode/core.c index 2af69d27da62..73102d932760 100644 --- a/arch/x86/kernel/cpu/microcode/core.c +++ b/arch/x86/kernel/cpu/microcode/core.c @@ -46,6 +46,8 @@ static struct microcode_ops *microcode_ops; static bool dis_ucode_ldr = true; +bool initrd_gone; + LIST_HEAD(microcode_cache); /* @@ -190,21 +192,24 @@ void load_ucode_ap(void) static int __init save_microcode_in_initrd(void) { struct cpuinfo_x86 *c = &boot_cpu_data; + int ret = -EINVAL; switch (c->x86_vendor) { case X86_VENDOR_INTEL: if (c->x86 >= 6) - return save_microcode_in_initrd_intel(); + ret = save_microcode_in_initrd_intel(); break; case X86_VENDOR_AMD: if (c->x86 >= 0x10) - return save_microcode_in_initrd_amd(c->x86); + ret = save_microcode_in_initrd_amd(c->x86); break; default: break; } - return -EINVAL; + initrd_gone = true; + + return ret; } struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) @@ -247,9 +252,16 @@ struct cpio_data find_microcode_in_initrd(const char *path, bool use_pa) * has the virtual address of the beginning of the initrd. It also * possibly relocates the ramdisk. In either case, initrd_start contains * the updated address so use that instead. + * + * initrd_gone is for the hotplug case where we've thrown out initrd + * already. */ - if (!use_pa && initrd_start) - start = initrd_start; + if (!use_pa) { + if (initrd_gone) + return (struct cpio_data){ NULL, 0, "" }; + if (initrd_start) + start = initrd_start; + } return find_cpio_data(path, (void *)start, size, NULL); #else /* !CONFIG_BLK_DEV_INITRD */ -- cgit v1.2.3 From a83f4c00dd6a646ac3c7604ee255d732fc5e0e0b Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 16 Jan 2017 17:36:21 -0600 Subject: perf/x86/amd/uncore: Rename 'L2' to 'LLC' This patch renames L2 counters to LLC counters. In AMD Family17h processors, L3 cache counter is supported. Since older families have at most L2 counters, last level cache (LLC) indicates L2/L3 based on the family. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/5d8cd8736d8d578354597a548e64ff16210c319b.1484598705.git.Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 98 ++++++++++++++++++++++---------------------- 1 file changed, 49 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a0b1bdb3ad42..a53bfbe02c7c 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -25,7 +25,7 @@ #define MAX_COUNTERS NUM_COUNTERS_NB #define RDPMC_BASE_NB 6 -#define RDPMC_BASE_L2 10 +#define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 @@ -45,30 +45,30 @@ struct amd_uncore { }; static struct amd_uncore * __percpu *amd_uncore_nb; -static struct amd_uncore * __percpu *amd_uncore_l2; +static struct amd_uncore * __percpu *amd_uncore_llc; static struct pmu amd_nb_pmu; -static struct pmu amd_l2_pmu; +static struct pmu amd_llc_pmu; static cpumask_t amd_nb_active_mask; -static cpumask_t amd_l2_active_mask; +static cpumask_t amd_llc_active_mask; static bool is_nb_event(struct perf_event *event) { return event->pmu->type == amd_nb_pmu.type; } -static bool is_l2_event(struct perf_event *event) +static bool is_llc_event(struct perf_event *event) { - return event->pmu->type == amd_l2_pmu.type; + return event->pmu->type == amd_llc_pmu.type; } static struct amd_uncore *event_to_amd_uncore(struct perf_event *event) { if (is_nb_event(event) && amd_uncore_nb) return *per_cpu_ptr(amd_uncore_nb, event->cpu); - else if (is_l2_event(event) && amd_uncore_l2) - return *per_cpu_ptr(amd_uncore_l2, event->cpu); + else if (is_llc_event(event) && amd_uncore_llc) + return *per_cpu_ptr(amd_uncore_llc, event->cpu); return NULL; } @@ -183,16 +183,16 @@ static int amd_uncore_event_init(struct perf_event *event) return -ENOENT; /* - * NB and L2 counters (MSRs) are shared across all cores that share the - * same NB / L2 cache. Interrupts can be directed to a single target - * core, however, event counts generated by processes running on other - * cores cannot be masked out. So we do not support sampling and - * per-thread events. + * NB and Last level cache counters (MSRs) are shared across all cores + * that share the same NB / Last level cache. Interrupts can be directed + * to a single target core, however, event counts generated by processes + * running on other cores cannot be masked out. So we do not support + * sampling and per-thread events. */ if (is_sampling_event(event) || event->attach_state & PERF_ATTACH_TASK) return -EINVAL; - /* NB and L2 counters do not have usr/os/guest/host bits */ + /* NB and Last level cache counters do not have usr/os/guest/host bits */ if (event->attr.exclude_user || event->attr.exclude_kernel || event->attr.exclude_host || event->attr.exclude_guest) return -EINVAL; @@ -226,8 +226,8 @@ static ssize_t amd_uncore_attr_show_cpumask(struct device *dev, if (pmu->type == amd_nb_pmu.type) active_mask = &amd_nb_active_mask; - else if (pmu->type == amd_l2_pmu.type) - active_mask = &amd_l2_active_mask; + else if (pmu->type == amd_llc_pmu.type) + active_mask = &amd_llc_active_mask; else return 0; @@ -276,7 +276,7 @@ static struct pmu amd_nb_pmu = { .read = amd_uncore_read, }; -static struct pmu amd_l2_pmu = { +static struct pmu amd_llc_pmu = { .task_ctx_nr = perf_invalid_context, .attr_groups = amd_uncore_attr_groups, .name = "amd_l2", @@ -296,7 +296,7 @@ static struct amd_uncore *amd_uncore_alloc(unsigned int cpu) static int amd_uncore_cpu_up_prepare(unsigned int cpu) { - struct amd_uncore *uncore_nb = NULL, *uncore_l2; + struct amd_uncore *uncore_nb = NULL, *uncore_llc; if (amd_uncore_nb) { uncore_nb = amd_uncore_alloc(cpu); @@ -312,18 +312,18 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) *per_cpu_ptr(amd_uncore_nb, cpu) = uncore_nb; } - if (amd_uncore_l2) { - uncore_l2 = amd_uncore_alloc(cpu); - if (!uncore_l2) + if (amd_uncore_llc) { + uncore_llc = amd_uncore_alloc(cpu); + if (!uncore_llc) goto fail; - uncore_l2->cpu = cpu; - uncore_l2->num_counters = NUM_COUNTERS_L2; - uncore_l2->rdpmc_base = RDPMC_BASE_L2; - uncore_l2->msr_base = MSR_F16H_L2I_PERF_CTL; - uncore_l2->active_mask = &amd_l2_active_mask; - uncore_l2->pmu = &amd_l2_pmu; - uncore_l2->id = -1; - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore_l2; + uncore_llc->cpu = cpu; + uncore_llc->num_counters = NUM_COUNTERS_L2; + uncore_llc->rdpmc_base = RDPMC_BASE_LLC; + uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; + uncore_llc->active_mask = &amd_llc_active_mask; + uncore_llc->pmu = &amd_llc_pmu; + uncore_llc->id = -1; + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore_llc; } return 0; @@ -376,17 +376,17 @@ static int amd_uncore_cpu_starting(unsigned int cpu) *per_cpu_ptr(amd_uncore_nb, cpu) = uncore; } - if (amd_uncore_l2) { + if (amd_uncore_llc) { unsigned int apicid = cpu_data(cpu).apicid; unsigned int nshared; - uncore = *per_cpu_ptr(amd_uncore_l2, cpu); + uncore = *per_cpu_ptr(amd_uncore_llc, cpu); cpuid_count(0x8000001d, 2, &eax, &ebx, &ecx, &edx); nshared = ((eax >> 14) & 0xfff) + 1; uncore->id = apicid - (apicid % nshared); - uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_l2); - *per_cpu_ptr(amd_uncore_l2, cpu) = uncore; + uncore = amd_uncore_find_online_sibling(uncore, amd_uncore_llc); + *per_cpu_ptr(amd_uncore_llc, cpu) = uncore; } return 0; @@ -419,8 +419,8 @@ static int amd_uncore_cpu_online(unsigned int cpu) if (amd_uncore_nb) uncore_online(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_online(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_online(cpu, amd_uncore_llc); return 0; } @@ -456,8 +456,8 @@ static int amd_uncore_cpu_down_prepare(unsigned int cpu) if (amd_uncore_nb) uncore_down_prepare(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_down_prepare(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_down_prepare(cpu, amd_uncore_llc); return 0; } @@ -479,8 +479,8 @@ static int amd_uncore_cpu_dead(unsigned int cpu) if (amd_uncore_nb) uncore_dead(cpu, amd_uncore_nb); - if (amd_uncore_l2) - uncore_dead(cpu, amd_uncore_l2); + if (amd_uncore_llc) + uncore_dead(cpu, amd_uncore_llc); return 0; } @@ -510,16 +510,16 @@ static int __init amd_uncore_init(void) } if (boot_cpu_has(X86_FEATURE_PERFCTR_L2)) { - amd_uncore_l2 = alloc_percpu(struct amd_uncore *); - if (!amd_uncore_l2) { + amd_uncore_llc = alloc_percpu(struct amd_uncore *); + if (!amd_uncore_llc) { ret = -ENOMEM; - goto fail_l2; + goto fail_llc; } - ret = perf_pmu_register(&amd_l2_pmu, amd_l2_pmu.name, -1); + ret = perf_pmu_register(&amd_llc_pmu, amd_llc_pmu.name, -1); if (ret) - goto fail_l2; + goto fail_llc; - pr_info("perf: AMD L2I counters detected\n"); + pr_info("perf: AMD LLC counters detected\n"); ret = 0; } @@ -529,7 +529,7 @@ static int __init amd_uncore_init(void) if (cpuhp_setup_state(CPUHP_PERF_X86_AMD_UNCORE_PREP, "perf/x86/amd/uncore:prepare", amd_uncore_cpu_up_prepare, amd_uncore_cpu_dead)) - goto fail_l2; + goto fail_llc; if (cpuhp_setup_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, "perf/x86/amd/uncore:starting", @@ -546,11 +546,11 @@ fail_start: cpuhp_remove_state(CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING); fail_prep: cpuhp_remove_state(CPUHP_PERF_X86_AMD_UNCORE_PREP); -fail_l2: +fail_llc: if (boot_cpu_has(X86_FEATURE_PERFCTR_NB)) perf_pmu_unregister(&amd_nb_pmu); - if (amd_uncore_l2) - free_percpu(amd_uncore_l2); + if (amd_uncore_llc) + free_percpu(amd_uncore_llc); fail_nb: if (amd_uncore_nb) free_percpu(amd_uncore_nb); -- cgit v1.2.3 From bc1daef6b5da574bca0a2ec7f9b4d0c5fe0c7d11 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 16 Jan 2017 17:36:22 -0600 Subject: perf/x86/amd/uncore: Update the number of uncore counters This patch updates the AMD uncore driver to support AMD Family17h processors. In Family17h, there are two extra last level cache counters. The maximum available counters is increased and the number of counters for each uncore type is now based on the family. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/799f9c5be8963cc209d9169a08f4a2643b748dc7.1484598705.git.Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 31 ++++++++++++++++++++++++++++--- 1 file changed, 28 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index a53bfbe02c7c..e6a2eb54c4a4 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -22,13 +22,17 @@ #define NUM_COUNTERS_NB 4 #define NUM_COUNTERS_L2 4 -#define MAX_COUNTERS NUM_COUNTERS_NB +#define NUM_COUNTERS_L3 6 +#define MAX_COUNTERS 6 #define RDPMC_BASE_NB 6 #define RDPMC_BASE_LLC 10 #define COUNTER_SHIFT 16 +static int num_counters_llc; +static int num_counters_nb; + static HLIST_HEAD(uncore_unused_list); struct amd_uncore { @@ -303,7 +307,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) if (!uncore_nb) goto fail; uncore_nb->cpu = cpu; - uncore_nb->num_counters = NUM_COUNTERS_NB; + uncore_nb->num_counters = num_counters_nb; uncore_nb->rdpmc_base = RDPMC_BASE_NB; uncore_nb->msr_base = MSR_F15H_NB_PERF_CTL; uncore_nb->active_mask = &amd_nb_active_mask; @@ -317,7 +321,7 @@ static int amd_uncore_cpu_up_prepare(unsigned int cpu) if (!uncore_llc) goto fail; uncore_llc->cpu = cpu; - uncore_llc->num_counters = NUM_COUNTERS_L2; + uncore_llc->num_counters = num_counters_llc; uncore_llc->rdpmc_base = RDPMC_BASE_LLC; uncore_llc->msr_base = MSR_F16H_L2I_PERF_CTL; uncore_llc->active_mask = &amd_llc_active_mask; @@ -492,6 +496,27 @@ static int __init amd_uncore_init(void) if (boot_cpu_data.x86_vendor != X86_VENDOR_AMD) goto fail_nodev; + switch(boot_cpu_data.x86) { + case 23: + /* Family 17h: */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L3; + break; + case 22: + /* Family 16h - may change: */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + break; + default: + /* + * All prior families have the same number of + * NorthBridge and Last Level Cache counters + */ + num_counters_nb = NUM_COUNTERS_NB; + num_counters_llc = NUM_COUNTERS_L2; + break; + } + if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) goto fail_nodev; -- cgit v1.2.3 From da6adaea2b7ef658c61a557c28508668eac29fe1 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Mon, 16 Jan 2017 17:36:23 -0600 Subject: perf/x86/amd/uncore: Update sysfs attributes for Family17h processors This patch updates the sysfs attributes for AMD Family17h processors. In Family17h, the event bit position is changed for both the NorthBridge and Last level cache counters. The sysfs attributes are assigned based on the family and the type of the counter. Signed-off-by: Janakarajan Natarajan Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/617570ed3634e804991f95db62c3cf3856a9d2a7.1484598705.git.Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/uncore.c | 77 ++++++++++++++++++++++++++++++++------------ 1 file changed, 56 insertions(+), 21 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/amd/uncore.c b/arch/x86/events/amd/uncore.c index e6a2eb54c4a4..4d1f7f2d9aff 100644 --- a/arch/x86/events/amd/uncore.c +++ b/arch/x86/events/amd/uncore.c @@ -248,30 +248,47 @@ static struct attribute_group amd_uncore_attr_group = { .attrs = amd_uncore_attrs, }; -PMU_FORMAT_ATTR(event, "config:0-7,32-35"); -PMU_FORMAT_ATTR(umask, "config:8-15"); - -static struct attribute *amd_uncore_format_attr[] = { - &format_attr_event.attr, - &format_attr_umask.attr, - NULL, -}; - -static struct attribute_group amd_uncore_format_group = { - .name = "format", - .attrs = amd_uncore_format_attr, +/* + * Similar to PMU_FORMAT_ATTR but allowing for format_attr to be assigned based + * on family + */ +#define AMD_FORMAT_ATTR(_dev, _name, _format) \ +static ssize_t \ +_dev##_show##_name(struct device *dev, \ + struct device_attribute *attr, \ + char *page) \ +{ \ + BUILD_BUG_ON(sizeof(_format) >= PAGE_SIZE); \ + return sprintf(page, _format "\n"); \ +} \ +static struct device_attribute format_attr_##_dev##_name = __ATTR_RO(_dev); + +/* Used for each uncore counter type */ +#define AMD_ATTRIBUTE(_name) \ +static struct attribute *amd_uncore_format_attr_##_name[] = { \ + &format_attr_event_##_name.attr, \ + &format_attr_umask.attr, \ + NULL, \ +}; \ +static struct attribute_group amd_uncore_format_group_##_name = { \ + .name = "format", \ + .attrs = amd_uncore_format_attr_##_name, \ +}; \ +static const struct attribute_group *amd_uncore_attr_groups_##_name[] = { \ + &amd_uncore_attr_group, \ + &amd_uncore_format_group_##_name, \ + NULL, \ }; -static const struct attribute_group *amd_uncore_attr_groups[] = { - &amd_uncore_attr_group, - &amd_uncore_format_group, - NULL, -}; +AMD_FORMAT_ATTR(event, , "config:0-7,32-35"); +AMD_FORMAT_ATTR(umask, , "config:8-15"); +AMD_FORMAT_ATTR(event, _df, "config:0-7,32-35,59-60"); +AMD_FORMAT_ATTR(event, _l3, "config:0-7"); +AMD_ATTRIBUTE(df); +AMD_ATTRIBUTE(l3); static struct pmu amd_nb_pmu = { .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_attr_groups, - .name = "amd_nb", .event_init = amd_uncore_event_init, .add = amd_uncore_add, .del = amd_uncore_del, @@ -282,8 +299,6 @@ static struct pmu amd_nb_pmu = { static struct pmu amd_llc_pmu = { .task_ctx_nr = perf_invalid_context, - .attr_groups = amd_uncore_attr_groups, - .name = "amd_l2", .event_init = amd_uncore_event_init, .add = amd_uncore_add, .del = amd_uncore_del, @@ -501,11 +516,25 @@ static int __init amd_uncore_init(void) /* Family 17h: */ num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L3; + /* + * For Family17h, the NorthBridge counters are + * re-purposed as Data Fabric counters. Also, support is + * added for L3 counters. The pmus are exported based on + * family as either L2 or L3 and NB or DF. + */ + amd_nb_pmu.name = "amd_df"; + amd_llc_pmu.name = "amd_l3"; + format_attr_event_df.show = &event_show_df; + format_attr_event_l3.show = &event_show_l3; break; case 22: /* Family 16h - may change: */ num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; break; default: /* @@ -514,8 +543,14 @@ static int __init amd_uncore_init(void) */ num_counters_nb = NUM_COUNTERS_NB; num_counters_llc = NUM_COUNTERS_L2; + amd_nb_pmu.name = "amd_nb"; + amd_llc_pmu.name = "amd_l2"; + format_attr_event_df = format_attr_event; + format_attr_event_l3 = format_attr_event; break; } + amd_nb_pmu.attr_groups = amd_uncore_attr_groups_df; + amd_llc_pmu.attr_groups = amd_uncore_attr_groups_l3; if (!boot_cpu_has(X86_FEATURE_TOPOEXT)) goto fail_nodev; -- cgit v1.2.3 From 612f0c0b859ee99f800dc88ad470d938d90ad111 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 09:08:19 +0100 Subject: perf/x86/events: Add an AMD-specific Makefile Move the AMD pieces from the generic Makefile so that $ make arch/x86/events/amd/.s can work too. Otherwise you get: $ make arch/x86/events/amd/ibs.s scripts/Makefile.build:44: arch/x86/events/amd/Makefile: No such file or directory make[1]: *** No rule to make target 'arch/x86/events/amd/Makefile'. Stop. Makefile:1636: recipe for target 'arch/x86/events/amd/ibs.s' failed make: *** [arch/x86/events/amd/ibs.s] Error 2 Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/20170126080819.417-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/events/Makefile | 13 +++---------- arch/x86/events/amd/Makefile | 7 +++++++ 2 files changed, 10 insertions(+), 10 deletions(-) create mode 100644 arch/x86/events/amd/Makefile (limited to 'arch/x86') diff --git a/arch/x86/events/Makefile b/arch/x86/events/Makefile index 1d392c39fe56..b8ccdb5c9244 100644 --- a/arch/x86/events/Makefile +++ b/arch/x86/events/Makefile @@ -1,11 +1,4 @@ -obj-y += core.o - -obj-$(CONFIG_CPU_SUP_AMD) += amd/core.o amd/uncore.o -obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += amd/power.o -obj-$(CONFIG_X86_LOCAL_APIC) += amd/ibs.o msr.o -ifdef CONFIG_AMD_IOMMU -obj-$(CONFIG_CPU_SUP_AMD) += amd/iommu.o -endif - -obj-$(CONFIG_CPU_SUP_INTEL) += msr.o +obj-y += core.o +obj-y += amd/ +obj-$(CONFIG_X86_LOCAL_APIC) += msr.o obj-$(CONFIG_CPU_SUP_INTEL) += intel/ diff --git a/arch/x86/events/amd/Makefile b/arch/x86/events/amd/Makefile new file mode 100644 index 000000000000..b1da46f396e0 --- /dev/null +++ b/arch/x86/events/amd/Makefile @@ -0,0 +1,7 @@ +obj-$(CONFIG_CPU_SUP_AMD) += core.o uncore.o +obj-$(CONFIG_PERF_EVENTS_AMD_POWER) += power.o +obj-$(CONFIG_X86_LOCAL_APIC) += ibs.o +ifdef CONFIG_AMD_IOMMU +obj-$(CONFIG_CPU_SUP_AMD) += iommu.o +endif + -- cgit v1.2.3 From 459fbe00693449fade2d1bc802791b081c94edcf Mon Sep 17 00:00:00 2001 From: John Ogness Date: Mon, 30 Jan 2017 09:41:21 +0100 Subject: x86/mm/cpa: Avoid wbinvd() for PREEMPT Although wbinvd() is faster than flushing many individual pages, it blocks the memory bus for "long" periods of time (>100us), thus directly causing unusually large latencies on all CPUs, regardless of any CPU isolation features that may be active. This is an unpriviledged operatation as it is exposed to user space via the graphics subsystem. For 1024 pages, flushing those pages individually can take up to 2200us, but the task remains fully preemptible during that time. Signed-off-by: John Ogness Acked-by: Peter Zijlstra (Intel) Cc: Sebastian Siewior Cc: linux-rt-users Signed-off-by: Thomas Gleixner --- arch/x86/mm/pageattr.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/pageattr.c b/arch/x86/mm/pageattr.c index 5a287e523eab..28d42130243c 100644 --- a/arch/x86/mm/pageattr.c +++ b/arch/x86/mm/pageattr.c @@ -214,7 +214,20 @@ static void cpa_flush_array(unsigned long *start, int numpages, int cache, int in_flags, struct page **pages) { unsigned int i, level; +#ifdef CONFIG_PREEMPT + /* + * Avoid wbinvd() because it causes latencies on all CPUs, + * regardless of any CPU isolation that may be in effect. + * + * This should be extended for CAT enabled systems independent of + * PREEMPT because wbinvd() does not respect the CAT partitions and + * this is exposed to unpriviledged users through the graphics + * subsystem. + */ + unsigned long do_wbinvd = 0; +#else unsigned long do_wbinvd = cache && numpages >= 1024; /* 4M threshold */ +#endif BUG_ON(irqs_disabled()); -- cgit v1.2.3 From 3ad38ceb2769f08d0abd132331a7a6130536a36c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 30 Jan 2017 16:37:11 -0800 Subject: x86/mm: Remove CONFIG_DEBUG_NX_TEST CONFIG_DEBUG_NX_TEST has been broken since CONFIG_DEBUG_SET_MODULE_RONX=y was added in v2.6.37 via: 84e1c6bb38eb ("x86: Add RO/NX protection for loadable kernel modules") since the exception table was then made read-only. Additionally, the manually constructed extables were never fixed when relative extables were introduced in v3.5 via: 706276543b69 ("x86, extable: Switch to relative exception table entries") However, relative extables won't work for test_nx.c, since test instruction memory areas may be more than INT_MAX away from an executable fixup (e.g. stack and heap too far away from executable memory with the fixup). Since clearly no one has been using this code for a while now, and similar tests exist in LKDTM, this should just be removed entirely. Signed-off-by: Kees Cook Cc: Andy Lutomirski Cc: Arjan van de Ven Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Jinbum Park Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170131003711.GA74048@beast Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.debug | 8 --- arch/x86/kernel/Makefile | 1 - arch/x86/kernel/test_nx.c | 173 ---------------------------------------------- 3 files changed, 182 deletions(-) delete mode 100644 arch/x86/kernel/test_nx.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 67eec55093a5..783099f2ac72 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -120,14 +120,6 @@ config DEBUG_SET_MODULE_RONX against certain classes of kernel exploits. If in doubt, say "N". -config DEBUG_NX_TEST - tristate "Testcase for the NX non-executable stack feature" - depends on DEBUG_KERNEL && m - ---help--- - This option enables a testcase for the CPU NX capability - and the software setup of this feature. - If in doubt, say "N" - config DOUBLEFAULT default y bool "Enable doublefault exception handler" if EXPERT diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 581386c7e429..bdcdb3b3a219 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -101,7 +101,6 @@ obj-$(CONFIG_APB_TIMER) += apb_timer.o obj-$(CONFIG_AMD_NB) += amd_nb.o obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o -obj-$(CONFIG_DEBUG_NX_TEST) += test_nx.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o diff --git a/arch/x86/kernel/test_nx.c b/arch/x86/kernel/test_nx.c deleted file mode 100644 index a3b875c9e6af..000000000000 --- a/arch/x86/kernel/test_nx.c +++ /dev/null @@ -1,173 +0,0 @@ -/* - * test_nx.c: functional test for NX functionality - * - * (C) Copyright 2008 Intel Corporation - * Author: Arjan van de Ven - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#include -#include -#include - -#include -#include - -extern int rodata_test_data; - -/* - * This file checks 4 things: - * 1) Check if the stack is not executable - * 2) Check if kmalloc memory is not executable - * 3) Check if the .rodata section is not executable - * 4) Check if the .data section of a module is not executable - * - * To do this, the test code tries to execute memory in stack/kmalloc/etc, - * and then checks if the expected trap happens. - * - * Sadly, this implies having a dynamic exception handling table entry. - * ... which can be done (and will make Rusty cry)... but it can only - * be done in a stand-alone module with only 1 entry total. - * (otherwise we'd have to sort and that's just too messy) - */ - - - -/* - * We want to set up an exception handling point on our stack, - * which means a variable value. This function is rather dirty - * and walks the exception table of the module, looking for a magic - * marker and replaces it with a specific function. - */ -static void fudze_exception_table(void *marker, void *new) -{ - struct module *mod = THIS_MODULE; - struct exception_table_entry *extable; - - /* - * Note: This module has only 1 exception table entry, - * so searching and sorting is not needed. If that changes, - * this would be the place to search and re-sort the exception - * table. - */ - if (mod->num_exentries > 1) { - printk(KERN_ERR "test_nx: too many exception table entries!\n"); - printk(KERN_ERR "test_nx: test results are not reliable.\n"); - return; - } - extable = (struct exception_table_entry *)mod->extable; - extable[0].insn = (unsigned long)new; -} - - -/* - * exception tables get their symbols translated so we need - * to use a fake function to put in there, which we can then - * replace at runtime. - */ -void foo_label(void); - -/* - * returns 0 for not-executable, negative for executable - * - * Note: we cannot allow this function to be inlined, because - * that would give us more than 1 exception table entry. - * This in turn would break the assumptions above. - */ -static noinline int test_address(void *address) -{ - unsigned long result; - - /* Set up an exception table entry for our address */ - fudze_exception_table(&foo_label, address); - result = 1; - asm volatile( - "foo_label:\n" - "0: call *%[fake_code]\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: mov %[zero], %[rslt]\n" - " ret\n" - ".previous\n" - _ASM_EXTABLE(0b,2b) - : [rslt] "=r" (result) - : [fake_code] "r" (address), [zero] "r" (0UL), "0" (result) - ); - /* change the exception table back for the next round */ - fudze_exception_table(address, &foo_label); - - if (result) - return -ENODEV; - return 0; -} - -static unsigned char test_data = 0xC3; /* 0xC3 is the opcode for "ret" */ - -static int test_NX(void) -{ - int ret = 0; - /* 0xC3 is the opcode for "ret" */ - char stackcode[] = {0xC3, 0x90, 0 }; - char *heap; - - test_data = 0xC3; - - printk(KERN_INFO "Testing NX protection\n"); - - /* Test 1: check if the stack is not executable */ - if (test_address(&stackcode)) { - printk(KERN_ERR "test_nx: stack was executable\n"); - ret = -ENODEV; - } - - - /* Test 2: Check if the heap is executable */ - heap = kmalloc(64, GFP_KERNEL); - if (!heap) - return -ENOMEM; - heap[0] = 0xC3; /* opcode for "ret" */ - - if (test_address(heap)) { - printk(KERN_ERR "test_nx: heap was executable\n"); - ret = -ENODEV; - } - kfree(heap); - - /* - * The following 2 tests currently fail, this needs to get fixed - * Until then, don't run them to avoid too many people getting scared - * by the error message - */ - - /* Test 3: Check if the .rodata section is executable */ - if (rodata_test_data != 0xC3) { - printk(KERN_ERR "test_nx: .rodata marker has invalid value\n"); - ret = -ENODEV; - } else if (test_address(&rodata_test_data)) { - printk(KERN_ERR "test_nx: .rodata section is executable\n"); - ret = -ENODEV; - } - -#if 0 - /* Test 4: Check if the .data section of a module is executable */ - if (test_address(&test_data)) { - printk(KERN_ERR "test_nx: .data section is executable\n"); - ret = -ENODEV; - } - -#endif - return ret; -} - -static void test_exit(void) -{ -} - -module_init(test_NX); -module_exit(test_exit); -MODULE_LICENSE("GPL"); -MODULE_DESCRIPTION("Testcase for the NX infrastructure"); -MODULE_AUTHOR("Arjan van de Ven "); -- cgit v1.2.3 From d6f3609d2b4c6d0eec01f398cb685e50da3e6013 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 28 Jan 2017 12:37:14 -0700 Subject: Drivers: hv: restore hypervcall page cleanup before kexec We need to cleanup the hypercall page before doing kexec/kdump or the new kernel may crash if it tries to use it. Reuse the now-empty hv_cleanup function renaming it to hyperv_cleanup and moving to the arch specific code. Fixes: 8730046c1498 ("Drivers: hv vmbus: Move Hypercall page setup out of common code") Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 16 ++++++++++++++++ arch/x86/include/asm/mshyperv.h | 1 + drivers/hv/hv.c | 10 ---------- drivers/hv/hyperv_vmbus.h | 2 -- drivers/hv/vmbus_drv.c | 10 +++------- 5 files changed, 20 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index d289bc29d282..d6b018b86c42 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -165,6 +165,22 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); } +/* + * This routine is called before kexec/kdump, it does the required cleanup. + */ +void hyperv_cleanup(void) +{ + union hv_x64_msr_hypercall_contents hypercall_msr; + + /* Reset our OS id */ + wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); + + /* Reset the hypercall page */ + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); +} +EXPORT_SYMBOL_GPL(hyperv_cleanup); + /* * hv_do_hypercall- Invoke the specified hypercall */ diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 42505d1158d6..f8dc3700de67 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -171,5 +171,6 @@ void hv_remove_crash_handler(void); void hyperv_init(void); void hyperv_report_panic(struct pt_regs *regs); bool hv_is_hypercall_page_setup(void); +void hyperv_cleanup(void); #endif #endif diff --git a/drivers/hv/hv.c b/drivers/hv/hv.c index 60594fa3250d..0f73237bed0a 100644 --- a/drivers/hv/hv.c +++ b/drivers/hv/hv.c @@ -70,16 +70,6 @@ int hv_init(void) return 0; } -/* - * hv_cleanup - Cleanup routine. - * - * This routine is called normally during driver unloading or exiting. - */ -void hv_cleanup(bool crash) -{ - -} - /* * hv_post_message - Post a message using the hypervisor message IPC. * diff --git a/drivers/hv/hyperv_vmbus.h b/drivers/hv/hyperv_vmbus.h index 2463ef93c1f6..86b56b677dc3 100644 --- a/drivers/hv/hyperv_vmbus.h +++ b/drivers/hv/hyperv_vmbus.h @@ -255,8 +255,6 @@ struct hv_ring_buffer_debug_info { extern int hv_init(void); -extern void hv_cleanup(bool crash); - extern int hv_post_message(union hv_connection_id connection_id, enum hv_message_type message_type, void *payload, size_t payload_size); diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index 8e81346114d4..f8ebe13cf251 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -962,7 +962,7 @@ static int vmbus_bus_init(void) ret = bus_register(&hv_bus); if (ret) - goto err_cleanup; + return ret; hv_setup_vmbus_irq(vmbus_isr); @@ -1004,9 +1004,6 @@ err_alloc: bus_unregister(&hv_bus); -err_cleanup: - hv_cleanup(false); - return ret; } @@ -1462,7 +1459,7 @@ static void hv_kexec_handler(void) /* Make sure conn_state is set as hv_synic_cleanup checks for it */ mb(); cpuhp_remove_state(hyperv_cpuhp_online); - hv_cleanup(false); + hyperv_cleanup(); }; static void hv_crash_handler(struct pt_regs *regs) @@ -1475,7 +1472,7 @@ static void hv_crash_handler(struct pt_regs *regs) */ vmbus_connection.conn_state = DISCONNECTED; hv_synic_cleanup(smp_processor_id()); - hv_cleanup(true); + hyperv_cleanup(); }; static int __init hv_acpi_init(void) @@ -1535,7 +1532,6 @@ static void __exit vmbus_exit(void) &hyperv_panic_block); } bus_unregister(&hv_bus); - hv_cleanup(false); for_each_online_cpu(cpu) { tasklet_kill(hv_context.event_dpc[cpu]); } -- cgit v1.2.3 From 5647dbf8f0807a35421bd0232247b02413ef2cab Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 28 Jan 2017 12:37:15 -0700 Subject: Drivers: hv: restore TSC page cleanup before kexec We need to cleanup the TSC page before doing kexec/kdump or the new kernel may crash if it tries to use it. Fixes: 63ed4e0c67df ("Drivers: hv: vmbus: Consolidate all Hyper-V specific clocksource code") Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index d6b018b86c42..b371d0e984a9 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -178,6 +178,10 @@ void hyperv_cleanup(void) /* Reset the hypercall page */ hypercall_msr.as_uint64 = 0; wrmsrl(HV_X64_MSR_HYPERCALL, hypercall_msr.as_uint64); + + /* Reset the TSC page */ + hypercall_msr.as_uint64 = 0; + wrmsrl(HV_X64_MSR_REFERENCE_TSC, hypercall_msr.as_uint64); } EXPORT_SYMBOL_GPL(hyperv_cleanup); -- cgit v1.2.3 From aaaec6fc755447a1d056765b11b24d8ff2b81366 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 19:03:21 +0100 Subject: x86/irq: Make irq activate operations symmetric The recent commit which prevents double activation of interrupts unearthed interesting code in x86. The code (ab)uses irq_domain_activate_irq() to reconfigure an already activated interrupt. That trips over the prevention code now. Fix it by deactivating the interrupt before activating the new configuration. Fixes: 08d85f3ea99f1 "irqdomain: Avoid activating interrupts more than once" Reported-and-tested-by: Mike Galbraith Reported-and-tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Cc: Andrey Ryabinin Cc: Marc Zyngier Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701311901580.3457@nanos --- arch/x86/kernel/apic/io_apic.c | 2 ++ arch/x86/kernel/hpet.c | 1 + 2 files changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 1e35dd06b090..52f352b063fd 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -2117,6 +2117,7 @@ static inline void __init check_timer(void) if (idx != -1 && irq_trigger(idx)) unmask_ioapic_irq(irq_get_chip_data(0)); } + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); if (timer_irq_works()) { if (disable_timer_pin_1 > 0) @@ -2138,6 +2139,7 @@ static inline void __init check_timer(void) * legacy devices should be connected to IO APIC #0 */ replace_pin_at_irq_node(data, node, apic1, pin1, apic2, pin2); + irq_domain_deactivate_irq(irq_data); irq_domain_activate_irq(irq_data); legacy_pic->unmask(0); if (timer_irq_works()) { diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index 85e87b46c318..dc6ba5bda9fc 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -352,6 +352,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) } else { struct hpet_dev *hdev = EVT_TO_HPET_DEV(evt); + irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); disable_irq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); -- cgit v1.2.3 From 0becc0ae5b42828785b589f686725ff5bc3b9b25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 09:37:34 +0100 Subject: x86/mce: Make timer handling more robust Erik reported that on a preproduction hardware a CMCI storm triggers the BUG_ON in add_timer_on(). The reason is that the per CPU MCE timer is started by the CMCI logic before the MCE CPU hotplug callback starts the timer with add_timer_on(). So the timer is already queued which triggers the BUG. Using add_timer_on() is pretty pointless in this code because the timer is strictlty per CPU, initialized as pinned and all operations which arm the timer happen on the CPU to which the timer belongs. Simplify the whole machinery by using mod_timer() instead of add_timer_on() which avoids the problem because mod_timer() can handle already queued timers. Use __start_timer() everywhere so the earliest armed expiry time is preserved. Reported-by: Erik Veijola Tested-by: Borislav Petkov Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Cc: Tony Luck Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1701310936080.3457@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce.c | 31 ++++++++++++------------------- 1 file changed, 12 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c index 00ef43233e03..537c6647d84c 100644 --- a/arch/x86/kernel/cpu/mcheck/mce.c +++ b/arch/x86/kernel/cpu/mcheck/mce.c @@ -1373,20 +1373,15 @@ static unsigned long mce_adjust_timer_default(unsigned long interval) static unsigned long (*mce_adjust_timer)(unsigned long interval) = mce_adjust_timer_default; -static void __restart_timer(struct timer_list *t, unsigned long interval) +static void __start_timer(struct timer_list *t, unsigned long interval) { unsigned long when = jiffies + interval; unsigned long flags; local_irq_save(flags); - if (timer_pending(t)) { - if (time_before(when, t->expires)) - mod_timer(t, when); - } else { - t->expires = round_jiffies(when); - add_timer_on(t, smp_processor_id()); - } + if (!timer_pending(t) || time_before(when, t->expires)) + mod_timer(t, round_jiffies(when)); local_irq_restore(flags); } @@ -1421,7 +1416,7 @@ static void mce_timer_fn(unsigned long data) done: __this_cpu_write(mce_next_interval, iv); - __restart_timer(t, iv); + __start_timer(t, iv); } /* @@ -1432,7 +1427,7 @@ void mce_timer_kick(unsigned long interval) struct timer_list *t = this_cpu_ptr(&mce_timer); unsigned long iv = __this_cpu_read(mce_next_interval); - __restart_timer(t, interval); + __start_timer(t, interval); if (interval < iv) __this_cpu_write(mce_next_interval, interval); @@ -1779,17 +1774,15 @@ static void __mcheck_cpu_clear_vendor(struct cpuinfo_x86 *c) } } -static void mce_start_timer(unsigned int cpu, struct timer_list *t) +static void mce_start_timer(struct timer_list *t) { unsigned long iv = check_interval * HZ; if (mca_cfg.ignore_ce || !iv) return; - per_cpu(mce_next_interval, cpu) = iv; - - t->expires = round_jiffies(jiffies + iv); - add_timer_on(t, cpu); + this_cpu_write(mce_next_interval, iv); + __start_timer(t, iv); } static void __mcheck_cpu_setup_timer(void) @@ -1806,7 +1799,7 @@ static void __mcheck_cpu_init_timer(void) unsigned int cpu = smp_processor_id(); setup_pinned_timer(t, mce_timer_fn, cpu); - mce_start_timer(cpu, t); + mce_start_timer(t); } /* Handle unconfigured int18 (should never happen) */ @@ -2566,7 +2559,7 @@ static int mce_cpu_dead(unsigned int cpu) static int mce_cpu_online(unsigned int cpu) { - struct timer_list *t = &per_cpu(mce_timer, cpu); + struct timer_list *t = this_cpu_ptr(&mce_timer); int ret; mce_device_create(cpu); @@ -2577,13 +2570,13 @@ static int mce_cpu_online(unsigned int cpu) return ret; } mce_reenable_cpu(); - mce_start_timer(cpu, t); + mce_start_timer(t); return 0; } static int mce_cpu_pre_down(unsigned int cpu) { - struct timer_list *t = &per_cpu(mce_timer, cpu); + struct timer_list *t = this_cpu_ptr(&mce_timer); mce_disable_cpu(); del_timer_sync(t); -- cgit v1.2.3 From dd86e373e09fb16b83e8adf5c48c421a4ca76468 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:38 +0100 Subject: perf/x86/intel/rapl: Make package handling more robust The package management code in RAPL relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left RAPL in broken state. This was not noticed because on a regular boot all CPUs are online before RAPL is initialized. A possible fix would be to reintroduce the mess which allocates a package data structure in CPU prepare and when it turns out to already exist in starting throw it away later in the CPU online callback. But that's a horrible hack and not required at all because RAPL becomes functional for perf only in the CPU online callback. That's correct because user space is not yet informed about the CPU being onlined, so nothing caan rely on RAPL being available on that particular CPU. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. This also adds a missing check for available package data in the event_init() function. Reported-by: Yasuaki Ishimatsu Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.212593966@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/rapl.c | 60 +++++++++++++++++++------------------------- include/linux/cpuhotplug.h | 1 - 2 files changed, 26 insertions(+), 35 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 17c3564d087a..22ef4f72cf32 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -161,7 +161,13 @@ static u64 rapl_timer_ms; static inline struct rapl_pmu *cpu_to_rapl_pmu(unsigned int cpu) { - return rapl_pmus->pmus[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < rapl_pmus->maxpkg ? rapl_pmus->pmus[pkgid] : NULL; } static inline u64 rapl_read_counter(struct perf_event *event) @@ -402,6 +408,8 @@ static int rapl_pmu_event_init(struct perf_event *event) /* must be done before validate_group */ pmu = cpu_to_rapl_pmu(event->cpu); + if (!pmu) + return -EINVAL; event->cpu = pmu->cpu; event->pmu_private = pmu; event->hw.event_base = msr; @@ -585,6 +593,20 @@ static int rapl_cpu_online(unsigned int cpu) struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); int target; + if (!pmu) { + pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); + if (!pmu) + return -ENOMEM; + + raw_spin_lock_init(&pmu->lock); + INIT_LIST_HEAD(&pmu->active_list); + pmu->pmu = &rapl_pmus->pmu; + pmu->timer_interval = ms_to_ktime(rapl_timer_ms); + rapl_hrtimer_init(pmu); + + rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; + } + /* * Check if there is an online cpu in the package which collects rapl * events already. @@ -598,27 +620,6 @@ static int rapl_cpu_online(unsigned int cpu) return 0; } -static int rapl_cpu_prepare(unsigned int cpu) -{ - struct rapl_pmu *pmu = cpu_to_rapl_pmu(cpu); - - if (pmu) - return 0; - - pmu = kzalloc_node(sizeof(*pmu), GFP_KERNEL, cpu_to_node(cpu)); - if (!pmu) - return -ENOMEM; - - raw_spin_lock_init(&pmu->lock); - INIT_LIST_HEAD(&pmu->active_list); - pmu->pmu = &rapl_pmus->pmu; - pmu->timer_interval = ms_to_ktime(rapl_timer_ms); - pmu->cpu = -1; - rapl_hrtimer_init(pmu); - rapl_pmus->pmus[topology_logical_package_id(cpu)] = pmu; - return 0; -} - static int rapl_check_hw_unit(bool apply_quirk) { u64 msr_rapl_power_unit_bits; @@ -803,29 +804,21 @@ static int __init rapl_pmu_init(void) /* * Install callbacks. Core will call them for each online cpu. */ - - ret = cpuhp_setup_state(CPUHP_PERF_X86_RAPL_PREP, "perf/x86/rapl:prepare", - rapl_cpu_prepare, NULL); - if (ret) - goto out; - ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_RAPL_ONLINE, "perf/x86/rapl:online", rapl_cpu_online, rapl_cpu_offline); if (ret) - goto out1; + goto out; ret = perf_pmu_register(&rapl_pmus->pmu, "power", -1); if (ret) - goto out2; + goto out1; rapl_advertise(); return 0; -out2: - cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out1: - cpuhp_remove_state(CPUHP_PERF_X86_RAPL_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_RAPL_ONLINE); out: pr_warn("Initialization failed (%d), disabled\n", ret); cleanup_rapl_pmus(); @@ -836,7 +829,6 @@ module_init(rapl_pmu_init); static void __exit intel_rapl_exit(void) { cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_RAPL_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_RAPL_PREP); perf_pmu_unregister(&rapl_pmus->pmu); cleanup_rapl_pmus(); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index d936a0021839..8329f3dc592c 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -10,7 +10,6 @@ enum cpuhp_state { CPUHP_PERF_X86_PREPARE, CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, - CPUHP_PERF_X86_RAPL_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, CPUHP_PERF_SUPERH, -- cgit v1.2.3 From 1aa6cfd33df492939b0be15ebdbcff1f8ae5ddb6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:39 +0100 Subject: perf/x86/intel/uncore: Clean up hotplug conversion fallout The recent conversion to the hotplug state machine kept two mechanisms from the original code: 1) The first_init logic which adds the number of online CPUs in a package to the refcount. That's wrong because the callbacks are executed for all online CPUs. Remove it so the refcounting is correct. 2) The on_each_cpu() call to undo box->init() in the error handling path. That's bogus because when the prepare callback fails no box has been initialized yet. Remove it. Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Cc: Yasuaki Ishimatsu Fixes: 1a246b9f58c6 ("perf/x86/intel/uncore: Convert to hotplug state machine") Link: http://lkml.kernel.org/r/20170131230141.298032324@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 44 ++++-------------------------------------- 1 file changed, 4 insertions(+), 40 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 8c4ccdc3a3f3..56c5235dcc29 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -764,30 +764,6 @@ static void uncore_pmu_unregister(struct intel_uncore_pmu *pmu) pmu->registered = false; } -static void __uncore_exit_boxes(struct intel_uncore_type *type, int cpu) -{ - struct intel_uncore_pmu *pmu = type->pmus; - struct intel_uncore_box *box; - int i, pkg; - - if (pmu) { - pkg = topology_physical_package_id(cpu); - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box) - uncore_box_exit(box); - } - } -} - -static void uncore_exit_boxes(void *dummy) -{ - struct intel_uncore_type **types; - - for (types = uncore_msr_uncores; *types; types++) - __uncore_exit_boxes(*types++, smp_processor_id()); -} - static void uncore_free_boxes(struct intel_uncore_pmu *pmu) { int pkg; @@ -1078,22 +1054,12 @@ static int uncore_cpu_dying(unsigned int cpu) return 0; } -static int first_init; - static int uncore_cpu_starting(unsigned int cpu) { struct intel_uncore_type *type, **types = uncore_msr_uncores; struct intel_uncore_pmu *pmu; struct intel_uncore_box *box; - int i, pkg, ncpus = 1; - - if (first_init) { - /* - * On init we get the number of online cpus in the package - * and set refcount for all of them. - */ - ncpus = cpumask_weight(topology_core_cpumask(cpu)); - } + int i, pkg; pkg = topology_logical_package_id(cpu); for (; *types; types++) { @@ -1104,7 +1070,7 @@ static int uncore_cpu_starting(unsigned int cpu) if (!box) continue; /* The first cpu on a package activates the box */ - if (atomic_add_return(ncpus, &box->refcnt) == ncpus) + if (atomic_inc_return(&box->refcnt) == 1) uncore_box_init(box); } } @@ -1408,19 +1374,17 @@ static int __init intel_uncore_init(void) "perf/x86/intel/uncore:prepare", uncore_cpu_prepare, NULL); } - first_init = 1; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, "perf/x86/uncore:starting", uncore_cpu_starting, uncore_cpu_dying); - first_init = 0; + cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, "perf/x86/uncore:online", uncore_event_cpu_online, uncore_event_cpu_offline); return 0; err: - /* Undo box->init_box() */ - on_each_cpu_mask(&uncore_cpu_mask, uncore_exit_boxes, NULL, 1); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); return ret; -- cgit v1.2.3 From fff4b87e594ad3d2e4f51e8d3d86a6f9d3d8b654 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Tue, 31 Jan 2017 23:58:40 +0100 Subject: perf/x86/intel/uncore: Make package handling more robust The package management code in uncore relies on package mapping being available before a CPU is started. This changed with: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") because the ACPI/BIOS information turned out to be unreliable, but that left uncore in broken state. This was not noticed because on a regular boot all CPUs are online before uncore is initialized. Move the allocation to the CPU online callback and simplify the hotplug handling. At this point the package mapping is established and correct. Signed-off-by: Thomas Gleixner Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Sebastian Siewior Cc: Stephane Eranian Cc: Vince Weaver Cc: Yasuaki Ishimatsu Fixes: 9d85eb9119f4 ("x86/smpboot: Make logical package management more robust") Link: http://lkml.kernel.org/r/20170131230141.377156255@linutronix.de Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 196 +++++++++++++++++++---------------------- include/linux/cpuhotplug.h | 2 - 2 files changed, 91 insertions(+), 107 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 56c5235dcc29..1ab45976474d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -100,7 +100,13 @@ ssize_t uncore_event_show(struct kobject *kobj, struct intel_uncore_box *uncore_pmu_to_box(struct intel_uncore_pmu *pmu, int cpu) { - return pmu->boxes[topology_logical_package_id(cpu)]; + unsigned int pkgid = topology_logical_package_id(cpu); + + /* + * The unsigned check also catches the '-1' return value for non + * existent mappings in the topology map. + */ + return pkgid < max_packages ? pmu->boxes[pkgid] : NULL; } u64 uncore_msr_read_counter(struct intel_uncore_box *box, struct perf_event *event) @@ -1034,76 +1040,6 @@ static void uncore_pci_exit(void) } } -static int uncore_cpu_dying(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (box && atomic_dec_return(&box->refcnt) == 0) - uncore_box_exit(box); - } - } - return 0; -} - -static int uncore_cpu_starting(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - box = pmu->boxes[pkg]; - if (!box) - continue; - /* The first cpu on a package activates the box */ - if (atomic_inc_return(&box->refcnt) == 1) - uncore_box_init(box); - } - } - - return 0; -} - -static int uncore_cpu_prepare(unsigned int cpu) -{ - struct intel_uncore_type *type, **types = uncore_msr_uncores; - struct intel_uncore_pmu *pmu; - struct intel_uncore_box *box; - int i, pkg; - - pkg = topology_logical_package_id(cpu); - for (; *types; types++) { - type = *types; - pmu = type->pmus; - for (i = 0; i < type->num_boxes; i++, pmu++) { - if (pmu->boxes[pkg]) - continue; - /* First cpu of a package allocates the box */ - box = uncore_alloc_box(type, cpu_to_node(cpu)); - if (!box) - return -ENOMEM; - box->pmu = pmu; - box->pkgid = pkg; - pmu->boxes[pkg] = box; - } - } - return 0; -} - static void uncore_change_type_ctx(struct intel_uncore_type *type, int old_cpu, int new_cpu) { @@ -1143,12 +1079,14 @@ static void uncore_change_context(struct intel_uncore_type **uncores, static int uncore_event_cpu_offline(unsigned int cpu) { - int target; + struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, pkg, target; /* Check if exiting cpu is used for collecting uncore events */ if (!cpumask_test_and_clear_cpu(cpu, &uncore_cpu_mask)) - return 0; - + goto unref; /* Find a new cpu to collect uncore events */ target = cpumask_any_but(topology_core_cpumask(cpu), cpu); @@ -1160,12 +1098,82 @@ static int uncore_event_cpu_offline(unsigned int cpu) uncore_change_context(uncore_msr_uncores, cpu, target); uncore_change_context(uncore_pci_uncores, cpu, target); + +unref: + /* Clear the references */ + pkg = topology_logical_package_id(cpu); + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (box && atomic_dec_return(&box->refcnt) == 0) + uncore_box_exit(box); + } + } return 0; } +static int allocate_boxes(struct intel_uncore_type **types, + unsigned int pkg, unsigned int cpu) +{ + struct intel_uncore_box *box, *tmp; + struct intel_uncore_type *type; + struct intel_uncore_pmu *pmu; + LIST_HEAD(allocated); + int i; + + /* Try to allocate all required boxes */ + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + if (pmu->boxes[pkg]) + continue; + box = uncore_alloc_box(type, cpu_to_node(cpu)); + if (!box) + goto cleanup; + box->pmu = pmu; + box->pkgid = pkg; + list_add(&box->active_list, &allocated); + } + } + /* Install them in the pmus */ + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + box->pmu->boxes[pkg] = box; + } + return 0; + +cleanup: + list_for_each_entry_safe(box, tmp, &allocated, active_list) { + list_del_init(&box->active_list); + kfree(box); + } + return -ENOMEM; +} + static int uncore_event_cpu_online(unsigned int cpu) { - int target; + struct intel_uncore_type *type, **types = uncore_msr_uncores; + struct intel_uncore_pmu *pmu; + struct intel_uncore_box *box; + int i, ret, pkg, target; + + pkg = topology_logical_package_id(cpu); + ret = allocate_boxes(types, pkg, cpu); + if (ret) + return ret; + + for (; *types; types++) { + type = *types; + pmu = type->pmus; + for (i = 0; i < type->num_boxes; i++, pmu++) { + box = pmu->boxes[pkg]; + if (!box && atomic_inc_return(&box->refcnt) == 1) + uncore_box_init(box); + } + } /* * Check if there is an online cpu in the package @@ -1355,33 +1363,13 @@ static int __init intel_uncore_init(void) if (cret && pret) return -ENODEV; - /* - * Install callbacks. Core will call them for each online cpu. - * - * The first online cpu of each package allocates and takes - * the refcounts for all other online cpus in that package. - * If msrs are not enabled no allocation is required and - * uncore_cpu_prepare() is not called for each online cpu. - */ - if (!cret) { - ret = cpuhp_setup_state(CPUHP_PERF_X86_UNCORE_PREP, - "perf/x86/intel/uncore:prepare", - uncore_cpu_prepare, NULL); - if (ret) - goto err; - } else { - cpuhp_setup_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP, - "perf/x86/intel/uncore:prepare", - uncore_cpu_prepare, NULL); - } - - cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_STARTING, - "perf/x86/uncore:starting", - uncore_cpu_starting, uncore_cpu_dying); - - cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, - "perf/x86/uncore:online", - uncore_event_cpu_online, uncore_event_cpu_offline); + /* Install hotplug callbacks to setup the targets for each package */ + ret = cpuhp_setup_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE, + "perf/x86/intel/uncore:online", + uncore_event_cpu_online, + uncore_event_cpu_offline); + if (ret) + goto err; return 0; err: @@ -1393,9 +1381,7 @@ module_init(intel_uncore_init); static void __exit intel_uncore_exit(void) { - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_ONLINE); - cpuhp_remove_state_nocalls(CPUHP_AP_PERF_X86_UNCORE_STARTING); - cpuhp_remove_state_nocalls(CPUHP_PERF_X86_UNCORE_PREP); + cpuhp_remove_state(CPUHP_AP_PERF_X86_UNCORE_ONLINE); uncore_types_exit(uncore_msr_uncores); uncore_pci_exit(); } diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 8329f3dc592c..921acaaa1601 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -8,7 +8,6 @@ enum cpuhp_state { CPUHP_CREATE_THREADS, CPUHP_PERF_PREPARE, CPUHP_PERF_X86_PREPARE, - CPUHP_PERF_X86_UNCORE_PREP, CPUHP_PERF_X86_AMD_UNCORE_PREP, CPUHP_PERF_BFIN, CPUHP_PERF_POWER, @@ -85,7 +84,6 @@ enum cpuhp_state { CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, - CPUHP_AP_PERF_X86_UNCORE_STARTING, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, CPUHP_AP_PERF_X86_AMD_IBS_STARTING, -- cgit v1.2.3 From 2bd79f30eea1a7c3082c930a91370bb68435b86d Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 31 Jan 2017 13:21:33 +0000 Subject: efi: Deduplicate efi_file_size() / _read() / _close() There's one ARM, one x86_32 and one x86_64 version which can be folded into a single shared version by masking their differences with the shiny new efi_call_proto() macro. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 148 ------------------------- drivers/firmware/efi/libstub/arm-stub.c | 69 ------------ drivers/firmware/efi/libstub/efi-stub-helper.c | 63 +++++++++++ drivers/firmware/efi/libstub/efistub.h | 8 -- 4 files changed, 63 insertions(+), 225 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index ff01c8fc76f7..f1cf284d631e 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -38,154 +38,6 @@ static void setup_boot_services##bits(struct efi_config *c) \ BOOT_SERVICES(32); BOOT_SERVICES(64); -void efi_char16_printk(efi_system_table_t *, efi_char16_t *); - -static efi_status_t -__file_size32(void *__fh, efi_char16_t *filename_16, - void **handle, u64 *file_sz) -{ - efi_file_handle_32_t *h, *fh = __fh; - efi_file_info_t *info; - efi_status_t status; - efi_guid_t info_guid = EFI_FILE_INFO_ID; - u32 info_sz; - - status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to open file: "); - efi_char16_printk(sys_table, filename_16); - efi_printk(sys_table, "\n"); - return status; - } - - *handle = h; - - info_sz = 0; - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table, "Failed to get file info size\n"); - return status; - } - -grow: - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to alloc mem for file info\n"); - return status; - } - - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, info); - if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_early(free_pool, info); - goto grow; - } - - *file_sz = info->file_size; - efi_call_early(free_pool, info); - - if (status != EFI_SUCCESS) - efi_printk(sys_table, "Failed to get initrd info\n"); - - return status; -} - -static efi_status_t -__file_size64(void *__fh, efi_char16_t *filename_16, - void **handle, u64 *file_sz) -{ - efi_file_handle_64_t *h, *fh = __fh; - efi_file_info_t *info; - efi_status_t status; - efi_guid_t info_guid = EFI_FILE_INFO_ID; - u64 info_sz; - - status = efi_early->call((unsigned long)fh->open, fh, &h, filename_16, - EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to open file: "); - efi_char16_printk(sys_table, filename_16); - efi_printk(sys_table, "\n"); - return status; - } - - *handle = h; - - info_sz = 0; - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table, "Failed to get file info size\n"); - return status; - } - -grow: - status = efi_call_early(allocate_pool, EFI_LOADER_DATA, - info_sz, (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table, "Failed to alloc mem for file info\n"); - return status; - } - - status = efi_early->call((unsigned long)h->get_info, h, &info_guid, - &info_sz, info); - if (status == EFI_BUFFER_TOO_SMALL) { - efi_call_early(free_pool, info); - goto grow; - } - - *file_sz = info->file_size; - efi_call_early(free_pool, info); - - if (status != EFI_SUCCESS) - efi_printk(sys_table, "Failed to get initrd info\n"); - - return status; -} -efi_status_t -efi_file_size(efi_system_table_t *sys_table, void *__fh, - efi_char16_t *filename_16, void **handle, u64 *file_sz) -{ - if (efi_early->is64) - return __file_size64(__fh, filename_16, handle, file_sz); - - return __file_size32(__fh, filename_16, handle, file_sz); -} - -efi_status_t -efi_file_read(void *handle, unsigned long *size, void *addr) -{ - unsigned long func; - - if (efi_early->is64) { - efi_file_handle_64_t *fh = handle; - - func = (unsigned long)fh->read; - return efi_early->call(func, handle, size, addr); - } else { - efi_file_handle_32_t *fh = handle; - - func = (unsigned long)fh->read; - return efi_early->call(func, handle, size, addr); - } -} - -efi_status_t efi_file_close(void *handle) -{ - if (efi_early->is64) { - efi_file_handle_64_t *fh = handle; - - return efi_early->call((unsigned long)fh->close, handle); - } else { - efi_file_handle_32_t *fh = handle; - - return efi_early->call((unsigned long)fh->close, handle); - } -} - static inline efi_status_t __open_volume32(void *__image, void **__fh) { efi_file_io_interface_t *io; diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index b4f7d78f9e8b..6fca48c9e054 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -91,75 +91,6 @@ efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, return status; } -efi_status_t efi_file_close(void *handle) -{ - efi_file_handle_t *fh = handle; - - return fh->close(handle); -} - -efi_status_t -efi_file_read(void *handle, unsigned long *size, void *addr) -{ - efi_file_handle_t *fh = handle; - - return fh->read(handle, size, addr); -} - - -efi_status_t -efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, - efi_char16_t *filename_16, void **handle, u64 *file_sz) -{ - efi_file_handle_t *h, *fh = __fh; - efi_file_info_t *info; - efi_status_t status; - efi_guid_t info_guid = EFI_FILE_INFO_ID; - unsigned long info_sz; - - status = fh->open(fh, &h, filename_16, EFI_FILE_MODE_READ, (u64)0); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to open file: "); - efi_char16_printk(sys_table_arg, filename_16); - efi_printk(sys_table_arg, "\n"); - return status; - } - - *handle = h; - - info_sz = 0; - status = h->get_info(h, &info_guid, &info_sz, NULL); - if (status != EFI_BUFFER_TOO_SMALL) { - efi_printk(sys_table_arg, "Failed to get file info size\n"); - return status; - } - -grow: - status = sys_table_arg->boottime->allocate_pool(EFI_LOADER_DATA, - info_sz, (void **)&info); - if (status != EFI_SUCCESS) { - efi_printk(sys_table_arg, "Failed to alloc mem for file info\n"); - return status; - } - - status = h->get_info(h, &info_guid, &info_sz, - info); - if (status == EFI_BUFFER_TOO_SMALL) { - sys_table_arg->boottime->free_pool(info); - goto grow; - } - - *file_sz = info->file_size; - sys_table_arg->boottime->free_pool(info); - - if (status != EFI_SUCCESS) - efi_printk(sys_table_arg, "Failed to get initrd info\n"); - - return status; -} - - - void efi_char16_printk(efi_system_table_t *sys_table_arg, efi_char16_t *str) { diff --git a/drivers/firmware/efi/libstub/efi-stub-helper.c b/drivers/firmware/efi/libstub/efi-stub-helper.c index 757badc1debb..6ee9164251a9 100644 --- a/drivers/firmware/efi/libstub/efi-stub-helper.c +++ b/drivers/firmware/efi/libstub/efi-stub-helper.c @@ -338,6 +338,69 @@ void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, efi_call_early(free_pages, addr, nr_pages); } +static efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, + efi_char16_t *filename_16, void **handle, + u64 *file_sz) +{ + efi_file_handle_t *h, *fh = __fh; + efi_file_info_t *info; + efi_status_t status; + efi_guid_t info_guid = EFI_FILE_INFO_ID; + unsigned long info_sz; + + status = efi_call_proto(efi_file_handle, open, fh, &h, filename_16, + EFI_FILE_MODE_READ, (u64)0); + if (status != EFI_SUCCESS) { + efi_printk(sys_table_arg, "Failed to open file: "); + efi_char16_printk(sys_table_arg, filename_16); + efi_printk(sys_table_arg, "\n"); + return status; + } + + *handle = h; + + info_sz = 0; + status = efi_call_proto(efi_file_handle, get_info, h, &info_guid, + &info_sz, NULL); + if (status != EFI_BUFFER_TOO_SMALL) { + efi_printk(sys_table_arg, "Failed to get file info size\n"); + return status; + } + +grow: + status = efi_call_early(allocate_pool, EFI_LOADER_DATA, + info_sz, (void **)&info); + if (status != EFI_SUCCESS) { + efi_printk(sys_table_arg, "Failed to alloc mem for file info\n"); + return status; + } + + status = efi_call_proto(efi_file_handle, get_info, h, &info_guid, + &info_sz, info); + if (status == EFI_BUFFER_TOO_SMALL) { + efi_call_early(free_pool, info); + goto grow; + } + + *file_sz = info->file_size; + efi_call_early(free_pool, info); + + if (status != EFI_SUCCESS) + efi_printk(sys_table_arg, "Failed to get initrd info\n"); + + return status; +} + +static efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr) +{ + return efi_call_proto(efi_file_handle, read, handle, size, addr); +} + +static efi_status_t efi_file_close(void *handle) +{ + return efi_call_proto(efi_file_handle, close, handle); +} + /* * Parse the ASCII string 'cmdline' for EFI options, denoted by the efi= * option, e.g. efi=nochunk. diff --git a/drivers/firmware/efi/libstub/efistub.h b/drivers/firmware/efi/libstub/efistub.h index 0e2a96b12cb3..71c4d0e3c4ed 100644 --- a/drivers/firmware/efi/libstub/efistub.h +++ b/drivers/firmware/efi/libstub/efistub.h @@ -29,14 +29,6 @@ void efi_char16_printk(efi_system_table_t *, efi_char16_t *); efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, void **__fh); -efi_status_t efi_file_size(efi_system_table_t *sys_table_arg, void *__fh, - efi_char16_t *filename_16, void **handle, - u64 *file_sz); - -efi_status_t efi_file_read(void *handle, unsigned long *size, void *addr); - -efi_status_t efi_file_close(void *handle); - unsigned long get_dram_base(efi_system_table_t *sys_table_arg); efi_status_t allocate_new_fdt_and_exit_boot(efi_system_table_t *sys_table, -- cgit v1.2.3 From db4545d9a7881db0a7e18599e6cd1adbcb93db33 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 31 Jan 2017 13:21:34 +0000 Subject: x86/efi: Deduplicate efi_char16_printk() Eliminate the separate 32-bit and 64x- bit code paths by way of the shiny new efi_call_proto() macro. No functional change intended. Signed-off-by: Lukas Wunner Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-3-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 26 ++------------------------ include/linux/efi.h | 8 ++++---- 2 files changed, 6 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f1cf284d631e..6d3aeabbce68 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -101,30 +101,8 @@ efi_open_volume(efi_system_table_t *sys_table, void *__image, void **__fh) void efi_char16_printk(efi_system_table_t *table, efi_char16_t *str) { - unsigned long output_string; - size_t offset; - - if (efi_early->is64) { - struct efi_simple_text_output_protocol_64 *out; - u64 *func; - - offset = offsetof(typeof(*out), output_string); - output_string = efi_early->text_output + offset; - out = (typeof(out))(unsigned long)efi_early->text_output; - func = (u64 *)output_string; - - efi_early->call(*func, out, str); - } else { - struct efi_simple_text_output_protocol_32 *out; - u32 *func; - - offset = offsetof(typeof(*out), output_string); - output_string = efi_early->text_output + offset; - out = (typeof(out))(unsigned long)efi_early->text_output; - func = (u32 *)output_string; - - efi_early->call(*func, out, str); - } + efi_call_proto(efi_simple_text_output_protocol, output_string, + efi_early->text_output, str); } static efi_status_t diff --git a/include/linux/efi.h b/include/linux/efi.h index 5b1af30ece55..6642c4d9d11d 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1240,17 +1240,17 @@ struct efivar_entry { bool deleting; }; -struct efi_simple_text_output_protocol_32 { +typedef struct { u32 reset; u32 output_string; u32 test_string; -}; +} efi_simple_text_output_protocol_32_t; -struct efi_simple_text_output_protocol_64 { +typedef struct { u64 reset; u64 output_string; u64 test_string; -}; +} efi_simple_text_output_protocol_64_t; struct efi_simple_text_output_protocol { void *reset; -- cgit v1.2.3 From 18141e89a76c58101860486fd9cc0999da2eed43 Mon Sep 17 00:00:00 2001 From: Sai Praneeth Date: Tue, 31 Jan 2017 13:21:37 +0000 Subject: x86/efi: Add support for EFI_MEMORY_ATTRIBUTES_TABLE UEFI v2.6 introduces EFI_MEMORY_ATTRIBUTES_TABLE which describes memory protections that may be applied to the EFI Runtime code and data regions by the kernel. This enables the kernel to map these regions more strictly thereby increasing security. Presently, the only valid bits for the attribute field of a memory descriptor are EFI_MEMORY_RO and EFI_MEMORY_XP, hence use these bits to update the mappings in efi_pgd. The UEFI specification recommends to use this feature instead of EFI_PROPERTIES_TABLE and hence while updating EFI mappings we first check for EFI_MEMORY_ATTRIBUTES_TABLE and if it's present we update the mappings according to this table and hence disregarding EFI_PROPERTIES_TABLE even if it's published by the firmware. We consider EFI_PROPERTIES_TABLE only when EFI_MEMORY_ATTRIBUTES_TABLE is absent. Signed-off-by: Sai Praneeth Prakhya Signed-off-by: Matt Fleming Signed-off-by: Ard Biesheuvel Cc: Borislav Petkov Cc: Fenghua Yu Cc: Lee, Chun-Yi Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Ravi Shankar Cc: Ricardo Neri Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-6-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi_64.c | 64 +++++++++++++++++++++++++++++++++++------- drivers/firmware/efi/memattr.c | 5 +++- 2 files changed, 58 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi_64.c b/arch/x86/platform/efi/efi_64.c index 2f25a363068c..a4695da42d77 100644 --- a/arch/x86/platform/efi/efi_64.c +++ b/arch/x86/platform/efi/efi_64.c @@ -414,10 +414,44 @@ void __init parse_efi_setup(u64 phys_addr, u32 data_len) efi_setup = phys_addr + sizeof(struct setup_data); } -void __init efi_runtime_update_mappings(void) +static int __init efi_update_mappings(efi_memory_desc_t *md, unsigned long pf) { unsigned long pfn; pgd_t *pgd = efi_pgd; + int err1, err2; + + /* Update the 1:1 mapping */ + pfn = md->phys_addr >> PAGE_SHIFT; + err1 = kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf); + if (err1) { + pr_err("Error while updating 1:1 mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + err2 = kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf); + if (err2) { + pr_err("Error while updating VA mapping PA 0x%llx -> VA 0x%llx!\n", + md->phys_addr, md->virt_addr); + } + + return err1 || err2; +} + +static int __init efi_update_mem_attr(struct mm_struct *mm, efi_memory_desc_t *md) +{ + unsigned long pf = 0; + + if (md->attribute & EFI_MEMORY_XP) + pf |= _PAGE_NX; + + if (!(md->attribute & EFI_MEMORY_RO)) + pf |= _PAGE_RW; + + return efi_update_mappings(md, pf); +} + +void __init efi_runtime_update_mappings(void) +{ efi_memory_desc_t *md; if (efi_enabled(EFI_OLD_MEMMAP)) { @@ -426,6 +460,24 @@ void __init efi_runtime_update_mappings(void) return; } + /* + * Use the EFI Memory Attribute Table for mapping permissions if it + * exists, since it is intended to supersede EFI_PROPERTIES_TABLE. + */ + if (efi_enabled(EFI_MEM_ATTR)) { + efi_memattr_apply_permissions(NULL, efi_update_mem_attr); + return; + } + + /* + * EFI_MEMORY_ATTRIBUTES_TABLE is intended to replace + * EFI_PROPERTIES_TABLE. So, use EFI_PROPERTIES_TABLE to update + * permissions only if EFI_MEMORY_ATTRIBUTES_TABLE is not + * published by the firmware. Even if we find a buggy implementation of + * EFI_MEMORY_ATTRIBUTES_TABLE, don't fall back to + * EFI_PROPERTIES_TABLE, because of the same reason. + */ + if (!efi_enabled(EFI_NX_PE_DATA)) return; @@ -446,15 +498,7 @@ void __init efi_runtime_update_mappings(void) (md->type != EFI_RUNTIME_SERVICES_CODE)) pf |= _PAGE_RW; - /* Update the 1:1 mapping */ - pfn = md->phys_addr >> PAGE_SHIFT; - if (kernel_map_pages_in_pgd(pgd, pfn, md->phys_addr, md->num_pages, pf)) - pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", - md->phys_addr, md->virt_addr); - - if (kernel_map_pages_in_pgd(pgd, pfn, md->virt_addr, md->num_pages, pf)) - pr_warn("Error mapping PA 0x%llx -> VA 0x%llx!\n", - md->phys_addr, md->virt_addr); + efi_update_mappings(md, pf); } } diff --git a/drivers/firmware/efi/memattr.c b/drivers/firmware/efi/memattr.c index 402197460507..8986757eafaf 100644 --- a/drivers/firmware/efi/memattr.c +++ b/drivers/firmware/efi/memattr.c @@ -175,8 +175,11 @@ int __init efi_memattr_apply_permissions(struct mm_struct *mm, md.phys_addr + size - 1, efi_md_typeattr_format(buf, sizeof(buf), &md)); - if (valid) + if (valid) { ret = fn(mm, &md); + if (ret) + pr_err("Error updating mappings, skipping subsequent md's\n"); + } } memunmap(tbl); return ret; -- cgit v1.2.3 From 7b0a911478c74ca02581d496f732c10e811e894f Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 31 Jan 2017 13:21:40 +0000 Subject: efi/x86: Move the EFI BGRT init code to early init code Before invoking the arch specific handler, efi_mem_reserve() reserves the given memory region through memblock. efi_bgrt_init() will call efi_mem_reserve() after mm_init(), at which time memblock is dead and should not be used anymore. The EFI BGRT code depends on ACPI initialization to get the BGRT ACPI table, so move parsing of the BGRT table to ACPI early boot code to ensure that efi_mem_reserve() in EFI BGRT code still use memblock safely. Tested-by: Bhupesh Sharma Signed-off-by: Dave Young Signed-off-by: Ard Biesheuvel Cc: Len Brown Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Rafael J. Wysocki Cc: Thomas Gleixner Cc: linux-acpi@vger.kernel.org Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-9-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/acpi/boot.c | 9 ++++++ arch/x86/platform/efi/efi-bgrt.c | 59 +++++++++++++++++----------------------- arch/x86/platform/efi/efi.c | 5 ---- drivers/acpi/bgrt.c | 28 +++++++++++++------ include/linux/efi-bgrt.h | 11 ++++---- init/main.c | 1 - 6 files changed, 59 insertions(+), 54 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 64422f850e95..7ff007ed899d 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include @@ -1557,6 +1558,12 @@ int __init early_acpi_boot_init(void) return 0; } +static int __init acpi_parse_bgrt(struct acpi_table_header *table) +{ + efi_bgrt_init(table); + return 0; +} + int __init acpi_boot_init(void) { /* those are executed after early-quirks are executed */ @@ -1581,6 +1588,8 @@ int __init acpi_boot_init(void) acpi_process_madt(); acpi_table_parse(ACPI_SIG_HPET, acpi_parse_hpet); + if (IS_ENABLED(CONFIG_ACPI_BGRT)) + acpi_table_parse(ACPI_SIG_BGRT, acpi_parse_bgrt); if (!acpi_noirq) x86_init.pci.init = pci_acpi_init; diff --git a/arch/x86/platform/efi/efi-bgrt.c b/arch/x86/platform/efi/efi-bgrt.c index 6aad870e8962..04ca8764f0c0 100644 --- a/arch/x86/platform/efi/efi-bgrt.c +++ b/arch/x86/platform/efi/efi-bgrt.c @@ -19,8 +19,7 @@ #include #include -struct acpi_table_bgrt *bgrt_tab; -void *__initdata bgrt_image; +struct acpi_table_bgrt bgrt_tab; size_t __initdata bgrt_image_size; struct bmp_header { @@ -28,66 +27,58 @@ struct bmp_header { u32 size; } __packed; -void __init efi_bgrt_init(void) +void __init efi_bgrt_init(struct acpi_table_header *table) { - acpi_status status; void *image; struct bmp_header bmp_header; + struct acpi_table_bgrt *bgrt = &bgrt_tab; if (acpi_disabled) return; - status = acpi_get_table("BGRT", 0, - (struct acpi_table_header **)&bgrt_tab); - if (ACPI_FAILURE(status)) - return; - - if (bgrt_tab->header.length < sizeof(*bgrt_tab)) { + if (table->length < sizeof(bgrt_tab)) { pr_notice("Ignoring BGRT: invalid length %u (expected %zu)\n", - bgrt_tab->header.length, sizeof(*bgrt_tab)); + table->length, sizeof(bgrt_tab)); return; } - if (bgrt_tab->version != 1) { + *bgrt = *(struct acpi_table_bgrt *)table; + if (bgrt->version != 1) { pr_notice("Ignoring BGRT: invalid version %u (expected 1)\n", - bgrt_tab->version); - return; + bgrt->version); + goto out; } - if (bgrt_tab->status & 0xfe) { + if (bgrt->status & 0xfe) { pr_notice("Ignoring BGRT: reserved status bits are non-zero %u\n", - bgrt_tab->status); - return; + bgrt->status); + goto out; } - if (bgrt_tab->image_type != 0) { + if (bgrt->image_type != 0) { pr_notice("Ignoring BGRT: invalid image type %u (expected 0)\n", - bgrt_tab->image_type); - return; + bgrt->image_type); + goto out; } - if (!bgrt_tab->image_address) { + if (!bgrt->image_address) { pr_notice("Ignoring BGRT: null image address\n"); - return; + goto out; } - image = memremap(bgrt_tab->image_address, sizeof(bmp_header), MEMREMAP_WB); + image = early_memremap(bgrt->image_address, sizeof(bmp_header)); if (!image) { pr_notice("Ignoring BGRT: failed to map image header memory\n"); - return; + goto out; } memcpy(&bmp_header, image, sizeof(bmp_header)); - memunmap(image); + early_memunmap(image, sizeof(bmp_header)); if (bmp_header.id != 0x4d42) { pr_notice("Ignoring BGRT: Incorrect BMP magic number 0x%x (expected 0x4d42)\n", bmp_header.id); - return; + goto out; } bgrt_image_size = bmp_header.size; + efi_mem_reserve(bgrt->image_address, bgrt_image_size); - bgrt_image = memremap(bgrt_tab->image_address, bmp_header.size, MEMREMAP_WB); - if (!bgrt_image) { - pr_notice("Ignoring BGRT: failed to map image memory\n"); - bgrt_image = NULL; - return; - } - - efi_mem_reserve(bgrt_tab->image_address, bgrt_image_size); + return; +out: + memset(bgrt, 0, sizeof(bgrt_tab)); } diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 274dfc481849..0d4becfc5145 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -542,11 +542,6 @@ void __init efi_init(void) efi_print_memmap(); } -void __init efi_late_init(void) -{ - efi_bgrt_init(); -} - void __init efi_set_executable(efi_memory_desc_t *md, bool executable) { u64 addr, npages; diff --git a/drivers/acpi/bgrt.c b/drivers/acpi/bgrt.c index 75f128e766a9..ca28aa572aa9 100644 --- a/drivers/acpi/bgrt.c +++ b/drivers/acpi/bgrt.c @@ -15,40 +15,41 @@ #include #include +static void *bgrt_image; static struct kobject *bgrt_kobj; static ssize_t show_version(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->version); + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.version); } static DEVICE_ATTR(version, S_IRUGO, show_version, NULL); static ssize_t show_status(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->status); + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.status); } static DEVICE_ATTR(status, S_IRUGO, show_status, NULL); static ssize_t show_type(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->image_type); + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.image_type); } static DEVICE_ATTR(type, S_IRUGO, show_type, NULL); static ssize_t show_xoffset(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->image_offset_x); + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.image_offset_x); } static DEVICE_ATTR(xoffset, S_IRUGO, show_xoffset, NULL); static ssize_t show_yoffset(struct device *dev, struct device_attribute *attr, char *buf) { - return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab->image_offset_y); + return snprintf(buf, PAGE_SIZE, "%d\n", bgrt_tab.image_offset_y); } static DEVICE_ATTR(yoffset, S_IRUGO, show_yoffset, NULL); @@ -84,15 +85,24 @@ static int __init bgrt_init(void) { int ret; - if (!bgrt_image) + if (!bgrt_tab.image_address) return -ENODEV; + bgrt_image = memremap(bgrt_tab.image_address, bgrt_image_size, + MEMREMAP_WB); + if (!bgrt_image) { + pr_notice("Ignoring BGRT: failed to map image memory\n"); + return -ENOMEM; + } + bin_attr_image.private = bgrt_image; bin_attr_image.size = bgrt_image_size; bgrt_kobj = kobject_create_and_add("bgrt", acpi_kobj); - if (!bgrt_kobj) - return -EINVAL; + if (!bgrt_kobj) { + ret = -EINVAL; + goto out_memmap; + } ret = sysfs_create_group(bgrt_kobj, &bgrt_attribute_group); if (ret) @@ -102,6 +112,8 @@ static int __init bgrt_init(void) out_kobject: kobject_put(bgrt_kobj); +out_memmap: + memunmap(bgrt_image); return ret; } device_initcall(bgrt_init); diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h index 051b21fedf68..2fd3993c370b 100644 --- a/include/linux/efi-bgrt.h +++ b/include/linux/efi-bgrt.h @@ -1,20 +1,19 @@ #ifndef _LINUX_EFI_BGRT_H #define _LINUX_EFI_BGRT_H -#ifdef CONFIG_ACPI_BGRT - #include -void efi_bgrt_init(void); +#ifdef CONFIG_ACPI_BGRT + +void efi_bgrt_init(struct acpi_table_header *table); /* The BGRT data itself; only valid if bgrt_image != NULL. */ -extern void *bgrt_image; extern size_t bgrt_image_size; -extern struct acpi_table_bgrt *bgrt_tab; +extern struct acpi_table_bgrt bgrt_tab; #else /* !CONFIG_ACPI_BGRT */ -static inline void efi_bgrt_init(void) {} +static inline void efi_bgrt_init(struct acpi_table_header *table) {} #endif /* !CONFIG_ACPI_BGRT */ diff --git a/init/main.c b/init/main.c index b0c9d6facef9..9648d707eea5 100644 --- a/init/main.c +++ b/init/main.c @@ -663,7 +663,6 @@ asmlinkage __visible void __init start_kernel(void) sfi_init_late(); if (efi_enabled(EFI_RUNTIME_SERVICES)) { - efi_late_init(); efi_free_boot_services(); } -- cgit v1.2.3 From 22c091d02a5422d2825a4fb1af71e5a62f9e4d0f Mon Sep 17 00:00:00 2001 From: Dave Young Date: Tue, 31 Jan 2017 13:21:41 +0000 Subject: efi/x86: Add debug code to print cooked memmap It is not obvious if the reserved boot area are added correctly, add a efi_print_memmap() call to print the new memmap. Tested-by: Nicolai Stange Signed-off-by: Dave Young Signed-off-by: Ard Biesheuvel Reviewed-by: Nicolai Stange Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1485868902-20401-10-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/platform/efi/efi.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/efi/efi.c b/arch/x86/platform/efi/efi.c index 0d4becfc5145..565dff3c9a12 100644 --- a/arch/x86/platform/efi/efi.c +++ b/arch/x86/platform/efi/efi.c @@ -955,6 +955,11 @@ static void __init __efi_enter_virtual_mode(void) return; } + if (efi_enabled(EFI_DBG)) { + pr_info("EFI runtime memory map:\n"); + efi_print_memmap(); + } + BUG_ON(!efi.systab); if (efi_setup_page_tables(pa, 1 << pg_shift)) { -- cgit v1.2.3 From a1cecf2ba78e0a6de00ff99df34b662728535aa5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:22 +0100 Subject: sched/cputime: Introduce special task_cputime_t() API to return old-typed cputime This API returns a task's cputime in cputime_t in order to ease the conversion of cputime internals to use nsecs units instead. Blindly converting all cputime readers to use this API now will later let us convert more smoothly and step by step all these places to use the new nsec based cputime. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-7-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 2 +- arch/x86/kernel/apm_32.c | 2 +- drivers/isdn/mISDN/stack.c | 2 +- fs/binfmt_elf.c | 6 +++--- fs/binfmt_elf_fdpic.c | 6 +++--- include/linux/sched.h | 32 ++++++++++++++++++++++++++--- kernel/acct.c | 2 +- kernel/delayacct.c | 4 ++-- kernel/signal.c | 4 ++-- kernel/time/itimer.c | 2 +- kernel/time/posix-cpu-timers.c | 46 +++++++++++++++++++++--------------------- kernel/tsacct.c | 6 +++--- 12 files changed, 70 insertions(+), 44 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 54d8616644e2..0f92438d736b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1154,7 +1154,7 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) memset(&r, 0, sizeof(r)); switch (who) { case RUSAGE_SELF: - task_cputime(current, &utime, &stime); + task_cputime_t(current, &utime, &stime); utime_jiffies = cputime_to_jiffies(utime); stime_jiffies = cputime_to_jiffies(stime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime); diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 45d44c173cf9..89c84fcdd3c0 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -913,7 +913,7 @@ static int apm_cpu_idle(struct cpuidle_device *dev, unsigned int bucket; recalc: - task_cputime(current, &utime, &stime); + task_cputime_t(current, &utime, &stime); if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; } else if (jiffies_since_last_check > idle_period) { diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 9cb4b621fbc3..0a3661767531 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -306,7 +306,7 @@ mISDNStackd(void *data) "msg %d sleep %d stopped\n", dev_name(&st->dev->dev), st->msg_cnt, st->sleep_cnt, st->stopped_cnt); - task_cputime(st->thread, &utime, &stime); + task_cputime_t(st->thread, &utime, &stime); printk(KERN_DEBUG "mISDNStackd daemon for %s utime(%ld) stime(%ld)\n", dev_name(&st->dev->dev), utime, stime); diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 422370293cfd..68b915650cae 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1421,19 +1421,19 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { - struct task_cputime cputime; + struct task_cputime_t cputime; /* * This is the record for the group leader. It shows the * group-wide total, not its individual thread total. */ - thread_group_cputime(p, &cputime); + thread_group_cputime_t(p, &cputime); cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { cputime_t utime, stime; - task_cputime(p, &utime, &stime); + task_cputime_t(p, &utime, &stime); cputime_to_timeval(utime, &prstatus->pr_utime); cputime_to_timeval(stime, &prstatus->pr_stime); } diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index d2e36f82c35d..6ccd9df7247a 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1342,19 +1342,19 @@ static void fill_prstatus(struct elf_prstatus *prstatus, prstatus->pr_pgrp = task_pgrp_vnr(p); prstatus->pr_sid = task_session_vnr(p); if (thread_group_leader(p)) { - struct task_cputime cputime; + struct task_cputime_t cputime; /* * This is the record for the group leader. It shows the * group-wide total, not its individual thread total. */ - thread_group_cputime(p, &cputime); + thread_group_cputime_t(p, &cputime); cputime_to_timeval(cputime.utime, &prstatus->pr_utime); cputime_to_timeval(cputime.stime, &prstatus->pr_stime); } else { cputime_t utime, stime; - task_cputime(p, &utime, &stime); + task_cputime_t(p, &utime, &stime); cputime_to_timeval(utime, &prstatus->pr_utime); cputime_to_timeval(stime, &prstatus->pr_stime); } diff --git a/include/linux/sched.h b/include/linux/sched.h index 252ff25983c8..9cc722f77799 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -615,6 +615,13 @@ struct task_cputime { unsigned long long sum_exec_runtime; }; +/* Temporary type to ease cputime_t to nsecs conversion */ +struct task_cputime_t { + cputime_t utime; + cputime_t stime; + unsigned long long sum_exec_runtime; +}; + /* Alternate field names when used to cache expirations. */ #define virt_exp utime #define prof_exp stime @@ -748,7 +755,7 @@ struct signal_struct { struct thread_group_cputimer cputimer; /* Earliest-expiration cache. */ - struct task_cputime cputime_expires; + struct task_cputime_t cputime_expires; #ifdef CONFIG_NO_HZ_FULL atomic_t tick_dep_mask; @@ -1682,7 +1689,7 @@ struct task_struct { /* mm fault and swap info: this can arguably be seen as either mm-specific or thread-specific */ unsigned long min_flt, maj_flt; - struct task_cputime cputime_expires; + struct task_cputime_t cputime_expires; struct list_head cpu_timers[3]; /* process credentials */ @@ -2286,6 +2293,19 @@ static inline void task_cputime_scaled(struct task_struct *t, } #endif +static inline void task_cputime_t(struct task_struct *t, + cputime_t *utime, cputime_t *stime) +{ + task_cputime(t, utime, stime); +} + +static inline void task_cputime_t_scaled(struct task_struct *t, + cputime_t *utimescaled, + cputime_t *stimescaled) +{ + task_cputime_scaled(t, utimescaled, stimescaled); +} + extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); @@ -3499,7 +3519,13 @@ static __always_inline bool need_resched(void) * Thread group CPU time accounting. */ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times); +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times); + +static inline void thread_group_cputime_t(struct task_struct *tsk, + struct task_cputime_t *times) +{ + thread_group_cputime(tsk, (struct task_cputime *)times); +} /* * Reevaluate whether the task has signals pending delivery. diff --git a/kernel/acct.c b/kernel/acct.c index 74963d192c5d..b9b190a8eecf 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -559,7 +559,7 @@ void acct_collect(long exitcode, int group_dead) pacct->ac_flag |= ACORE; if (current->flags & PF_SIGNALED) pacct->ac_flag |= AXSIG; - task_cputime(current, &utime, &stime); + task_cputime_t(current, &utime, &stime); pacct->ac_utime += utime; pacct->ac_stime += stime; pacct->ac_minflt += current->min_flt; diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 435c14a45118..228640f2b3d2 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -87,12 +87,12 @@ int __delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) unsigned long flags, t1; s64 tmp; - task_cputime(tsk, &utime, &stime); + task_cputime_t(tsk, &utime, &stime); tmp = (s64)d->cpu_run_real_total; tmp += cputime_to_nsecs(utime + stime); d->cpu_run_real_total = (tmp < (s64)d->cpu_run_real_total) ? 0 : tmp; - task_cputime_scaled(tsk, &utimescaled, &stimescaled); + task_cputime_t_scaled(tsk, &utimescaled, &stimescaled); tmp = (s64)d->cpu_scaled_run_real_total; tmp += cputime_to_nsecs(utimescaled + stimescaled); d->cpu_scaled_run_real_total = diff --git a/kernel/signal.c b/kernel/signal.c index 3603d93a1968..218048a837ea 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1619,7 +1619,7 @@ bool do_notify_parent(struct task_struct *tsk, int sig) task_uid(tsk)); rcu_read_unlock(); - task_cputime(tsk, &utime, &stime); + task_cputime_t(tsk, &utime, &stime); info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime); info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime); @@ -1704,7 +1704,7 @@ static void do_notify_parent_cldstop(struct task_struct *tsk, info.si_uid = from_kuid_munged(task_cred_xxx(parent, user_ns), task_uid(tsk)); rcu_read_unlock(); - task_cputime(tsk, &utime, &stime); + task_cputime_t(tsk, &utime, &stime); info.si_utime = cputime_to_clock_t(utime); info.si_stime = cputime_to_clock_t(stime); diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index 8c89143f9ebf..f2d5097bcb6d 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -53,7 +53,7 @@ static void get_cpu_itimer(struct task_struct *tsk, unsigned int clock_id, cval = it->expires; cinterval = it->incr; if (cval) { - struct task_cputime cputime; + struct task_cputime_t cputime; cputime_t t; thread_group_cputimer(tsk, &cputime); diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index e9e8c10f0d9a..d53ff711a2a8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -115,7 +115,7 @@ static void bump_cpu_timer(struct k_itimer *timer, * Checks @cputime to see if all fields are zero. Returns true if all fields * are zero, false if any field is nonzero. */ -static inline int task_cputime_zero(const struct task_cputime *cputime) +static inline int task_cputime_zero(const struct task_cputime_t *cputime) { if (!cputime->utime && !cputime->stime && !cputime->sum_exec_runtime) return 1; @@ -126,7 +126,7 @@ static inline unsigned long long prof_ticks(struct task_struct *p) { cputime_t utime, stime; - task_cputime(p, &utime, &stime); + task_cputime_t(p, &utime, &stime); return cputime_to_expires(utime + stime); } @@ -134,7 +134,7 @@ static inline unsigned long long virt_ticks(struct task_struct *p) { cputime_t utime, stime; - task_cputime(p, &utime, &stime); + task_cputime_t(p, &utime, &stime); return cputime_to_expires(utime); } @@ -210,7 +210,7 @@ retry: } } -static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime *sum) +static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct task_cputime_t *sum) { __update_gt_cputime(&cputime_atomic->utime, sum->utime); __update_gt_cputime(&cputime_atomic->stime, sum->stime); @@ -218,7 +218,7 @@ static void update_gt_cputime(struct task_cputime_atomic *cputime_atomic, struct } /* Sample task_cputime_atomic values in "atomic_timers", store results in "times". */ -static inline void sample_cputime_atomic(struct task_cputime *times, +static inline void sample_cputime_atomic(struct task_cputime_t *times, struct task_cputime_atomic *atomic_times) { times->utime = atomic64_read(&atomic_times->utime); @@ -226,10 +226,10 @@ static inline void sample_cputime_atomic(struct task_cputime *times, times->sum_exec_runtime = atomic64_read(&atomic_times->sum_exec_runtime); } -void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) +void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times) { struct thread_group_cputimer *cputimer = &tsk->signal->cputimer; - struct task_cputime sum; + struct task_cputime_t sum; /* Check if cputimer isn't running. This is accessed without locking. */ if (!READ_ONCE(cputimer->running)) { @@ -238,7 +238,7 @@ void thread_group_cputimer(struct task_struct *tsk, struct task_cputime *times) * values through the TIMER_ABSTIME flag, therefore we have * to synchronize the timer to the clock every time we start it. */ - thread_group_cputime(tsk, &sum); + thread_group_cputime_t(tsk, &sum); update_gt_cputime(&cputimer->cputime_atomic, &sum); /* @@ -262,21 +262,21 @@ static int cpu_clock_sample_group(const clockid_t which_clock, struct task_struct *p, unsigned long long *sample) { - struct task_cputime cputime; + struct task_cputime_t cputime; switch (CPUCLOCK_WHICH(which_clock)) { default: return -EINVAL; case CPUCLOCK_PROF: - thread_group_cputime(p, &cputime); + thread_group_cputime_t(p, &cputime); *sample = cputime_to_expires(cputime.utime + cputime.stime); break; case CPUCLOCK_VIRT: - thread_group_cputime(p, &cputime); + thread_group_cputime_t(p, &cputime); *sample = cputime_to_expires(cputime.utime); break; case CPUCLOCK_SCHED: - thread_group_cputime(p, &cputime); + thread_group_cputime_t(p, &cputime); *sample = cputime.sum_exec_runtime; break; } @@ -466,7 +466,7 @@ static void arm_timer(struct k_itimer *timer) { struct task_struct *p = timer->it.cpu.task; struct list_head *head, *listpos; - struct task_cputime *cputime_expires; + struct task_cputime_t *cputime_expires; struct cpu_timer_list *const nt = &timer->it.cpu; struct cpu_timer_list *next; @@ -562,7 +562,7 @@ static int cpu_timer_sample_group(const clockid_t which_clock, struct task_struct *p, unsigned long long *sample) { - struct task_cputime cputime; + struct task_cputime_t cputime; thread_group_cputimer(p, &cputime); switch (CPUCLOCK_WHICH(which_clock)) { @@ -761,7 +761,7 @@ static void posix_cpu_timer_get(struct k_itimer *timer, struct itimerspec *itp) /* * Protect against sighand release/switch in exit/exec and * also make timer sampling safe if it ends up calling - * thread_group_cputime(). + * thread_group_cputime_t(). */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) { @@ -826,7 +826,7 @@ static void check_thread_timers(struct task_struct *tsk, { struct list_head *timers = tsk->cpu_timers; struct signal_struct *const sig = tsk->signal; - struct task_cputime *tsk_expires = &tsk->cputime_expires; + struct task_cputime_t *tsk_expires = &tsk->cputime_expires; unsigned long long expires; unsigned long soft; @@ -934,7 +934,7 @@ static void check_process_timers(struct task_struct *tsk, unsigned long long utime, ptime, virt_expires, prof_expires; unsigned long long sum_sched_runtime, sched_expires; struct list_head *timers = sig->cpu_timers; - struct task_cputime cputime; + struct task_cputime_t cputime; unsigned long soft; /* @@ -1037,7 +1037,7 @@ void posix_cpu_timer_schedule(struct k_itimer *timer) } else { /* * Protect arm_timer() and timer sampling in case of call to - * thread_group_cputime(). + * thread_group_cputime_t(). */ sighand = lock_task_sighand(p, &flags); if (unlikely(sighand == NULL)) { @@ -1080,8 +1080,8 @@ out: * Returns true if any field of the former is greater than the corresponding * field of the latter if the latter field is set. Otherwise returns false. */ -static inline int task_cputime_expired(const struct task_cputime *sample, - const struct task_cputime *expires) +static inline int task_cputime_expired(const struct task_cputime_t *sample, + const struct task_cputime_t *expires) { if (expires->utime && sample->utime >= expires->utime) return 1; @@ -1108,9 +1108,9 @@ static inline int fastpath_timer_check(struct task_struct *tsk) struct signal_struct *sig; if (!task_cputime_zero(&tsk->cputime_expires)) { - struct task_cputime task_sample; + struct task_cputime_t task_sample; - task_cputime(tsk, &task_sample.utime, &task_sample.stime); + task_cputime_t(tsk, &task_sample.utime, &task_sample.stime); task_sample.sum_exec_runtime = tsk->se.sum_exec_runtime; if (task_cputime_expired(&task_sample, &tsk->cputime_expires)) return 1; @@ -1133,7 +1133,7 @@ static inline int fastpath_timer_check(struct task_struct *tsk) */ if (READ_ONCE(sig->cputimer.running) && !READ_ONCE(sig->cputimer.checking_timer)) { - struct task_cputime group_sample; + struct task_cputime_t group_sample; sample_cputime_atomic(&group_sample, &sig->cputimer.cputime_atomic); diff --git a/kernel/tsacct.c b/kernel/tsacct.c index f8e26ab963ed..040d0a64d0d1 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -66,11 +66,11 @@ void bacct_add_tsk(struct user_namespace *user_ns, task_tgid_nr_ns(rcu_dereference(tsk->real_parent), pid_ns) : 0; rcu_read_unlock(); - task_cputime(tsk, &utime, &stime); + task_cputime_t(tsk, &utime, &stime); stats->ac_utime = cputime_to_usecs(utime); stats->ac_stime = cputime_to_usecs(stime); - task_cputime_scaled(tsk, &utimescaled, &stimescaled); + task_cputime_t_scaled(tsk, &utimescaled, &stimescaled); stats->ac_utimescaled = cputime_to_usecs(utimescaled); stats->ac_stimescaled = cputime_to_usecs(stimescaled); @@ -159,7 +159,7 @@ void acct_update_integrals(struct task_struct *tsk) unsigned long flags; local_irq_save(flags); - task_cputime(tsk, &utime, &stime); + task_cputime_t(tsk, &utime, &stime); __acct_update_integrals(tsk, utime, stime); local_irq_restore(flags); } -- cgit v1.2.3 From 5613fda9a503cd6137b120298902a34a1386b2c1 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:23 +0100 Subject: sched/cputime: Convert task/group cputime to nsecs Now that most cputime readers use the transition API which return the task cputime in old style cputime_t, we can safely store the cputime in nsecs. This will eventually make cputime statistics less opaque and more granular. Back and forth convertions between cputime_t and nsecs in order to deal with cputime_t random granularity won't be needed anymore. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-8-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 4 ++-- arch/powerpc/kernel/time.c | 4 ++-- arch/s390/kernel/vtime.c | 6 ++--- arch/x86/kvm/hyperv.c | 5 +++-- fs/binfmt_elf.c | 11 +++++++-- fs/binfmt_elf_fdpic.c | 4 ++-- fs/proc/array.c | 10 ++++----- include/linux/sched.h | 55 ++++++++++++++++++++++++++++----------------- kernel/exit.c | 4 ++-- kernel/sched/cputime.c | 35 ++++++++++++++--------------- kernel/signal.c | 4 ++-- kernel/sys.c | 16 ++++++------- 12 files changed, 89 insertions(+), 69 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 0f92438d736b..82ccb43b795b 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -1163,8 +1163,8 @@ SYSCALL_DEFINE2(osf_getrusage, int, who, struct rusage32 __user *, ru) r.ru_majflt = current->maj_flt; break; case RUSAGE_CHILDREN: - utime_jiffies = cputime_to_jiffies(current->signal->cutime); - stime_jiffies = cputime_to_jiffies(current->signal->cstime); + utime_jiffies = nsecs_to_jiffies(current->signal->cutime); + stime_jiffies = nsecs_to_jiffies(current->signal->cstime); jiffies_to_timeval32(utime_jiffies, &r.ru_utime); jiffies_to_timeval32(stime_jiffies, &r.ru_stime); r.ru_minflt = current->signal->cmin_flt; diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 02e97305d22b..3cca82e065c9 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -396,7 +396,7 @@ void vtime_flush(struct task_struct *tsk) account_user_time(tsk, acct->utime); if (acct->utime_scaled) - tsk->utimescaled += acct->utime_scaled; + tsk->utimescaled += cputime_to_nsecs(acct->utime_scaled); if (acct->gtime) account_guest_time(tsk, acct->gtime); @@ -411,7 +411,7 @@ void vtime_flush(struct task_struct *tsk) account_system_index_time(tsk, acct->stime, CPUTIME_SYSTEM); if (acct->stime_scaled) - tsk->stimescaled += acct->stime_scaled; + tsk->stimescaled += cputime_to_nsecs(acct->stime_scaled); if (acct->hardirq_time) account_system_index_time(tsk, acct->hardirq_time, CPUTIME_IRQ); diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 0a9e5d67547d..f2fc27491604 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -114,7 +114,7 @@ static void account_system_index_scaled(struct task_struct *p, cputime_t cputime, cputime_t scaled, enum cpu_usage_stat index) { - p->stimescaled += scaled; + p->stimescaled += cputime_to_nsecs(scaled); account_system_index_time(p, cputime, index); } @@ -167,12 +167,12 @@ static int do_account_vtime(struct task_struct *tsk) /* Push account value */ if (user) { account_user_time(tsk, user); - tsk->utimescaled += scale_vtime(user); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(user)); } if (guest) { account_guest_time(tsk, guest); - tsk->utimescaled += scale_vtime(guest); + tsk->utimescaled += cputime_to_nsecs(scale_vtime(guest)); } if (system) diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index 1572c35b4f1a..2ecd7dab4631 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -964,10 +964,11 @@ static int kvm_hv_set_msr_pw(struct kvm_vcpu *vcpu, u32 msr, u64 data, /* Calculate cpu time spent by current task in 100ns units */ static u64 current_task_runtime_100ns(void) { - cputime_t utime, stime; + u64 utime, stime; task_cputime_adjusted(current, &utime, &stime); - return div_u64(cputime_to_nsecs(utime + stime), 100); + + return div_u64(utime + stime, 100); } static int kvm_hv_set_msr(struct kvm_vcpu *vcpu, u32 msr, u64 data, bool host) diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 68b915650cae..6d451936a858 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -1411,6 +1411,8 @@ static void fill_note(struct memelfnote *note, const char *name, int type, static void fill_prstatus(struct elf_prstatus *prstatus, struct task_struct *p, long signr) { + struct timeval tv; + prstatus->pr_info.si_signo = prstatus->pr_cursig = signr; prstatus->pr_sigpend = p->pending.signal.sig[0]; prstatus->pr_sighold = p->blocked.sig[0]; @@ -1437,8 +1439,13 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(utime, &prstatus->pr_utime); cputime_to_timeval(stime, &prstatus->pr_stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + tv = ns_to_timeval(p->signal->cutime); + prstatus->pr_cutime.tv_sec = tv.tv_sec; + prstatus->pr_cutime.tv_usec = tv.tv_usec; + + tv = ns_to_timeval(p->signal->cstime); + prstatus->pr_cstime.tv_sec = tv.tv_sec; + prstatus->pr_cstime.tv_usec = tv.tv_usec; } static int fill_psinfo(struct elf_prpsinfo *psinfo, struct task_struct *p, diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6ccd9df7247a..e1f373460257 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -1358,8 +1358,8 @@ static void fill_prstatus(struct elf_prstatus *prstatus, cputime_to_timeval(utime, &prstatus->pr_utime); cputime_to_timeval(stime, &prstatus->pr_stime); } - cputime_to_timeval(p->signal->cutime, &prstatus->pr_cutime); - cputime_to_timeval(p->signal->cstime, &prstatus->pr_cstime); + prstatus->pr_cutime = ns_to_timeval(p->signal->cutime); + prstatus->pr_cstime = ns_to_timeval(p->signal->cstime); prstatus->pr_exec_fdpic_loadmap = p->mm->context.exec_fdpic_loadmap; prstatus->pr_interp_fdpic_loadmap = p->mm->context.interp_fdpic_loadmap; diff --git a/fs/proc/array.c b/fs/proc/array.c index 25b54cf0c042..fe12b519d09b 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -401,7 +401,7 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, unsigned long long start_time; unsigned long cmin_flt = 0, cmaj_flt = 0; unsigned long min_flt = 0, maj_flt = 0; - cputime_t cutime, cstime, utime, stime; + u64 cutime, cstime, utime, stime; u64 cgtime, gtime; unsigned long rsslim = 0; char tcomm[sizeof(task->comm)]; @@ -497,10 +497,10 @@ static int do_task_stat(struct seq_file *m, struct pid_namespace *ns, seq_put_decimal_ull(m, " ", cmin_flt); seq_put_decimal_ull(m, " ", maj_flt); seq_put_decimal_ull(m, " ", cmaj_flt); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(utime)); - seq_put_decimal_ull(m, " ", cputime_to_clock_t(stime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cutime)); - seq_put_decimal_ll(m, " ", cputime_to_clock_t(cstime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(utime)); + seq_put_decimal_ull(m, " ", nsec_to_clock_t(stime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cutime)); + seq_put_decimal_ll(m, " ", nsec_to_clock_t(cstime)); seq_put_decimal_ll(m, " ", priority); seq_put_decimal_ll(m, " ", nice); seq_put_decimal_ll(m, " ", num_threads); diff --git a/include/linux/sched.h b/include/linux/sched.h index 9cc722f77799..b7ccc54b35cc 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -585,8 +585,8 @@ struct cpu_itimer { */ struct prev_cputime { #ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; raw_spinlock_t lock; #endif }; @@ -601,8 +601,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) /** * struct task_cputime - collected CPU time counts - * @utime: time spent in user mode, in &cputime_t units - * @stime: time spent in kernel mode, in &cputime_t units + * @utime: time spent in user mode, in nanoseconds + * @stime: time spent in kernel mode, in nanoseconds * @sum_exec_runtime: total time spent on the CPU, in nanoseconds * * This structure groups together three kinds of CPU time that are tracked for @@ -610,8 +610,8 @@ static inline void prev_cputime_init(struct prev_cputime *prev) * these counts together and treat all three of them in parallel. */ struct task_cputime { - cputime_t utime; - cputime_t stime; + u64 utime; + u64 stime; unsigned long long sum_exec_runtime; }; @@ -780,7 +780,7 @@ struct signal_struct { * in __exit_signal, except for the group leader. */ seqlock_t stats_lock; - cputime_t utime, stime, cutime, cstime; + u64 utime, stime, cutime, cstime; u64 gtime; u64 cgtime; struct prev_cputime prev_cputime; @@ -1661,9 +1661,9 @@ struct task_struct { int __user *set_child_tid; /* CLONE_CHILD_SETTID */ int __user *clear_child_tid; /* CLONE_CHILD_CLEARTID */ - cputime_t utime, stime; + u64 utime, stime; #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME - cputime_t utimescaled, stimescaled; + u64 utimescaled, stimescaled; #endif u64 gtime; struct prev_cputime prev_cputime; @@ -2260,11 +2260,11 @@ struct task_struct *try_get_task_struct(struct task_struct **ptask); #ifdef CONFIG_VIRT_CPU_ACCOUNTING_GEN extern void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime); + u64 *utime, u64 *stime); extern u64 task_gtime(struct task_struct *t); #else static inline void task_cputime(struct task_struct *t, - cputime_t *utime, cputime_t *stime) + u64 *utime, u64 *stime) { *utime = t->utime; *stime = t->stime; @@ -2278,16 +2278,16 @@ static inline u64 task_gtime(struct task_struct *t) #ifdef CONFIG_ARCH_HAS_SCALED_CPUTIME static inline void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) + u64 *utimescaled, + u64 *stimescaled) { *utimescaled = t->utimescaled; *stimescaled = t->stimescaled; } #else static inline void task_cputime_scaled(struct task_struct *t, - cputime_t *utimescaled, - cputime_t *stimescaled) + u64 *utimescaled, + u64 *stimescaled) { task_cputime(t, utimescaled, stimescaled); } @@ -2296,18 +2296,26 @@ static inline void task_cputime_scaled(struct task_struct *t, static inline void task_cputime_t(struct task_struct *t, cputime_t *utime, cputime_t *stime) { - task_cputime(t, utime, stime); + u64 ut, st; + + task_cputime(t, &ut, &st); + *utime = nsecs_to_cputime(ut); + *stime = nsecs_to_cputime(st); } static inline void task_cputime_t_scaled(struct task_struct *t, cputime_t *utimescaled, cputime_t *stimescaled) { - task_cputime_scaled(t, utimescaled, stimescaled); + u64 ut, st; + + task_cputime_scaled(t, &ut, &st); + *utimescaled = nsecs_to_cputime(ut); + *stimescaled = nsecs_to_cputime(st); } -extern void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); -extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st); +extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); +extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); /* * Per process flags @@ -3522,9 +3530,14 @@ void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times); void thread_group_cputimer(struct task_struct *tsk, struct task_cputime_t *times); static inline void thread_group_cputime_t(struct task_struct *tsk, - struct task_cputime_t *times) + struct task_cputime_t *cputime) { - thread_group_cputime(tsk, (struct task_cputime *)times); + struct task_cputime times; + + thread_group_cputime(tsk, ×); + cputime->utime = nsecs_to_cputime(times.utime); + cputime->stime = nsecs_to_cputime(times.stime); + cputime->sum_exec_runtime = times.sum_exec_runtime; } /* diff --git a/kernel/exit.c b/kernel/exit.c index 8f14b866f9f6..8e5e21338b3a 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -86,7 +86,7 @@ static void __exit_signal(struct task_struct *tsk) bool group_dead = thread_group_leader(tsk); struct sighand_struct *sighand; struct tty_struct *uninitialized_var(tty); - cputime_t utime, stime; + u64 utime, stime; sighand = rcu_dereference_check(tsk->sighand, lockdep_tasklist_lock_is_held()); @@ -1091,7 +1091,7 @@ static int wait_task_zombie(struct wait_opts *wo, struct task_struct *p) struct signal_struct *sig = p->signal; struct signal_struct *psig = current->signal; unsigned long maxrss; - cputime_t tgutime, tgstime; + u64 tgutime, tgstime; /* * The resource counters for the group leader are in its diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 8bcd98e2b821..0bdef50d88bc 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -134,7 +134,7 @@ void account_user_time(struct task_struct *p, cputime_t cputime) int index; /* Add user time to process. */ - p->utime += cputime; + p->utime += cputime_to_nsecs(cputime); account_group_user_time(p, cputime); index = (task_nice(p) > 0) ? CPUTIME_NICE : CPUTIME_USER; @@ -156,7 +156,7 @@ void account_guest_time(struct task_struct *p, cputime_t cputime) u64 *cpustat = kcpustat_this_cpu->cpustat; /* Add guest time to process. */ - p->utime += cputime; + p->utime += cputime_to_nsecs(cputime); account_group_user_time(p, cputime); p->gtime += cputime_to_nsecs(cputime); @@ -180,7 +180,7 @@ void account_system_index_time(struct task_struct *p, cputime_t cputime, enum cpu_usage_stat index) { /* Add system time to process. */ - p->stime += cputime; + p->stime += cputime_to_nsecs(cputime); account_group_system_time(p, cputime); /* Add system time to cpustat. */ @@ -315,7 +315,7 @@ static u64 read_sum_exec_runtime(struct task_struct *t) void thread_group_cputime(struct task_struct *tsk, struct task_cputime *times) { struct signal_struct *sig = tsk->signal; - cputime_t utime, stime; + u64 utime, stime; struct task_struct *t; unsigned int seq, nextseq; unsigned long flags; @@ -465,14 +465,14 @@ void vtime_account_irq_enter(struct task_struct *tsk) EXPORT_SYMBOL_GPL(vtime_account_irq_enter); #endif /* __ARCH_HAS_VTIME_ACCOUNT */ -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { *ut = p->utime; *st = p->stime; } EXPORT_SYMBOL_GPL(task_cputime_adjusted); -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime; @@ -543,7 +543,7 @@ void account_idle_ticks(unsigned long ticks) * Perform (stime * rtime) / total, but avoid multiplication overflow by * loosing precision when the numbers are big. */ -static cputime_t scale_stime(u64 stime, u64 rtime, u64 total) +static u64 scale_stime(u64 stime, u64 rtime, u64 total) { u64 scaled; @@ -580,7 +580,7 @@ drop_precision: * followed by a 64/32->64 divide. */ scaled = div_u64((u64) (u32) stime * (u64) (u32) rtime, (u32)total); - return (__force cputime_t) scaled; + return scaled; } /* @@ -605,14 +605,14 @@ drop_precision: */ static void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, - cputime_t *ut, cputime_t *st) + u64 *ut, u64 *st) { - cputime_t rtime, stime, utime; + u64 rtime, stime, utime; unsigned long flags; /* Serialize concurrent callers such that we can honour our guarantees */ raw_spin_lock_irqsave(&prev->lock, flags); - rtime = nsecs_to_cputime(curr->sum_exec_runtime); + rtime = curr->sum_exec_runtime; /* * This is possible under two circumstances: @@ -643,8 +643,7 @@ static void cputime_adjust(struct task_cputime *curr, goto update; } - stime = scale_stime((__force u64)stime, (__force u64)rtime, - (__force u64)(stime + utime)); + stime = scale_stime(stime, rtime, stime + utime); update: /* @@ -677,7 +676,7 @@ out: raw_spin_unlock_irqrestore(&prev->lock, flags); } -void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime = { .sum_exec_runtime = p->se.sum_exec_runtime, @@ -688,7 +687,7 @@ void task_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) } EXPORT_SYMBOL_GPL(task_cputime_adjusted); -void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, cputime_t *st) +void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st) { struct task_cputime cputime; @@ -849,9 +848,9 @@ u64 task_gtime(struct task_struct *t) * add up the pending nohz execution time since the last * cputime snapshot. */ -void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) +void task_cputime(struct task_struct *t, u64 *utime, u64 *stime) { - cputime_t delta; + u64 delta; unsigned int seq; if (!vtime_accounting_enabled()) { @@ -870,7 +869,7 @@ void task_cputime(struct task_struct *t, cputime_t *utime, cputime_t *stime) if (t->vtime_snap_whence == VTIME_INACTIVE || is_idle_task(t)) continue; - delta = vtime_delta(t); + delta = cputime_to_nsecs(vtime_delta(t)); /* * Task runs either in user or kernel space, add pending nohz time to diff --git a/kernel/signal.c b/kernel/signal.c index 218048a837ea..b63522193076 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1620,8 +1620,8 @@ bool do_notify_parent(struct task_struct *tsk, int sig) rcu_read_unlock(); task_cputime_t(tsk, &utime, &stime); - info.si_utime = cputime_to_clock_t(utime + tsk->signal->utime); - info.si_stime = cputime_to_clock_t(stime + tsk->signal->stime); + info.si_utime = cputime_to_clock_t(utime + nsecs_to_cputime(tsk->signal->utime)); + info.si_stime = cputime_to_clock_t(stime + nsecs_to_cputime(tsk->signal->stime)); info.si_status = tsk->exit_code & 0x7f; if (tsk->exit_code & 0x80) diff --git a/kernel/sys.c b/kernel/sys.c index 842914ef7de4..7d4a9a6df956 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -881,15 +881,15 @@ SYSCALL_DEFINE0(getegid) void do_sys_times(struct tms *tms) { - cputime_t tgutime, tgstime, cutime, cstime; + u64 tgutime, tgstime, cutime, cstime; thread_group_cputime_adjusted(current, &tgutime, &tgstime); cutime = current->signal->cutime; cstime = current->signal->cstime; - tms->tms_utime = cputime_to_clock_t(tgutime); - tms->tms_stime = cputime_to_clock_t(tgstime); - tms->tms_cutime = cputime_to_clock_t(cutime); - tms->tms_cstime = cputime_to_clock_t(cstime); + tms->tms_utime = nsec_to_clock_t(tgutime); + tms->tms_stime = nsec_to_clock_t(tgstime); + tms->tms_cutime = nsec_to_clock_t(cutime); + tms->tms_cstime = nsec_to_clock_t(cstime); } SYSCALL_DEFINE1(times, struct tms __user *, tbuf) @@ -1544,7 +1544,7 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) { struct task_struct *t; unsigned long flags; - cputime_t tgutime, tgstime, utime, stime; + u64 tgutime, tgstime, utime, stime; unsigned long maxrss = 0; memset((char *)r, 0, sizeof (*r)); @@ -1600,8 +1600,8 @@ static void k_getrusage(struct task_struct *p, int who, struct rusage *r) unlock_task_sighand(p, &flags); out: - cputime_to_timeval(utime, &r->ru_utime); - cputime_to_timeval(stime, &r->ru_stime); + r->ru_utime = ns_to_timeval(utime); + r->ru_stime = ns_to_timeval(stime); if (who != RUSAGE_CHILDREN) { struct mm_struct *mm = get_task_mm(p); -- cgit v1.2.3 From f7dcd63de44219fb5e9a36fc2c0ca23ddd79d01c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:25 +0100 Subject: x86: Convert obsolete cputime type to nsecs Use the new nsec based cputime accessors as part of the whole cputime conversion from cputime_t to nsecs. Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-10-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apm_32.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 89c84fcdd3c0..4a7080c84a5a 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -905,21 +905,21 @@ static int apm_cpu_idle(struct cpuidle_device *dev, { static int use_apm_idle; /* = 0 */ static unsigned int last_jiffies; /* = 0 */ - static unsigned int last_stime; /* = 0 */ - cputime_t stime, utime; + static u64 last_stime; /* = 0 */ + u64 stime, utime; int apm_idle_done = 0; unsigned int jiffies_since_last_check = jiffies - last_jiffies; unsigned int bucket; recalc: - task_cputime_t(current, &utime, &stime); + task_cputime(current, &utime, &stime); if (jiffies_since_last_check > IDLE_CALC_LIMIT) { use_apm_idle = 0; } else if (jiffies_since_last_check > idle_period) { unsigned int idle_percentage; - idle_percentage = cputime_to_jiffies(stime - last_stime); + idle_percentage = nsecs_to_jiffies(stime - last_stime); idle_percentage *= 100; idle_percentage /= jiffies_since_last_check; use_apm_idle = (idle_percentage > idle_threshold); -- cgit v1.2.3 From b672592f022152155fde7db99aafbcf04a2c3ba5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 31 Jan 2017 04:09:51 +0100 Subject: sched/cputime: Remove generic asm headers cputime_t is now only used by two architectures: * powerpc (when CONFIG_VIRT_CPU_ACCOUNTING_NATIVE=y) * s390 And since the core doesn't use it anymore, we don't need any arch support from the others. So we can remove their stub implementations. A final cleanup would be to provide an efficient pure arch implementation of cputime_to_nsec() for s390 and powerpc and finally remove include/linux/cputime.h . Signed-off-by: Frederic Weisbecker Cc: Benjamin Herrenschmidt Cc: Fenghua Yu Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Michael Ellerman Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Stanislaw Gruszka Cc: Thomas Gleixner Cc: Tony Luck Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1485832191-26889-36-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/Kbuild | 1 - arch/arc/include/asm/Kbuild | 1 - arch/arm/include/asm/Kbuild | 1 - arch/arm64/include/asm/Kbuild | 1 - arch/avr32/include/asm/Kbuild | 1 - arch/blackfin/include/asm/Kbuild | 1 - arch/c6x/include/asm/Kbuild | 1 - arch/cris/include/asm/Kbuild | 1 - arch/frv/include/asm/Kbuild | 1 - arch/h8300/include/asm/Kbuild | 1 - arch/hexagon/include/asm/Kbuild | 1 - arch/ia64/include/asm/cputime.h | 4 +--- arch/m32r/include/asm/Kbuild | 1 - arch/m68k/include/asm/Kbuild | 1 - arch/metag/include/asm/Kbuild | 1 - arch/microblaze/include/asm/Kbuild | 1 - arch/mips/include/asm/Kbuild | 1 - arch/mn10300/include/asm/Kbuild | 1 - arch/nios2/include/asm/Kbuild | 1 - arch/openrisc/include/asm/Kbuild | 1 - arch/parisc/include/asm/Kbuild | 1 - arch/powerpc/include/asm/cputime.h | 4 +--- arch/score/include/asm/Kbuild | 1 - arch/sh/include/asm/Kbuild | 1 - arch/sparc/include/asm/Kbuild | 1 - arch/tile/include/asm/Kbuild | 1 - arch/um/include/asm/Kbuild | 1 - arch/unicore32/include/asm/Kbuild | 1 - arch/x86/include/asm/Kbuild | 1 - arch/xtensa/include/asm/Kbuild | 1 - include/asm-generic/cputime.h | 7 ------- include/linux/cputime.h | 2 ++ 32 files changed, 4 insertions(+), 41 deletions(-) delete mode 100644 include/asm-generic/cputime.h (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index bf8475ce85ee..baa152b9348e 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += export.h generic-y += irq_work.h diff --git a/arch/arc/include/asm/Kbuild b/arch/arc/include/asm/Kbuild index c332604606dd..63a04013d05a 100644 --- a/arch/arc/include/asm/Kbuild +++ b/arch/arc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/arm/include/asm/Kbuild b/arch/arm/include/asm/Kbuild index efb21757d41f..b14e8c7d71bd 100644 --- a/arch/arm/include/asm/Kbuild +++ b/arch/arm/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += bitsperlong.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += early_ioremap.h generic-y += emergency-restart.h diff --git a/arch/arm64/include/asm/Kbuild b/arch/arm64/include/asm/Kbuild index 8365a84c2640..a12f1afc95a3 100644 --- a/arch/arm64/include/asm/Kbuild +++ b/arch/arm64/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += delay.h generic-y += div64.h generic-y += dma.h diff --git a/arch/avr32/include/asm/Kbuild b/arch/avr32/include/asm/Kbuild index 241b9b9729d8..3d7ef2c17a7c 100644 --- a/arch/avr32/include/asm/Kbuild +++ b/arch/avr32/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += delay.h generic-y += device.h generic-y += div64.h diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index 2fb67b59d188..d6fa60b158be 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 64465e7e2245..4e9f57433f3a 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += barrier.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 1778805f6380..9f19e19bff9d 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -4,7 +4,6 @@ generic-y += barrier.h generic-y += bitsperlong.h generic-y += clkdev.h generic-y += cmpxchg.h -generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += errno.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 1fa084cf1a43..0f5b0d5d313c 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 373cb23301e3..5efd0c87f3c0 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -5,7 +5,6 @@ generic-y += bugs.h generic-y += cacheflush.h generic-y += checksum.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index db8ddabc6bd2..a43a7c90e4af 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -6,7 +6,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/ia64/include/asm/cputime.h b/arch/ia64/include/asm/cputime.h index 44bcffc5681c..3d665c0627a8 100644 --- a/arch/ia64/include/asm/cputime.h +++ b/arch/ia64/include/asm/cputime.h @@ -18,9 +18,7 @@ #ifndef __IA64_CPUTIME_H #define __IA64_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -# include -#else +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE extern void arch_vtime_task_switch(struct task_struct *tsk); #endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 860e440611c9..652100b64a71 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -1,6 +1,5 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += kvm_para.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 1f2e5d31cb24..6c76d6c24b3d 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += bitsperlong.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += emergency-restart.h generic-y += errno.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index 167150c701d1..d3731f0db73b 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += dma.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index b0ae88c9fed9..6275eb051801 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += exec.h generic-y += irq_work.h diff --git a/arch/mips/include/asm/Kbuild b/arch/mips/include/asm/Kbuild index 3269b742a75e..994b1c4392be 100644 --- a/arch/mips/include/asm/Kbuild +++ b/arch/mips/include/asm/Kbuild @@ -1,7 +1,6 @@ # MIPS headers generic-(CONFIG_GENERIC_CSUM) += checksum.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += dma-contiguous.h generic-y += emergency-restart.h diff --git a/arch/mn10300/include/asm/Kbuild b/arch/mn10300/include/asm/Kbuild index 1c8dd0f5cd5d..97f64c723a0c 100644 --- a/arch/mn10300/include/asm/Kbuild +++ b/arch/mn10300/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += exec.h generic-y += irq_work.h generic-y += mcs_spinlock.h diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index d63330e88379..35b0e883761a 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -6,7 +6,6 @@ generic-y += bitsperlong.h generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index 2832f031fb11..ef8d1ccc3e45 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -12,7 +12,6 @@ generic-y += checksum.h generic-y += clkdev.h generic-y += cmpxchg-local.h generic-y += cmpxchg.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index 91f53c07f410..4e179d770d69 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += auxvec.h generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += device.h generic-y += div64.h generic-y += emergency-restart.h diff --git a/arch/powerpc/include/asm/cputime.h b/arch/powerpc/include/asm/cputime.h index 6ec0ba6f1a61..99b541865d8d 100644 --- a/arch/powerpc/include/asm/cputime.h +++ b/arch/powerpc/include/asm/cputime.h @@ -16,9 +16,7 @@ #ifndef __POWERPC_CPUTIME_H #define __POWERPC_CPUTIME_H -#ifndef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE -#include -#else +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #include diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index a05218ff3fe4..51970bb6c4fe 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -4,7 +4,6 @@ header-y += generic-y += barrier.h generic-y += clkdev.h -generic-y += cputime.h generic-y += irq_work.h generic-y += mcs_spinlock.h generic-y += mm-arch-hooks.h diff --git a/arch/sh/include/asm/Kbuild b/arch/sh/include/asm/Kbuild index 751c3373a92c..cf2a75063b53 100644 --- a/arch/sh/include/asm/Kbuild +++ b/arch/sh/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += bitsperlong.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += delay.h generic-y += div64.h diff --git a/arch/sparc/include/asm/Kbuild b/arch/sparc/include/asm/Kbuild index 0569bfac4afb..e9e837bc3158 100644 --- a/arch/sparc/include/asm/Kbuild +++ b/arch/sparc/include/asm/Kbuild @@ -2,7 +2,6 @@ generic-y += clkdev.h -generic-y += cputime.h generic-y += div64.h generic-y += emergency-restart.h generic-y += exec.h diff --git a/arch/tile/include/asm/Kbuild b/arch/tile/include/asm/Kbuild index 2d1f5638974c..51a339feceac 100644 --- a/arch/tile/include/asm/Kbuild +++ b/arch/tile/include/asm/Kbuild @@ -4,7 +4,6 @@ header-y += ../arch/ generic-y += bug.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += div64.h generic-y += emergency-restart.h generic-y += errno.h diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 052f7f6d0551..90c281cd7e1d 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += barrier.h generic-y += bug.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += delay.h generic-y += device.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 256c45b3ae34..5d51ade89f4c 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -4,7 +4,6 @@ generic-y += auxvec.h generic-y += bitsperlong.h generic-y += bugs.h generic-y += clkdev.h -generic-y += cputime.h generic-y += current.h generic-y += device.h generic-y += div64.h diff --git a/arch/x86/include/asm/Kbuild b/arch/x86/include/asm/Kbuild index 2b892e2313a9..5d6a53fd7521 100644 --- a/arch/x86/include/asm/Kbuild +++ b/arch/x86/include/asm/Kbuild @@ -7,7 +7,6 @@ generated-y += unistd_64_x32.h generated-y += xen-hypercalls.h generic-y += clkdev.h -generic-y += cputime.h generic-y += dma-contiguous.h generic-y += early_ioremap.h generic-y += mcs_spinlock.h diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index b7fbaa56b51a..9e9760b20be5 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -1,7 +1,6 @@ generic-y += bitsperlong.h generic-y += bug.h generic-y += clkdev.h -generic-y += cputime.h generic-y += div64.h generic-y += dma-contiguous.h generic-y += emergency-restart.h diff --git a/include/asm-generic/cputime.h b/include/asm-generic/cputime.h deleted file mode 100644 index 358e54777b56..000000000000 --- a/include/asm-generic/cputime.h +++ /dev/null @@ -1,7 +0,0 @@ -#ifndef _ASM_GENERIC_CPUTIME_H -#define _ASM_GENERIC_CPUTIME_H - -#include -#include - -#endif diff --git a/include/linux/cputime.h b/include/linux/cputime.h index a257d6690621..a691dc4ddc13 100644 --- a/include/linux/cputime.h +++ b/include/linux/cputime.h @@ -1,6 +1,7 @@ #ifndef __LINUX_CPUTIME_H #define __LINUX_CPUTIME_H +#ifdef CONFIG_VIRT_CPU_ACCOUNTING_NATIVE #include #ifndef cputime_to_nsecs @@ -8,4 +9,5 @@ (cputime_to_usecs(__ct) * NSEC_PER_USEC) #endif +#endif /* CONFIG_VIRT_CPU_ACCOUNTING_NATIVE */ #endif /* __LINUX_CPUTIME_H */ -- cgit v1.2.3 From 7243e10689fd17a3e151f41216569295cefa2958 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 14 Jan 2017 09:26:12 +0100 Subject: x86/platform/UV: Clean up the UV APIC code Make it more readable. Acked-by: Thomas Gleixner Cc: Dimitri Sivanich Cc: Linus Torvalds Cc: Mike Travis Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170114082612.GA27842@gmail.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/x2apic_uv_x.c | 500 ++++++++++++++++++------------------- 1 file changed, 244 insertions(+), 256 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 97ea712fc72f..656994ac4677 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -41,15 +41,13 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); -#define PR_DEVEL(fmt, args...) pr_devel("%s: " fmt, __func__, args) +static enum uv_system_type uv_system_type; +static u64 gru_start_paddr, gru_end_paddr; +static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; +static u64 gru_dist_lmask, gru_dist_umask; +static union uvh_apicid uvh_apicid; -static enum uv_system_type uv_system_type; -static u64 gru_start_paddr, gru_end_paddr; -static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; -static u64 gru_dist_lmask, gru_dist_umask; -static union uvh_apicid uvh_apicid; - -/* info derived from CPUID */ +/* Information derived from CPUID: */ static struct { unsigned int apicid_shift; unsigned int apicid_mask; @@ -61,21 +59,25 @@ static struct { int uv_min_hub_revision_id; EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); + unsigned int uv_apicid_hibits; EXPORT_SYMBOL_GPL(uv_apicid_hibits); static struct apic apic_x2apic_uv_x; static struct uv_hub_info_s uv_hub_info_node0; -/* Set this to use hardware error handler instead of kernel panic */ +/* Set this to use hardware error handler instead of kernel panic: */ static int disable_uv_undefined_panic = 1; + unsigned long uv_undefined(char *str) { if (likely(!disable_uv_undefined_panic)) panic("UV: error: undefined MMR: %s\n", str); else pr_crit("UV: error: undefined MMR: %s\n", str); - return ~0ul; /* cause a machine fault */ + + /* Cause a machine fault: */ + return ~0ul; } EXPORT_SYMBOL(uv_undefined); @@ -86,18 +88,19 @@ static unsigned long __init uv_early_read_mmr(unsigned long addr) mmr = early_ioremap(UV_LOCAL_MMR_BASE | addr, sizeof(*mmr)); val = *mmr; early_iounmap(mmr, sizeof(*mmr)); + return val; } static inline bool is_GRU_range(u64 start, u64 end) { if (gru_dist_base) { - u64 su = start & gru_dist_umask; /* upper (incl pnode) bits */ - u64 sl = start & gru_dist_lmask; /* base offset bits */ + u64 su = start & gru_dist_umask; /* Upper (incl pnode) bits */ + u64 sl = start & gru_dist_lmask; /* Base offset bits */ u64 eu = end & gru_dist_umask; u64 el = end & gru_dist_lmask; - /* Must reside completely within a single GRU range */ + /* Must reside completely within a single GRU range: */ return (sl == gru_dist_base && el == gru_dist_base && su >= gru_first_node_paddr && su <= gru_last_node_paddr && @@ -141,7 +144,7 @@ static int __init early_get_pnodeid(void) uv_hub_info->hub_revision = uv_min_hub_revision_id; uv_cpuid.pnode_mask = (1 << m_n_config.s.n_skt) - 1; pnode = (node_id.s.node_id >> 1) & uv_cpuid.pnode_mask; - uv_cpuid.gpa_shift = 46; /* default unless changed */ + uv_cpuid.gpa_shift = 46; /* Default unless changed */ pr_info("UV: rev:%d part#:%x nodeid:%04x n_skt:%d pnmsk:%x pn:%x\n", node_id.s.revision, node_id.s.part_number, node_id.s.node_id, @@ -149,11 +152,12 @@ static int __init early_get_pnodeid(void) return pnode; } -/* [copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ -#define SMT_LEVEL 0 /* leaf 0xb SMT level */ -#define INVALID_TYPE 0 /* leaf 0xb sub-leaf types */ -#define SMT_TYPE 1 -#define CORE_TYPE 2 +/* [Copied from arch/x86/kernel/cpu/topology.c:detect_extended_topology()] */ + +#define SMT_LEVEL 0 /* Leaf 0xb SMT level */ +#define INVALID_TYPE 0 /* Leaf 0xb sub-leaf types */ +#define SMT_TYPE 1 +#define CORE_TYPE 2 #define LEAFB_SUBTYPE(ecx) (((ecx) >> 8) & 0xff) #define BITS_SHIFT_NEXT_LEVEL(eax) ((eax) & 0x1f) @@ -167,11 +171,13 @@ static void set_x2apic_bits(void) pr_info("UV: CPU does not have CPUID.11\n"); return; } + cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx); if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE)) { pr_info("UV: CPUID.11 not implemented\n"); return; } + sid_shift = BITS_SHIFT_NEXT_LEVEL(eax); sub_index = 1; do { @@ -182,8 +188,9 @@ static void set_x2apic_bits(void) } sub_index++; } while (LEAFB_SUBTYPE(ecx) != INVALID_TYPE); - uv_cpuid.apicid_shift = 0; - uv_cpuid.apicid_mask = (~(-1 << sid_shift)); + + uv_cpuid.apicid_shift = 0; + uv_cpuid.apicid_mask = (~(-1 << sid_shift)); uv_cpuid.socketid_shift = sid_shift; } @@ -194,10 +201,8 @@ static void __init early_get_apic_socketid_shift(void) set_x2apic_bits(); - pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", - uv_cpuid.apicid_shift, uv_cpuid.apicid_mask); - pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", - uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); + pr_info("UV: apicid_shift:%d apicid_mask:0x%x\n", uv_cpuid.apicid_shift, uv_cpuid.apicid_mask); + pr_info("UV: socketid_shift:%d pnode_mask:0x%x\n", uv_cpuid.socketid_shift, uv_cpuid.pnode_mask); } /* @@ -210,10 +215,8 @@ static void __init uv_set_apicid_hibit(void) union uv1h_lb_target_physical_apic_id_mask_u apicid_mask; if (is_uv1_hub()) { - apicid_mask.v = - uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK); - uv_apicid_hibits = - apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK; + apicid_mask.v = uv_early_read_mmr(UV1H_LB_TARGET_PHYSICAL_APIC_ID_MASK); + uv_apicid_hibits = apicid_mask.s1.bit_enables & UV_APICID_HIBIT_MASK; } } @@ -230,12 +233,12 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) return 0; } - /* Setup early hub type field in uv_hub_info for Node 0 */ + /* Set up early hub type field in uv_hub_info for Node 0 */ uv_cpu_info->p_uv_hub_info = &uv_hub_info_node0; /* * Determine UV arch type. - * SGI: UV100/1000 + * SGI: UV100/1000 * SGI2: UV2000/3000 * SGI3: UV300 (truncated to 4 chars because of different varieties) * SGI4: UV400 (truncated to 4 chars because of different varieties) @@ -251,31 +254,32 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) pnodeid = early_get_pnodeid(); early_get_apic_socketid_shift(); - x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; + + x86_platform.is_untracked_pat_range = uv_is_untracked_pat_range; x86_platform.nmi_init = uv_nmi_init; - if (!strcmp(oem_table_id, "UVX")) { /* most common */ + if (!strcmp(oem_table_id, "UVX")) { + /* This is the most common hardware variant: */ uv_system_type = UV_X2APIC; uv_apic = 0; - } else if (!strcmp(oem_table_id, "UVH")) { /* only UV1 systems */ + } else if (!strcmp(oem_table_id, "UVH")) { + /* Only UV1 systems: */ uv_system_type = UV_NON_UNIQUE_APIC; - __this_cpu_write(x2apic_extra_bits, - pnodeid << uvh_apicid.s.pnode_shift); + __this_cpu_write(x2apic_extra_bits, pnodeid << uvh_apicid.s.pnode_shift); uv_set_apicid_hibit(); uv_apic = 1; - } else if (!strcmp(oem_table_id, "UVL")) { /* only used for */ - uv_system_type = UV_LEGACY_APIC; /* very small systems */ + } else if (!strcmp(oem_table_id, "UVL")) { + /* Only used for very small systems: */ + uv_system_type = UV_LEGACY_APIC; uv_apic = 0; } else { goto badbios; } - pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", - oem_id, oem_table_id, uv_system_type, - uv_min_hub_revision_id, uv_apic); + pr_info("UV: OEM IDs %s/%s, System/HUB Types %d/%d, uv_apic %d\n", oem_id, oem_table_id, uv_system_type, uv_min_hub_revision_id, uv_apic); return uv_apic; @@ -308,16 +312,18 @@ EXPORT_SYMBOL_GPL(uv_possible_blades); unsigned long sn_rtc_cycles_per_second; EXPORT_SYMBOL(sn_rtc_cycles_per_second); -/* the following values are used for the per node hub info struct */ -static __initdata unsigned short *_node_to_pnode; -static __initdata unsigned short _min_socket, _max_socket; -static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; -static __initdata struct uv_gam_range_entry *uv_gre_table; -static __initdata struct uv_gam_parameters *uv_gp_table; -static __initdata unsigned short *_socket_to_node; -static __initdata unsigned short *_socket_to_pnode; -static __initdata unsigned short *_pnode_to_socket; -static __initdata struct uv_gam_range_s *_gr_table; +/* The following values are used for the per node hub info struct */ +static __initdata unsigned short *_node_to_pnode; +static __initdata unsigned short _min_socket, _max_socket; +static __initdata unsigned short _min_pnode, _max_pnode, _gr_table_len; +static __initdata struct uv_gam_range_entry *uv_gre_table; +static __initdata struct uv_gam_parameters *uv_gp_table; +static __initdata unsigned short *_socket_to_node; +static __initdata unsigned short *_socket_to_pnode; +static __initdata unsigned short *_pnode_to_socket; + +static __initdata struct uv_gam_range_s *_gr_table; + #define SOCK_EMPTY ((unsigned short)~0) extern int uv_hub_info_version(void) @@ -326,7 +332,7 @@ extern int uv_hub_info_version(void) } EXPORT_SYMBOL(uv_hub_info_version); -/* Build GAM range lookup table */ +/* Build GAM range lookup table: */ static __init void build_uv_gr_table(void) { struct uv_gam_range_entry *gre = uv_gre_table; @@ -344,25 +350,24 @@ static __init void build_uv_gr_table(void) for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) { - if (!ram_limit) { /* mark hole between ram/non-ram */ + if (!ram_limit) { + /* Mark hole between RAM/non-RAM: */ ram_limit = last_limit; last_limit = gre->limit; lsid++; continue; } last_limit = gre->limit; - pr_info("UV: extra hole in GAM RE table @%d\n", - (int)(gre - uv_gre_table)); + pr_info("UV: extra hole in GAM RE table @%d\n", (int)(gre - uv_gre_table)); continue; } if (_max_socket < gre->sockid) { - pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n", - gre->sockid, _max_socket, - (int)(gre - uv_gre_table)); + pr_err("UV: GAM table sockid(%d) too large(>%d) @%d\n", gre->sockid, _max_socket, (int)(gre - uv_gre_table)); continue; } sid = gre->sockid - _min_socket; - if (lsid < sid) { /* new range */ + if (lsid < sid) { + /* New range: */ grt = &_gr_table[indx]; grt->base = lindx; grt->nasid = gre->nasid; @@ -371,27 +376,32 @@ static __init void build_uv_gr_table(void) lindx = indx++; continue; } - if (lsid == sid && !ram_limit) { /* update range */ - if (grt->limit == last_limit) { /* .. if contiguous */ + /* Update range: */ + if (lsid == sid && !ram_limit) { + /* .. if contiguous: */ + if (grt->limit == last_limit) { grt->limit = last_limit = gre->limit; continue; } } - if (!ram_limit) { /* non-contiguous ram range */ + /* Non-contiguous RAM range: */ + if (!ram_limit) { grt++; grt->base = lindx; grt->nasid = gre->nasid; grt->limit = last_limit = gre->limit; continue; } - grt++; /* non-contiguous/non-ram */ - grt->base = grt - _gr_table; /* base is this entry */ + /* Non-contiguous/non-RAM: */ + grt++; + /* base is this entry */ + grt->base = grt - _gr_table; grt->nasid = gre->nasid; grt->limit = last_limit = gre->limit; lsid++; } - /* shorten table if possible */ + /* Shorten table if possible */ grt++; i = grt - _gr_table; if (i < _gr_table_len) { @@ -405,16 +415,15 @@ static __init void build_uv_gr_table(void) } } - /* display resultant gam range table */ + /* Display resultant GAM range table: */ for (i = 0, grt = _gr_table; i < _gr_table_len; i++, grt++) { + unsigned long start, end; int gb = grt->base; - unsigned long start = gb < 0 ? 0 : - (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT; - unsigned long end = - (unsigned long)grt->limit << UV_GAM_RANGE_SHFT; - pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n", - i, grt->nasid, start, end, gb); + start = gb < 0 ? 0 : (unsigned long)_gr_table[gb].limit << UV_GAM_RANGE_SHFT; + end = (unsigned long)grt->limit << UV_GAM_RANGE_SHFT; + + pr_info("UV: GAM Range %2d %04x 0x%013lx-0x%013lx (%d)\n", i, grt->nasid, start, end, gb); } } @@ -425,16 +434,19 @@ static int uv_wakeup_secondary(int phys_apicid, unsigned long start_rip) pnode = uv_apicid_to_pnode(phys_apicid); phys_apicid |= uv_apicid_hibits; + val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_INIT; + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); val = (1UL << UVH_IPI_INT_SEND_SHFT) | (phys_apicid << UVH_IPI_INT_APIC_ID_SHFT) | ((start_rip << UVH_IPI_INT_VECTOR_SHFT) >> 12) | APIC_DM_STARTUP; + uv_write_global_mmr64(pnode, UVH_IPI_INT, val); return 0; @@ -568,7 +580,7 @@ static struct apic apic_x2apic_uv_x __ro_after_init = { .apic_id_registered = uv_apic_id_registered, .irq_delivery_mode = dest_Fixed, - .irq_dest_mode = 0, /* physical */ + .irq_dest_mode = 0, /* Physical */ .target_cpus = online_target_cpus, .disable_esr = 0, @@ -629,23 +641,22 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) switch (i) { case 0: m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_0_MMR; - m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR; + m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_0_MMR; break; case 1: m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_1_MMR; - m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR; + m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_1_MMR; break; case 2: m_redirect = UVH_RH_GAM_ALIAS210_REDIRECT_CONFIG_2_MMR; - m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR; + m_overlay = UVH_RH_GAM_ALIAS210_OVERLAY_CONFIG_2_MMR; break; } alias.v = uv_read_local_mmr(m_overlay); if (alias.s.enable && alias.s.base == 0) { *size = (1UL << alias.s.m_alias); redirect.v = uv_read_local_mmr(m_redirect); - *base = (unsigned long)redirect.s.dest_base - << DEST_SHIFT; + *base = (unsigned long)redirect.s.dest_base << DEST_SHIFT; return; } } @@ -654,8 +665,7 @@ static __init void get_lowmem_redirect(unsigned long *base, unsigned long *size) enum map_type {map_wb, map_uc}; -static __init void map_high(char *id, unsigned long base, int pshift, - int bshift, int max_pnode, enum map_type map_type) +static __init void map_high(char *id, unsigned long base, int pshift, int bshift, int max_pnode, enum map_type map_type) { unsigned long bytes, paddr; @@ -680,16 +690,19 @@ static __init void map_gru_distributed(unsigned long c) int nid; gru.v = c; - /* only base bits 42:28 relevant in dist mode */ + + /* Only base bits 42:28 relevant in dist mode */ gru_dist_base = gru.v & 0x000007fff0000000UL; if (!gru_dist_base) { pr_info("UV: Map GRU_DIST base address NULL\n"); return; } + bytes = 1UL << UVH_RH_GAM_GRU_OVERLAY_CONFIG_MMR_BASE_SHFT; gru_dist_lmask = ((1UL << uv_hub_info->m_val) - 1) & ~(bytes - 1); gru_dist_umask = ~((1UL << uv_hub_info->m_val) - 1); gru_dist_base &= gru_dist_lmask; /* Clear bits above M */ + for_each_online_node(nid) { paddr = ((u64)uv_node_to_pnode(nid) << uv_hub_info->m_val) | gru_dist_base; @@ -697,11 +710,12 @@ static __init void map_gru_distributed(unsigned long c) gru_first_node_paddr = min(paddr, gru_first_node_paddr); gru_last_node_paddr = max(paddr, gru_last_node_paddr); } + /* Save upper (63:M) bits of address only for is_GRU_range */ gru_first_node_paddr &= gru_dist_umask; gru_last_node_paddr &= gru_dist_umask; - pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", - gru_dist_base, gru_first_node_paddr, gru_last_node_paddr); + + pr_debug("UV: Map GRU_DIST base 0x%016llx 0x%016llx - 0x%016llx\n", gru_dist_base, gru_first_node_paddr, gru_last_node_paddr); } static __init void map_gru_high(int max_pnode) @@ -721,6 +735,7 @@ static __init void map_gru_high(int max_pnode) map_gru_distributed(gru.v); return; } + base = (gru.v & mask) >> shift; map_high("GRU", base, shift, shift, max_pnode, map_wb); gru_start_paddr = ((u64)base << shift); @@ -774,8 +789,8 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) id = mmiohs[index].id; overlay.v = uv_read_local_mmr(mmiohs[index].overlay); - pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", - id, overlay.v, overlay.s3.base, overlay.s3.m_io); + + pr_info("UV: %s overlay 0x%lx base:0x%x m_io:%d\n", id, overlay.v, overlay.s3.base, overlay.s3.m_io); if (!overlay.s3.enable) { pr_info("UV: %s disabled\n", id); return; @@ -786,7 +801,8 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) m_io = overlay.s3.m_io; mmr = mmiohs[index].redirect; n = UV3H_RH_GAM_MMIOH_REDIRECT_CONFIG0_MMR_DEPTH; - min_pnode *= 2; /* convert to NASID */ + /* Convert to NASID: */ + min_pnode *= 2; max_pnode *= 2; max_io = lnasid = fi = li = -1; @@ -795,16 +811,18 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) redirect.v = uv_read_local_mmr(mmr + i * 8); nasid = redirect.s3.nasid; + /* Invalid NASID: */ if (nasid < min_pnode || max_pnode < nasid) - nasid = -1; /* invalid NASID */ + nasid = -1; if (nasid == lnasid) { li = i; - if (i != n-1) /* last entry check */ + /* Last entry check: */ + if (i != n-1) continue; } - /* check if we have a cached (or last) redirect to print */ + /* Check if we have a cached (or last) redirect to print: */ if (lnasid != -1 || (i == n-1 && nasid != -1)) { unsigned long addr1, addr2; int f, l; @@ -816,12 +834,9 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) f = fi; l = li; } - addr1 = (base << shift) + - f * (1ULL << m_io); - addr2 = (base << shift) + - (l + 1) * (1ULL << m_io); - pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", - id, fi, li, lnasid, addr1, addr2); + addr1 = (base << shift) + f * (1ULL << m_io); + addr2 = (base << shift) + (l + 1) * (1ULL << m_io); + pr_info("UV: %s[%03d..%03d] NASID 0x%04x ADDR 0x%016lx - 0x%016lx\n", id, fi, li, lnasid, addr1, addr2); if (max_io < l) max_io = l; } @@ -829,8 +844,7 @@ static __init void map_mmioh_high_uv3(int index, int min_pnode, int max_pnode) lnasid = nasid; } - pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", - id, base, shift, m_io, max_io); + pr_info("UV: %s base:0x%lx shift:%d M_IO:%d MAX_IO:%d\n", id, base, shift, m_io, max_io); if (max_io >= 0) map_high(id, base, shift, m_io, max_io, map_uc); @@ -843,36 +857,35 @@ static __init void map_mmioh_high(int min_pnode, int max_pnode) int shift, enable, m_io, n_io; if (is_uv3_hub() || is_uv4_hub()) { - /* Map both MMIOH Regions */ + /* Map both MMIOH regions: */ map_mmioh_high_uv3(0, min_pnode, max_pnode); map_mmioh_high_uv3(1, min_pnode, max_pnode); return; } if (is_uv1_hub()) { - mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; - shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; - mmioh.v = uv_read_local_mmr(mmr); - enable = !!mmioh.s1.enable; - base = mmioh.s1.base; - m_io = mmioh.s1.m_io; - n_io = mmioh.s1.n_io; + mmr = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; + shift = UV1H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + mmioh.v = uv_read_local_mmr(mmr); + enable = !!mmioh.s1.enable; + base = mmioh.s1.base; + m_io = mmioh.s1.m_io; + n_io = mmioh.s1.n_io; } else if (is_uv2_hub()) { - mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; - shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; - mmioh.v = uv_read_local_mmr(mmr); - enable = !!mmioh.s2.enable; - base = mmioh.s2.base; - m_io = mmioh.s2.m_io; - n_io = mmioh.s2.n_io; - } else + mmr = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR; + shift = UV2H_RH_GAM_MMIOH_OVERLAY_CONFIG_MMR_BASE_SHFT; + mmioh.v = uv_read_local_mmr(mmr); + enable = !!mmioh.s2.enable; + base = mmioh.s2.base; + m_io = mmioh.s2.m_io; + n_io = mmioh.s2.n_io; + } else { return; + } if (enable) { max_pnode &= (1 << n_io) - 1; - pr_info( - "UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n", - base, shift, m_io, n_io, max_pnode); + pr_info("UV: base:0x%lx shift:%d N_IO:%d M_IO:%d max_pnode:0x%x\n", base, shift, m_io, n_io, max_pnode); map_high("MMIOH", base, shift, m_io, max_pnode, map_uc); } else { pr_info("UV: MMIOH disabled\n"); @@ -890,16 +903,16 @@ static __init void uv_rtc_init(void) long status; u64 ticks_per_sec; - status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, - &ticks_per_sec); + status = uv_bios_freq_base(BIOS_FREQ_BASE_REALTIME_CLOCK, &ticks_per_sec); + if (status != BIOS_STATUS_SUCCESS || ticks_per_sec < 100000) { - printk(KERN_WARNING - "unable to determine platform RTC clock frequency, " - "guessing.\n"); - /* BIOS gives wrong value for clock freq. so guess */ + pr_warn("UV: unable to determine platform RTC clock frequency, guessing.\n"); + + /* BIOS gives wrong value for clock frequency, so guess: */ sn_rtc_cycles_per_second = 1000000000000UL / 30000UL; - } else + } else { sn_rtc_cycles_per_second = ticks_per_sec; + } } /* @@ -910,19 +923,19 @@ static void uv_heartbeat(unsigned long ignored) struct timer_list *timer = &uv_scir_info->timer; unsigned char bits = uv_scir_info->state; - /* flip heartbeat bit */ + /* Flip heartbeat bit: */ bits ^= SCIR_CPU_HEARTBEAT; - /* is this cpu idle? */ + /* Is this CPU idle? */ if (idle_cpu(raw_smp_processor_id())) bits &= ~SCIR_CPU_ACTIVITY; else bits |= SCIR_CPU_ACTIVITY; - /* update system controller interface reg */ + /* Update system controller interface reg: */ uv_set_scir_bits(bits); - /* enable next timer period */ + /* Enable next timer period: */ mod_timer(timer, jiffies + SCIR_CPU_HB_INTERVAL); } @@ -937,7 +950,7 @@ static int uv_heartbeat_enable(unsigned int cpu) add_timer_on(timer, cpu); uv_cpu_scir_info(cpu)->enabled = 1; - /* also ensure that boot cpu is enabled */ + /* Also ensure that boot CPU is enabled: */ cpu = 0; } return 0; @@ -970,9 +983,11 @@ static __init int uv_init_heartbeat(void) { int cpu; - if (is_uv_system()) + if (is_uv_system()) { for_each_online_cpu(cpu) uv_heartbeat_enable(cpu); + } + return 0; } @@ -981,14 +996,10 @@ late_initcall(uv_init_heartbeat); #endif /* !CONFIG_HOTPLUG_CPU */ /* Direct Legacy VGA I/O traffic to designated IOH */ -int uv_set_vga_state(struct pci_dev *pdev, bool decode, - unsigned int command_bits, u32 flags) +int uv_set_vga_state(struct pci_dev *pdev, bool decode, unsigned int command_bits, u32 flags) { int domain, bus, rc; - PR_DEVEL("devfn %x decode %d cmd %x flags %d\n", - pdev->devfn, decode, command_bits, flags); - if (!(flags & PCI_VGA_STATE_CHANGE_BRIDGE)) return 0; @@ -999,13 +1010,12 @@ int uv_set_vga_state(struct pci_dev *pdev, bool decode, bus = pdev->bus->number; rc = uv_bios_set_legacy_vga_target(decode, domain, bus); - PR_DEVEL("vga decode %d %x:%x, rc: %d\n", decode, domain, bus, rc); return rc; } /* - * Called on each cpu to initialize the per_cpu UV data area. + * Called on each CPU to initialize the per_cpu UV data area. * FIXME: hotplug not supported yet */ void uv_cpu_init(void) @@ -1032,92 +1042,79 @@ static void get_mn(struct mn *mnp) union uvh_rh_gam_config_mmr_u m_n_config; union uv3h_gr0_gam_gr_config_u m_gr_config; - m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR); - mnp->n_val = m_n_config.s.n_skt; + /* Make sure the whole structure is well initialized: */ + memset(mnp, 0, sizeof(*mnp)); + + m_n_config.v = uv_read_local_mmr(UVH_RH_GAM_CONFIG_MMR); + mnp->n_val = m_n_config.s.n_skt; + if (is_uv4_hub()) { - mnp->m_val = 0; - mnp->n_lshift = 0; + mnp->m_val = 0; + mnp->n_lshift = 0; } else if (is_uv3_hub()) { - mnp->m_val = m_n_config.s3.m_skt; - m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG); - mnp->n_lshift = m_gr_config.s3.m_skt; + mnp->m_val = m_n_config.s3.m_skt; + m_gr_config.v = uv_read_local_mmr(UV3H_GR0_GAM_GR_CONFIG); + mnp->n_lshift = m_gr_config.s3.m_skt; } else if (is_uv2_hub()) { - mnp->m_val = m_n_config.s2.m_skt; - mnp->n_lshift = mnp->m_val == 40 ? 40 : 39; + mnp->m_val = m_n_config.s2.m_skt; + mnp->n_lshift = mnp->m_val == 40 ? 40 : 39; } else if (is_uv1_hub()) { - mnp->m_val = m_n_config.s1.m_skt; - mnp->n_lshift = mnp->m_val; + mnp->m_val = m_n_config.s1.m_skt; + mnp->n_lshift = mnp->m_val; } mnp->m_shift = mnp->m_val ? 64 - mnp->m_val : 0; } -void __init uv_init_hub_info(struct uv_hub_info_s *hub_info) +void __init uv_init_hub_info(struct uv_hub_info_s *hi) { - struct mn mn = {0}; /* avoid unitialized warnings */ union uvh_node_id_u node_id; + struct mn mn; get_mn(&mn); - hub_info->m_val = mn.m_val; - hub_info->n_val = mn.n_val; - hub_info->m_shift = mn.m_shift; - hub_info->n_lshift = mn.n_lshift ? mn.n_lshift : 0; - - hub_info->hub_revision = uv_hub_info->hub_revision; - hub_info->pnode_mask = uv_cpuid.pnode_mask; - hub_info->min_pnode = _min_pnode; - hub_info->min_socket = _min_socket; - hub_info->pnode_to_socket = _pnode_to_socket; - hub_info->socket_to_node = _socket_to_node; - hub_info->socket_to_pnode = _socket_to_pnode; - hub_info->gr_table_len = _gr_table_len; - hub_info->gr_table = _gr_table; - hub_info->gpa_mask = mn.m_val ? + hi->gpa_mask = mn.m_val ? (1UL << (mn.m_val + mn.n_val)) - 1 : (1UL << uv_cpuid.gpa_shift) - 1; - node_id.v = uv_read_local_mmr(UVH_NODE_ID); - uv_cpuid.gnode_shift = max_t(unsigned int, - uv_cpuid.gnode_shift, mn.n_val); - hub_info->gnode_extra = - (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; - - hub_info->gnode_upper = - ((unsigned long)hub_info->gnode_extra << mn.m_val); + hi->m_val = mn.m_val; + hi->n_val = mn.n_val; + hi->m_shift = mn.m_shift; + hi->n_lshift = mn.n_lshift ? mn.n_lshift : 0; + hi->hub_revision = uv_hub_info->hub_revision; + hi->pnode_mask = uv_cpuid.pnode_mask; + hi->min_pnode = _min_pnode; + hi->min_socket = _min_socket; + hi->pnode_to_socket = _pnode_to_socket; + hi->socket_to_node = _socket_to_node; + hi->socket_to_pnode = _socket_to_pnode; + hi->gr_table_len = _gr_table_len; + hi->gr_table = _gr_table; + + node_id.v = uv_read_local_mmr(UVH_NODE_ID); + uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val); + hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; + hi->gnode_upper = (unsigned long)hi->gnode_extra << mn.m_val; if (uv_gp_table) { - hub_info->global_mmr_base = uv_gp_table->mmr_base; - hub_info->global_mmr_shift = uv_gp_table->mmr_shift; - hub_info->global_gru_base = uv_gp_table->gru_base; - hub_info->global_gru_shift = uv_gp_table->gru_shift; - hub_info->gpa_shift = uv_gp_table->gpa_shift; - hub_info->gpa_mask = (1UL << hub_info->gpa_shift) - 1; + hi->global_mmr_base = uv_gp_table->mmr_base; + hi->global_mmr_shift = uv_gp_table->mmr_shift; + hi->global_gru_base = uv_gp_table->gru_base; + hi->global_gru_shift = uv_gp_table->gru_shift; + hi->gpa_shift = uv_gp_table->gpa_shift; + hi->gpa_mask = (1UL << hi->gpa_shift) - 1; } else { - hub_info->global_mmr_base = - uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & - ~UV_MMR_ENABLE; - hub_info->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT; + hi->global_mmr_base = uv_read_local_mmr(UVH_RH_GAM_MMR_OVERLAY_CONFIG_MMR) & ~UV_MMR_ENABLE; + hi->global_mmr_shift = _UV_GLOBAL_MMR64_PNODE_SHIFT; } - get_lowmem_redirect( - &hub_info->lowmem_remap_base, &hub_info->lowmem_remap_top); - - hub_info->apic_pnode_shift = uv_cpuid.socketid_shift; + get_lowmem_redirect(&hi->lowmem_remap_base, &hi->lowmem_remap_top); - /* show system specific info */ - pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", - hub_info->n_val, hub_info->m_val, - hub_info->m_shift, hub_info->n_lshift); + hi->apic_pnode_shift = uv_cpuid.socketid_shift; - pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", - hub_info->gpa_mask, hub_info->gpa_shift, - hub_info->pnode_mask, hub_info->apic_pnode_shift); - - pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", - hub_info->global_mmr_base, hub_info->global_mmr_shift, - hub_info->global_gru_base, hub_info->global_gru_shift); - - pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", - hub_info->gnode_upper, hub_info->gnode_extra); + /* Show system specific info: */ + pr_info("UV: N:%d M:%d m_shift:%d n_lshift:%d\n", hi->n_val, hi->m_val, hi->m_shift, hi->n_lshift); + pr_info("UV: gpa_mask/shift:0x%lx/%d pnode_mask:0x%x apic_pns:%d\n", hi->gpa_mask, hi->gpa_shift, hi->pnode_mask, hi->apic_pnode_shift); + pr_info("UV: mmr_base/shift:0x%lx/%ld gru_base/shift:0x%lx/%ld\n", hi->global_mmr_base, hi->global_mmr_shift, hi->global_gru_base, hi->global_gru_shift); + pr_info("UV: gnode_upper:0x%lx gnode_extra:0x%x\n", hi->gnode_upper, hi->gnode_extra); } static void __init decode_gam_params(unsigned long ptr) @@ -1143,12 +1140,9 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (!index) { pr_info("UV: GAM Range Table...\n"); - pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", - "Range", "", "Size", "Type", "NASID", - "SID", "PN"); + pr_info("UV: # %20s %14s %5s %4s %5s %3s %2s\n", "Range", "", "Size", "Type", "NASID", "SID", "PN"); } - pr_info( - "UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n", + pr_info("UV: %2d: 0x%014lx-0x%014lx %5luG %3d %04x %02x %02x\n", index++, (unsigned long)lgre << UV_GAM_RANGE_SHFT, (unsigned long)gre->limit << UV_GAM_RANGE_SHFT, @@ -1166,14 +1160,13 @@ static void __init decode_gam_rng_tbl(unsigned long ptr) if (pnode_max < gre->pnode) pnode_max = gre->pnode; } - _min_socket = sock_min; - _max_socket = sock_max; - _min_pnode = pnode_min; - _max_pnode = pnode_max; - _gr_table_len = index; - pr_info( - "UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", - index, _min_socket, _max_socket, _min_pnode, _max_pnode); + _min_socket = sock_min; + _max_socket = sock_max; + _min_pnode = pnode_min; + _max_pnode = pnode_max; + _gr_table_len = index; + + pr_info("UV: GRT: %d entries, sockets(min:%x,max:%x) pnodes(min:%x,max:%x)\n", index, _min_socket, _max_socket, _min_pnode, _max_pnode); } static int __init decode_uv_systab(void) @@ -1188,12 +1181,10 @@ static int __init decode_uv_systab(void) if ((!st) || (st->revision < UV_SYSTAB_VERSION_UV4_LATEST)) { int rev = st ? st->revision : 0; - pr_err( - "UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", - rev, UV_SYSTAB_VERSION_UV4_LATEST); - pr_err( - "UV: Cannot support UV operations, switching to generic PC\n"); + pr_err("UV: BIOS UVsystab version(%x) mismatch, expecting(%x)\n", rev, UV_SYSTAB_VERSION_UV4_LATEST); + pr_err("UV: Cannot support UV operations, switching to generic PC\n"); uv_system_type = UV_NONE; + return -EINVAL; } @@ -1219,7 +1210,7 @@ static int __init decode_uv_systab(void) } /* - * Setup physical blade translations from UVH_NODE_PRESENT_TABLE + * Set up physical blade translations from UVH_NODE_PRESENT_TABLE * .. NB: UVH_NODE_PRESENT_TABLE is going away, * .. being replaced by GAM Range Table */ @@ -1255,14 +1246,13 @@ static void __init build_socket_tables(void) if (!gre) { if (is_uv1_hub() || is_uv2_hub() || is_uv3_hub()) { pr_info("UV: No UVsystab socket table, ignoring\n"); - return; /* not required */ + return; } - pr_crit( - "UV: Error: UVsystab address translations not available!\n"); + pr_crit("UV: Error: UVsystab address translations not available!\n"); BUG(); } - /* build socket id -> node id, pnode */ + /* Build socket id -> node id, pnode */ num = maxsock - minsock + 1; bytes = num * sizeof(_socket_to_node[0]); _socket_to_node = kmalloc(bytes, GFP_KERNEL); @@ -1279,27 +1269,27 @@ static void __init build_socket_tables(void) for (i = 0; i < nump; i++) _pnode_to_socket[i] = SOCK_EMPTY; - /* fill in pnode/node/addr conversion list values */ + /* Fill in pnode/node/addr conversion list values: */ pr_info("UV: GAM Building socket/pnode conversion tables\n"); for (; gre->type != UV_GAM_RANGE_TYPE_UNUSED; gre++) { if (gre->type == UV_GAM_RANGE_TYPE_HOLE) continue; i = gre->sockid - minsock; + /* Duplicate: */ if (_socket_to_pnode[i] != SOCK_EMPTY) - continue; /* duplicate */ + continue; _socket_to_pnode[i] = gre->pnode; i = gre->pnode - minpnode; _pnode_to_socket[i] = gre->sockid; - pr_info( - "UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", + pr_info("UV: sid:%02x type:%d nasid:%04x pn:%02x pn2s:%2x\n", gre->sockid, gre->type, gre->nasid, _socket_to_pnode[gre->sockid - minsock], _pnode_to_socket[gre->pnode - minpnode]); } - /* Set socket -> node values */ + /* Set socket -> node values: */ lnid = -1; for_each_present_cpu(cpu) { int nid = cpu_to_node(cpu); @@ -1315,7 +1305,7 @@ static void __init build_socket_tables(void) sockid, apicid, nid); } - /* Setup physical blade to pnode translation from GAM Range Table */ + /* Set up physical blade to pnode translation from GAM Range Table: */ bytes = num_possible_nodes() * sizeof(_node_to_pnode[0]); _node_to_pnode = kmalloc(bytes, GFP_KERNEL); BUG_ON(!_node_to_pnode); @@ -1325,8 +1315,7 @@ static void __init build_socket_tables(void) for (sockid = minsock; sockid <= maxsock; sockid++) { if (lnid == _socket_to_node[sockid - minsock]) { - _node_to_pnode[lnid] = - _socket_to_pnode[sockid - minsock]; + _node_to_pnode[lnid] = _socket_to_pnode[sockid - minsock]; break; } } @@ -1343,8 +1332,7 @@ static void __init build_socket_tables(void) pr_info("UV: Checking socket->node/pnode for identity maps\n"); if (minsock == 0) { for (i = 0; i < num; i++) - if (_socket_to_node[i] == SOCK_EMPTY || - i != _socket_to_node[i]) + if (_socket_to_node[i] == SOCK_EMPTY || i != _socket_to_node[i]) break; if (i >= num) { kfree(_socket_to_node); @@ -1383,9 +1371,13 @@ void __init uv_system_init(void) map_low_mmrs(); - uv_bios_init(); /* get uv_systab for decoding */ + /* Get uv_systab for decoding: */ + uv_bios_init(); + + /* If there's an UVsystab problem then abort UV init: */ if (decode_uv_systab() < 0) - return; /* UVsystab problem, abort UV init */ + return; + build_socket_tables(); build_uv_gr_table(); uv_init_hub_info(&hub_info); @@ -1393,14 +1385,10 @@ void __init uv_system_init(void) if (!_node_to_pnode) boot_init_possible_blades(&hub_info); - /* uv_num_possible_blades() is really the hub count */ - pr_info("UV: Found %d hubs, %d nodes, %d cpus\n", - uv_num_possible_blades(), - num_possible_nodes(), - num_possible_cpus()); + /* uv_num_possible_blades() is really the hub count: */ + pr_info("UV: Found %d hubs, %d nodes, %d CPUs\n", uv_num_possible_blades(), num_possible_nodes(), num_possible_cpus()); - uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, - &sn_region_size, &system_serial_number); + uv_bios_get_sn_info(0, &uv_type, &sn_partition_id, &sn_coherency_id, &sn_region_size, &system_serial_number); hub_info.coherency_domain_number = sn_coherency_id; uv_rtc_init(); @@ -1413,33 +1401,31 @@ void __init uv_system_init(void) struct uv_hub_info_s *new_hub; if (__uv_hub_info_list[nodeid]) { - pr_err("UV: Node %d UV HUB already initialized!?\n", - nodeid); + pr_err("UV: Node %d UV HUB already initialized!?\n", nodeid); BUG(); } /* Allocate new per hub info list */ - new_hub = (nodeid == 0) ? - &uv_hub_info_node0 : - kzalloc_node(bytes, GFP_KERNEL, nodeid); + new_hub = (nodeid == 0) ? &uv_hub_info_node0 : kzalloc_node(bytes, GFP_KERNEL, nodeid); BUG_ON(!new_hub); __uv_hub_info_list[nodeid] = new_hub; new_hub = uv_hub_info_list(nodeid); BUG_ON(!new_hub); *new_hub = hub_info; - /* Use information from GAM table if available */ + /* Use information from GAM table if available: */ if (_node_to_pnode) new_hub->pnode = _node_to_pnode[nodeid]; - else /* Fill in during cpu loop */ + else /* Or fill in during CPU loop: */ new_hub->pnode = 0xffff; + new_hub->numa_blade_id = uv_node_to_blade_id(nodeid); new_hub->memory_nid = -1; new_hub->nr_possible_cpus = 0; new_hub->nr_online_cpus = 0; } - /* Initialize per cpu info */ + /* Initialize per CPU info: */ for_each_possible_cpu(cpu) { int apicid = per_cpu(x86_cpu_to_apicid, cpu); int numa_node_id; @@ -1450,22 +1436,24 @@ void __init uv_system_init(void) pnode = uv_apicid_to_pnode(apicid); uv_cpu_info_per(cpu)->p_uv_hub_info = uv_hub_info_list(nodeid); - uv_cpu_info_per(cpu)->blade_cpu_id = - uv_cpu_hub_info(cpu)->nr_possible_cpus++; + uv_cpu_info_per(cpu)->blade_cpu_id = uv_cpu_hub_info(cpu)->nr_possible_cpus++; if (uv_cpu_hub_info(cpu)->memory_nid == -1) uv_cpu_hub_info(cpu)->memory_nid = cpu_to_node(cpu); - if (nodeid != numa_node_id && /* init memoryless node */ + + /* Init memoryless node: */ + if (nodeid != numa_node_id && uv_hub_info_list(numa_node_id)->pnode == 0xffff) uv_hub_info_list(numa_node_id)->pnode = pnode; else if (uv_cpu_hub_info(cpu)->pnode == 0xffff) uv_cpu_hub_info(cpu)->pnode = pnode; + uv_cpu_scir_info(cpu)->offset = uv_scir_offset(apicid); } for_each_node(nodeid) { unsigned short pnode = uv_hub_info_list(nodeid)->pnode; - /* Add pnode info for pre-GAM list nodes without cpus */ + /* Add pnode info for pre-GAM list nodes without CPUs: */ if (pnode == 0xffff) { unsigned long paddr; @@ -1491,12 +1479,12 @@ void __init uv_system_init(void) uv_scir_register_cpu_notifier(); proc_mkdir("sgi_uv", NULL); - /* register Legacy VGA I/O redirection handler */ + /* Register Legacy VGA I/O redirection handler: */ pci_register_set_vga_state(uv_set_vga_state); /* * For a kdump kernel the reset must be BOOT_ACPI, not BOOT_EFI, as - * EFI is not enabled in the kdump kernel. + * EFI is not enabled in the kdump kernel: */ if (is_kdump_kernel()) reboot_type = BOOT_ACPI; -- cgit v1.2.3 From 74862b03b46a852662c1a30c859b985261ff5d5c Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:18 -0600 Subject: x86/platform/UV: Add Support for UV4 Hubless systems Add recognition and support for UV4 hubless systems. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Acked-by: Thomas Gleixner Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170125163517.398537358@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv.h | 2 ++ arch/x86/kernel/apic/x2apic_uv_x.c | 30 ++++++++++++++++++++++++++++-- 2 files changed, 30 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv.h b/arch/x86/include/asm/uv/uv.h index 062921ef34e9..6686820feae9 100644 --- a/arch/x86/include/asm/uv/uv.h +++ b/arch/x86/include/asm/uv/uv.h @@ -10,6 +10,7 @@ struct mm_struct; extern enum uv_system_type get_uv_system_type(void); extern int is_uv_system(void); +extern int is_uv_hubless(void); extern void uv_cpu_init(void); extern void uv_nmi_init(void); extern void uv_system_init(void); @@ -23,6 +24,7 @@ extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, static inline enum uv_system_type get_uv_system_type(void) { return UV_NONE; } static inline int is_uv_system(void) { return 0; } +static inline int is_uv_hubless(void) { return 0; } static inline void uv_cpu_init(void) { } static inline void uv_system_init(void) { } static inline const struct cpumask * diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index 656994ac4677..d02cc7e65e4d 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -42,6 +42,7 @@ DEFINE_PER_CPU(int, x2apic_extra_bits); static enum uv_system_type uv_system_type; +static bool uv_hubless_system; static u64 gru_start_paddr, gru_end_paddr; static u64 gru_dist_base, gru_first_node_paddr = -1LL, gru_last_node_paddr; static u64 gru_dist_lmask, gru_dist_umask; @@ -225,8 +226,14 @@ static int __init uv_acpi_madt_oem_check(char *oem_id, char *oem_table_id) int pnodeid; int uv_apic; - if (strncmp(oem_id, "SGI", 3) != 0) + if (strncmp(oem_id, "SGI", 3) != 0) { + if (strncmp(oem_id, "NSGI", 4) == 0) { + uv_hubless_system = true; + pr_info("UV: OEM IDs %s/%s, HUBLESS\n", + oem_id, oem_table_id); + } return 0; + } if (numa_off) { pr_err("UV: NUMA is off, disabling UV support\n"); @@ -300,6 +307,12 @@ int is_uv_system(void) } EXPORT_SYMBOL_GPL(is_uv_system); +int is_uv_hubless(void) +{ + return uv_hubless_system; +} +EXPORT_SYMBOL_GPL(is_uv_hubless); + void **__uv_hub_info_list; EXPORT_SYMBOL_GPL(__uv_hub_info_list); @@ -1353,7 +1366,7 @@ static void __init build_socket_tables(void) } } -void __init uv_system_init(void) +static void __init uv_system_init_hub(void) { struct uv_hub_info_s hub_info = {0}; int bytes, cpu, nodeid; @@ -1490,4 +1503,17 @@ void __init uv_system_init(void) reboot_type = BOOT_ACPI; } +/* + * There is a small amount of UV specific code needed to initialize a + * UV system that does not have a "UV HUB" (referred to as "hubless"). + */ +void __init uv_system_init(void) +{ + if (likely(!is_uv_system() && !is_uv_hubless())) + return; + + if (is_uv_system()) + uv_system_init_hub(); +} + apic_driver(apic_x2apic_uv_x); -- cgit v1.2.3 From abdf1df6bc0416ec19b841e92b497ca55b23454c Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:19 -0600 Subject: x86/platform/UV: Add Support for UV4 Hubless NMIs Merge new UV Hubless NMI support into existing UV NMI handler. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Acked-by: Thomas Gleixner Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170125163517.585269837@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 3 + arch/x86/kernel/apic/x2apic_uv_x.c | 2 + arch/x86/platform/uv/uv_nmi.c | 193 ++++++++++++++++++++++++++++++++----- 3 files changed, 176 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 097b80c989c4..72e8300b1e8a 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -772,6 +772,7 @@ static inline int uv_num_possible_blades(void) /* Per Hub NMI support */ extern void uv_nmi_setup(void); +extern void uv_nmi_setup_hubless(void); /* BMC sets a bit this MMR non-zero before sending an NMI */ #define UVH_NMI_MMR UVH_SCRATCH5 @@ -799,6 +800,8 @@ struct uv_hub_nmi_s { atomic_t read_mmr_count; /* count of MMR reads */ atomic_t nmi_count; /* count of true UV NMIs */ unsigned long nmi_value; /* last value read from NMI MMR */ + bool hub_present; /* false means UV hubless system */ + bool pch_owner; /* indicates this hub owns PCH */ }; struct uv_cpu_nmi_s { diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index d02cc7e65e4d..e9f8f8cdd570 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1514,6 +1514,8 @@ void __init uv_system_init(void) if (is_uv_system()) uv_system_init_hub(); + else + uv_nmi_setup_hubless(); } apic_driver(apic_x2apic_uv_x); diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 8410e7d0a5b5..df7b092941fe 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -67,6 +67,18 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list; DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); EXPORT_PER_CPU_SYMBOL_GPL(uv_cpu_nmi); +/* UV hubless values */ +#define NMI_CONTROL_PORT 0x70 +#define NMI_DUMMY_PORT 0x71 +#define GPI_NMI_STS_GPP_D_0 0x164 +#define GPI_NMI_ENA_GPP_D_0 0x174 +#define STS_GPP_D_0_MASK 0x1 +#define PAD_CFG_DW0_GPP_D_0 0x4c0 +#define GPIROUTNMI (1ul << 17) +#define PCH_PCR_GPIO_1_BASE 0xfdae0000ul +#define PCH_PCR_GPIO_ADDRESS(offset) (int *)((u64)(pch_base) | (u64)(offset)) + +static u64 *pch_base; static unsigned long nmi_mmr; static unsigned long nmi_mmr_clear; static unsigned long nmi_mmr_pending; @@ -144,6 +156,19 @@ module_param_named(wait_count, uv_nmi_wait_count, int, 0644); static int uv_nmi_retry_count = 500; module_param_named(retry_count, uv_nmi_retry_count, int, 0644); +static bool uv_pch_intr_enable = true; +static bool uv_pch_intr_now_enabled; +module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644); + +static int uv_nmi_debug; +module_param_named(debug, uv_nmi_debug, int, 0644); + +#define nmi_debug(fmt, ...) \ + do { \ + if (uv_nmi_debug) \ + pr_info(fmt, ##__VA_ARGS__); \ + } while (0) + /* * Valid NMI Actions: * "dump" - dump process stack for each cpu @@ -191,6 +216,77 @@ static inline void uv_local_mmr_clear_nmi(void) uv_write_local_mmr(nmi_mmr_clear, nmi_mmr_pending); } +/* + * UV hubless NMI handler functions + */ +static inline void uv_reassert_nmi(void) +{ + /* (from arch/x86/include/asm/mach_traps.h) */ + outb(0x8f, NMI_CONTROL_PORT); + inb(NMI_DUMMY_PORT); /* dummy read */ + outb(0x0f, NMI_CONTROL_PORT); + inb(NMI_DUMMY_PORT); /* dummy read */ +} + +static void uv_init_hubless_pch_io(int offset, int mask, int data) +{ + int *addr = PCH_PCR_GPIO_ADDRESS(offset); + int readd = readl(addr); + + if (mask) { /* OR in new data */ + int writed = (readd & ~mask) | data; + + nmi_debug("UV:PCH: %p = %x & %x | %x (%x)\n", + addr, readd, ~mask, data, writed); + writel(writed, addr); + } else if (readd & data) { /* clear status bit */ + nmi_debug("UV:PCH: %p = %x\n", addr, data); + writel(data, addr); + } + + (void)readl(addr); /* flush write data */ +} + +static void uv_nmi_setup_hubless_intr(void) +{ + uv_pch_intr_now_enabled = uv_pch_intr_enable; + + uv_init_hubless_pch_io( + PAD_CFG_DW0_GPP_D_0, GPIROUTNMI, + uv_pch_intr_now_enabled ? GPIROUTNMI : 0); + + nmi_debug("UV:NMI: GPP_D_0 interrupt %s\n", + uv_pch_intr_now_enabled ? "enabled" : "disabled"); +} + +static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi) +{ + int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0); + int status = *pstat; + + hub_nmi->nmi_value = status; + atomic_inc(&hub_nmi->read_mmr_count); + + if (!(status & STS_GPP_D_0_MASK)) /* Not a UV external NMI */ + return 0; + + *pstat = STS_GPP_D_0_MASK; /* Is a UV NMI: clear GPP_D_0 status */ + (void)*pstat; /* flush write */ + + return 1; +} + +static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi) +{ + if (hub_nmi->hub_present) + return uv_nmi_test_mmr(hub_nmi); + + if (hub_nmi->pch_owner) /* Only PCH owner can check status */ + return uv_nmi_test_hubless(hub_nmi); + + return -1; +} + /* * If first cpu in on this hub, set hub_nmi "in_nmi" and "owner" values and * return true. If first cpu in on the system, set global "in_nmi" flag. @@ -214,6 +310,7 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) { int cpu = smp_processor_id(); int nmi = 0; + int nmi_detected = 0; local64_inc(&uv_nmi_count); this_cpu_inc(uv_cpu_nmi.queries); @@ -224,20 +321,26 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) break; if (raw_spin_trylock(&hub_nmi->nmi_lock)) { + nmi_detected = uv_test_nmi(hub_nmi); - /* check hub MMR NMI flag */ - if (uv_nmi_test_mmr(hub_nmi)) { + /* check flag for UV external NMI */ + if (nmi_detected > 0) { uv_set_in_nmi(cpu, hub_nmi); nmi = 1; break; } - /* MMR NMI flag is clear */ + /* A non-PCH node in a hubless system waits for NMI */ + else if (nmi_detected < 0) + goto slave_wait; + + /* MMR/PCH NMI flag is clear */ raw_spin_unlock(&hub_nmi->nmi_lock); } else { - /* wait a moment for the hub nmi locker to set flag */ - cpu_relax(); + + /* Wait a moment for the HUB NMI locker to set flag */ +slave_wait: cpu_relax(); udelay(uv_nmi_slave_delay); /* re-check hub in_nmi flag */ @@ -246,13 +349,20 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) break; } - /* check if this BMC missed setting the MMR NMI flag */ + /* + * Check if this BMC missed setting the MMR NMI flag (or) + * UV hubless system where only PCH owner can check flag + */ if (!nmi) { nmi = atomic_read(&uv_in_nmi); if (nmi) uv_set_in_nmi(cpu, hub_nmi); } + /* If we're holding the hub lock, release it now */ + if (nmi_detected < 0) + raw_spin_unlock(&hub_nmi->nmi_lock); + } while (0); if (!nmi) @@ -269,7 +379,10 @@ static inline void uv_clear_nmi(int cpu) if (cpu == atomic_read(&hub_nmi->cpu_owner)) { atomic_set(&hub_nmi->cpu_owner, -1); atomic_set(&hub_nmi->in_nmi, 0); - uv_local_mmr_clear_nmi(); + if (hub_nmi->hub_present) + uv_local_mmr_clear_nmi(); + else + uv_reassert_nmi(); raw_spin_unlock(&hub_nmi->nmi_lock); } } @@ -297,11 +410,12 @@ static void uv_nmi_cleanup_mask(void) } } -/* Loop waiting as cpus enter nmi handler */ +/* Loop waiting as cpus enter NMI handler */ static int uv_nmi_wait_cpus(int first) { int i, j, k, n = num_online_cpus(); int last_k = 0, waiting = 0; + int cpu = smp_processor_id(); if (first) { cpumask_copy(uv_nmi_cpu_mask, cpu_online_mask); @@ -310,6 +424,12 @@ static int uv_nmi_wait_cpus(int first) k = n - cpumask_weight(uv_nmi_cpu_mask); } + /* PCH NMI causes only one cpu to respond */ + if (first && uv_pch_intr_now_enabled) { + cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); + return n - k - 1; + } + udelay(uv_nmi_initial_delay); for (i = 0; i < uv_nmi_retry_count; i++) { int loop_delay = uv_nmi_loop_delay; @@ -358,7 +478,7 @@ static void uv_nmi_wait(int master) break; /* if not all made it in, send IPI NMI to them */ - pr_alert("UV: Sending NMI IPI to %d non-responding CPUs: %*pbl\n", + pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n", cpumask_weight(uv_nmi_cpu_mask), cpumask_pr_args(uv_nmi_cpu_mask)); @@ -538,7 +658,7 @@ static inline int uv_nmi_kdb_reason(void) #else /* !CONFIG_KGDB_KDB */ static inline int uv_nmi_kdb_reason(void) { - /* Insure user is expecting to attach gdb remote */ + /* Ensure user is expecting to attach gdb remote */ if (uv_nmi_action_is("kgdb")) return 0; @@ -626,15 +746,18 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) /* Pause as all cpus enter the NMI handler */ uv_nmi_wait(master); - /* Dump state of each cpu */ - if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) + /* Process actions other than "kdump": */ + if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) { uv_nmi_dump_state(cpu, regs, master); - - /* Call KGDB/KDB if enabled */ - else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) + } else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) { uv_call_kgdb_kdb(cpu, regs, master); + } else { + if (master) + pr_alert("UV: unknown NMI action: %s\n", uv_nmi_action); + uv_nmi_sync_exit(master); + } - /* Clear per_cpu "in nmi" flag */ + /* Clear per_cpu "in_nmi" flag */ this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_OUT); /* Clear MMR NMI flag on each hub */ @@ -648,6 +771,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) atomic_set(&uv_nmi_cpu, -1); atomic_set(&uv_in_nmi, 0); atomic_set(&uv_nmi_kexec_failed, 0); + atomic_set(&uv_nmi_slave_continue, SLAVE_CLEAR); } uv_nmi_touch_watchdogs(); @@ -697,28 +821,53 @@ void uv_nmi_init(void) apic_write(APIC_LVT1, value); } -void uv_nmi_setup(void) +/* Setup HUB NMI info */ +void __init uv_nmi_setup_common(bool hubbed) { int size = sizeof(void *) * (1 << NODES_SHIFT); - int cpu, nid; + int cpu; - /* Setup hub nmi info */ - uv_nmi_setup_mmrs(); uv_hub_nmi_list = kzalloc(size, GFP_KERNEL); - pr_info("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size); + nmi_debug("UV: NMI hub list @ 0x%p (%d)\n", uv_hub_nmi_list, size); BUG_ON(!uv_hub_nmi_list); size = sizeof(struct uv_hub_nmi_s); for_each_present_cpu(cpu) { - nid = cpu_to_node(cpu); + int nid = cpu_to_node(cpu); if (uv_hub_nmi_list[nid] == NULL) { uv_hub_nmi_list[nid] = kzalloc_node(size, GFP_KERNEL, nid); BUG_ON(!uv_hub_nmi_list[nid]); raw_spin_lock_init(&(uv_hub_nmi_list[nid]->nmi_lock)); atomic_set(&uv_hub_nmi_list[nid]->cpu_owner, -1); + uv_hub_nmi_list[nid]->hub_present = hubbed; + uv_hub_nmi_list[nid]->pch_owner = (nid == 0); } uv_hub_nmi_per(cpu) = uv_hub_nmi_list[nid]; } BUG_ON(!alloc_cpumask_var(&uv_nmi_cpu_mask, GFP_KERNEL)); +} + +/* Setup for UV Hub systems */ +void __init uv_nmi_setup(void) +{ + uv_nmi_setup_mmrs(); + uv_nmi_setup_common(true); + uv_register_nmi_notifier(); + pr_info("UV: Hub NMI enabled\n"); +} + +/* Setup for UV Hubless systems */ +void __init uv_nmi_setup_hubless(void) +{ + uv_nmi_setup_common(false); + pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE); + nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n", + pch_base, PCH_PCR_GPIO_1_BASE); + uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0, + STS_GPP_D_0_MASK, STS_GPP_D_0_MASK); + uv_nmi_setup_hubless_intr(); + /* Ensure NMI enabled in Processor Interface Reg: */ + uv_reassert_nmi(); uv_register_nmi_notifier(); + pr_info("UV: Hubless NMI enabled\n"); } -- cgit v1.2.3 From 278c9b099b2fc0cc0a51de95a1dcefcf54ca2183 Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:20 -0600 Subject: x86/platform/UV: Add basic CPU NMI health check Add a low impact health check triggered by the system NMI command that essentially checks which CPUs are responding to external NMI's. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Reviewed-by: Alex Thorlton Acked-by: Thomas Gleixner Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170125163517.756690240@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index df7b092941fe..8a4aa5b3c11a 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -176,6 +176,7 @@ module_param_named(debug, uv_nmi_debug, int, 0644); * "kdump" - do crash dump * "kdb" - enter KDB (default) * "kgdb" - enter KGDB + * "health" - check if CPUs respond to NMI */ static char uv_nmi_action[8] = "kdb"; module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); @@ -571,6 +572,22 @@ static void uv_nmi_sync_exit(int master) } } +/* Current "health" check is to check which CPU's are responsive */ +static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master) +{ + if (master) { + int in = atomic_read(&uv_nmi_cpus_in_nmi); + int out = num_online_cpus() - in; + + pr_alert("UV: NMI CPU health check (non-responding:%d)\n", out); + atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); + } else { + while (!atomic_read(&uv_nmi_slave_continue)) + cpu_relax(); + } + uv_nmi_sync_exit(master); +} + /* Walk through cpu list and dump state of each */ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) { @@ -747,7 +764,9 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) uv_nmi_wait(master); /* Process actions other than "kdump": */ - if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) { + if (uv_nmi_action_is("health")) { + uv_nmi_action_health(cpu, regs, master); + } else if (uv_nmi_action_is("ips") || uv_nmi_action_is("dump")) { uv_nmi_dump_state(cpu, regs, master); } else if (uv_nmi_action_is("kdb") || uv_nmi_action_is("kgdb")) { uv_call_kgdb_kdb(cpu, regs, master); -- cgit v1.2.3 From f550e4692749a909d3f5453ef11b4c8ab2071070 Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:21 -0600 Subject: x86/platform/UV: Verify NMI action is valid, default is standard Verify that the NMI action being set is valid. The default NMI action changes from the non-standard 'kdb' to the more standard 'dump'. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Reviewed-by: Alex Thorlton Acked-by: Thomas Gleixner Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170125163517.922751779@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 69 ++++++++++++++++++++++++++++++++++++------- 1 file changed, 58 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 8a4aa5b3c11a..c10e00b2b2ee 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -169,17 +169,64 @@ module_param_named(debug, uv_nmi_debug, int, 0644); pr_info(fmt, ##__VA_ARGS__); \ } while (0) -/* - * Valid NMI Actions: - * "dump" - dump process stack for each cpu - * "ips" - dump IP info for each cpu - * "kdump" - do crash dump - * "kdb" - enter KDB (default) - * "kgdb" - enter KGDB - * "health" - check if CPUs respond to NMI - */ -static char uv_nmi_action[8] = "kdb"; -module_param_string(action, uv_nmi_action, sizeof(uv_nmi_action), 0644); +/* Valid NMI Actions */ +#define ACTION_LEN 16 +static struct nmi_action { + char *action; + char *desc; +} valid_acts[] = { + { "kdump", "do kernel crash dump" }, + { "dump", "dump process stack for each cpu" }, + { "ips", "dump Inst Ptr info for each cpu" }, + { "kdb", "enter KDB (needs kgdboc= assignment)" }, + { "kgdb", "enter KGDB (needs gdb target remote)" }, + { "health", "check if CPUs respond to NMI" }, +}; +typedef char action_t[ACTION_LEN]; +static action_t uv_nmi_action = { "dump" }; + +static int param_get_action(char *buffer, const struct kernel_param *kp) +{ + return sprintf(buffer, "%s\n", uv_nmi_action); +} + +static int param_set_action(const char *val, const struct kernel_param *kp) +{ + int i; + int n = ARRAY_SIZE(valid_acts); + char arg[ACTION_LEN], *p; + + /* (remove possible '\n') */ + strncpy(arg, val, ACTION_LEN - 1); + arg[ACTION_LEN - 1] = '\0'; + p = strchr(arg, '\n'); + if (p) + *p = '\0'; + + for (i = 0; i < n; i++) + if (!strcmp(arg, valid_acts[i].action)) + break; + + if (i < n) { + strcpy(uv_nmi_action, arg); + pr_info("UV: New NMI action:%s\n", uv_nmi_action); + return 0; + } + + pr_err("UV: Invalid NMI action:%s, valid actions are:\n", arg); + for (i = 0; i < n; i++) + pr_err("UV: %-8s - %s\n", + valid_acts[i].action, valid_acts[i].desc); + return -EINVAL; +} + +static const struct kernel_param_ops param_ops_action = { + .get = param_get_action, + .set = param_set_action, +}; +#define param_check_action(name, p) __param_check(name, p, action_t) + +module_param_named(action, uv_nmi_action, action, 0644); static inline bool uv_nmi_action_is(const char *action) { -- cgit v1.2.3 From 56e17ca2c5ed31f5812ed8e0694e7ef880068cfd Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:22 -0600 Subject: x86/platform/UV: Initialize PCH GPP_D_0 NMI Pin to be NMI source The initialize PCH NMI I/O function is separate and may be moved to BIOS for security reasons. This function detects whether the PCH NMI config has already been done and if not, it will then initialize the PCH here. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Acked-by: Thomas Gleixner Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170125163518.089387859@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 127 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index c10e00b2b2ee..6a71b087da98 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -70,6 +70,7 @@ EXPORT_PER_CPU_SYMBOL_GPL(uv_cpu_nmi); /* UV hubless values */ #define NMI_CONTROL_PORT 0x70 #define NMI_DUMMY_PORT 0x71 +#define PAD_OWN_GPP_D_0 0x2c #define GPI_NMI_STS_GPP_D_0 0x164 #define GPI_NMI_ENA_GPP_D_0 0x174 #define STS_GPP_D_0_MASK 0x1 @@ -160,6 +161,9 @@ static bool uv_pch_intr_enable = true; static bool uv_pch_intr_now_enabled; module_param_named(pch_intr_enable, uv_pch_intr_enable, bool, 0644); +static bool uv_pch_init_enable = true; +module_param_named(pch_init_enable, uv_pch_init_enable, bool, 0644); + static int uv_nmi_debug; module_param_named(debug, uv_nmi_debug, int, 0644); @@ -307,6 +311,127 @@ static void uv_nmi_setup_hubless_intr(void) uv_pch_intr_now_enabled ? "enabled" : "disabled"); } +static struct init_nmi { + unsigned int offset; + unsigned int mask; + unsigned int data; +} init_nmi[] = { + { /* HOSTSW_OWN_GPP_D_0 */ + .offset = 0x84, + .mask = 0x1, + .data = 0x0, /* ACPI Mode */ + }, + +/* clear status */ + { /* GPI_INT_STS_GPP_D_0 */ + .offset = 0x104, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + { /* GPI_GPE_STS_GPP_D_0 */ + .offset = 0x124, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + { /* GPI_SMI_STS_GPP_D_0 */ + .offset = 0x144, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + { /* GPI_NMI_STS_GPP_D_0 */ + .offset = 0x164, + .mask = 0x0, + .data = 0x1, /* Clear Status */ + }, + +/* disable interrupts */ + { /* GPI_INT_EN_GPP_D_0 */ + .offset = 0x114, + .mask = 0x1, + .data = 0x0, /* disable interrupt generation */ + }, + { /* GPI_GPE_EN_GPP_D_0 */ + .offset = 0x134, + .mask = 0x1, + .data = 0x0, /* disable interrupt generation */ + }, + { /* GPI_SMI_EN_GPP_D_0 */ + .offset = 0x154, + .mask = 0x1, + .data = 0x0, /* disable interrupt generation */ + }, + { /* GPI_NMI_EN_GPP_D_0 */ + .offset = 0x174, + .mask = 0x1, + .data = 0x0, /* disable interrupt generation */ + }, + +/* setup GPP_D_0 Pad Config */ + { /* PAD_CFG_DW0_GPP_D_0 */ + .offset = 0x4c0, + .mask = 0xffffffff, + .data = 0x82020100, +/* + * 31:30 Pad Reset Config (PADRSTCFG): = 2h # PLTRST# (default) + * + * 29 RX Pad State Select (RXPADSTSEL): = 0 # Raw RX pad state directly + * from RX buffer (default) + * + * 28 RX Raw Override to '1' (RXRAW1): = 0 # No Override + * + * 26:25 RX Level/Edge Configuration (RXEVCFG): + * = 0h # Level + * = 1h # Edge + * + * 23 RX Invert (RXINV): = 0 # No Inversion (signal active high) + * + * 20 GPIO Input Route IOxAPIC (GPIROUTIOXAPIC): + * = 0 # Routing does not cause peripheral IRQ... + * # (we want an NMI not an IRQ) + * + * 19 GPIO Input Route SCI (GPIROUTSCI): = 0 # Routing does not cause SCI. + * 18 GPIO Input Route SMI (GPIROUTSMI): = 0 # Routing does not cause SMI. + * 17 GPIO Input Route NMI (GPIROUTNMI): = 1 # Routing can cause NMI. + * + * 11:10 Pad Mode (PMODE1/0): = 0h = GPIO control the Pad. + * 9 GPIO RX Disable (GPIORXDIS): + * = 0 # Enable the input buffer (active low enable) + * + * 8 GPIO TX Disable (GPIOTXDIS): + * = 1 # Disable the output buffer; i.e. Hi-Z + * + * 1 GPIO RX State (GPIORXSTATE): This is the current internal RX pad state.. + * 0 GPIO TX State (GPIOTXSTATE): + * = 0 # (Leave at default) + */ + }, + +/* Pad Config DW1 */ + { /* PAD_CFG_DW1_GPP_D_0 */ + .offset = 0x4c4, + .mask = 0x3c00, + .data = 0, /* Termination = none (default) */ + }, +}; + +static void uv_init_hubless_pch_d0(void) +{ + int i, read; + + read = *PCH_PCR_GPIO_ADDRESS(PAD_OWN_GPP_D_0); + if (read != 0) { + pr_info("UV: Hubless NMI already configured\n"); + return; + } + + nmi_debug("UV: Initializing UV Hubless NMI on PCH\n"); + for (i = 0; i < ARRAY_SIZE(init_nmi); i++) { + uv_init_hubless_pch_io(init_nmi[i].offset, + init_nmi[i].mask, + init_nmi[i].data); + } +} + static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi) { int *pstat = PCH_PCR_GPIO_ADDRESS(GPI_NMI_STS_GPP_D_0); @@ -929,6 +1054,8 @@ void __init uv_nmi_setup_hubless(void) pch_base = xlate_dev_mem_ptr(PCH_PCR_GPIO_1_BASE); nmi_debug("UV: PCH base:%p from 0x%lx, GPP_D_0\n", pch_base, PCH_PCR_GPIO_1_BASE); + if (uv_pch_init_enable) + uv_init_hubless_pch_d0(); uv_init_hubless_pch_io(GPI_NMI_ENA_GPP_D_0, STS_GPP_D_0_MASK, STS_GPP_D_0_MASK); uv_nmi_setup_hubless_intr(); -- cgit v1.2.3 From 9ec808a0225aabab59fb2932b70784b087ac0f58 Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:23 -0600 Subject: x86/platform/UV: Ensure uv_system_init is called when necessary Move the check to whether this is a UV system that needs initialization from is_uv_system() to the internal uv_system_init() function. This is because on a UV system without a HUB the is_uv_system() returns false. But we still need some specific UV system initialization. See the uv_system_init() for change to a quick check if UV is applicable. This change should not increase overhead since is_uv_system() also called into this same area. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Acked-by: Thomas Gleixner Acked-by: Dimitri Sivanich Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170125163518.256403963@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/smpboot.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 46732dc3b73c..386c7f713c2a 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1341,8 +1341,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) pr_info("CPU0: "); print_cpu_info(&cpu_data(0)); - if (is_uv_system()) - uv_system_init(); + uv_system_init(); set_mtrr_aps_delayed_init(); -- cgit v1.2.3 From 1e74016370ec3d552a7f5df18bb2b0f1c80b5a9f Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Wed, 25 Jan 2017 10:35:24 -0600 Subject: x86/platform/UV: Clean up the NMI code to match current coding style Update UV NMI to current coding style. Signed-off-by: Mike Travis Acked-by: Thomas Gleixner Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Link: http://lkml.kernel.org/r/20170125163518.419094259@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/uv_nmi.c | 74 +++++++++++++++++++++---------------------- 1 file changed, 37 insertions(+), 37 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 6a71b087da98..0ecd7bf7d2d3 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -45,8 +45,8 @@ * * Handle system-wide NMI events generated by the global 'power nmi' command. * - * Basic operation is to field the NMI interrupt on each cpu and wait - * until all cpus have arrived into the nmi handler. If some cpus do not + * Basic operation is to field the NMI interrupt on each CPU and wait + * until all CPU's have arrived into the nmi handler. If some CPU's do not * make it into the handler, try and force them in with the IPI(NMI) signal. * * We also have to lessen UV Hub MMR accesses as much as possible as this @@ -56,7 +56,7 @@ * To do this we register our primary NMI notifier on the NMI_UNKNOWN * chain. This reduces the number of false NMI calls when the perf * tools are running which generate an enormous number of NMIs per - * second (~4M/s for 1024 cpu threads). Our secondary NMI handler is + * second (~4M/s for 1024 CPU threads). Our secondary NMI handler is * very short as it only checks that if it has been "pinged" with the * IPI(NMI) signal as mentioned above, and does not read the UV Hub's MMR. * @@ -113,7 +113,7 @@ static int param_get_local64(char *buffer, const struct kernel_param *kp) static int param_set_local64(const char *val, const struct kernel_param *kp) { - /* clear on any write */ + /* Clear on any write */ local64_set((local64_t *)kp->arg, 0); return 0; } @@ -322,7 +322,7 @@ static struct init_nmi { .data = 0x0, /* ACPI Mode */ }, -/* clear status */ +/* Clear status: */ { /* GPI_INT_STS_GPP_D_0 */ .offset = 0x104, .mask = 0x0, @@ -344,29 +344,29 @@ static struct init_nmi { .data = 0x1, /* Clear Status */ }, -/* disable interrupts */ +/* Disable interrupts: */ { /* GPI_INT_EN_GPP_D_0 */ .offset = 0x114, .mask = 0x1, - .data = 0x0, /* disable interrupt generation */ + .data = 0x0, /* Disable interrupt generation */ }, { /* GPI_GPE_EN_GPP_D_0 */ .offset = 0x134, .mask = 0x1, - .data = 0x0, /* disable interrupt generation */ + .data = 0x0, /* Disable interrupt generation */ }, { /* GPI_SMI_EN_GPP_D_0 */ .offset = 0x154, .mask = 0x1, - .data = 0x0, /* disable interrupt generation */ + .data = 0x0, /* Disable interrupt generation */ }, { /* GPI_NMI_EN_GPP_D_0 */ .offset = 0x174, .mask = 0x1, - .data = 0x0, /* disable interrupt generation */ + .data = 0x0, /* Disable interrupt generation */ }, -/* setup GPP_D_0 Pad Config */ +/* Setup GPP_D_0 Pad Config: */ { /* PAD_CFG_DW0_GPP_D_0 */ .offset = 0x4c0, .mask = 0xffffffff, @@ -444,7 +444,7 @@ static int uv_nmi_test_hubless(struct uv_hub_nmi_s *hub_nmi) return 0; *pstat = STS_GPP_D_0_MASK; /* Is a UV NMI: clear GPP_D_0 status */ - (void)*pstat; /* flush write */ + (void)*pstat; /* Flush write */ return 1; } @@ -461,8 +461,8 @@ static int uv_test_nmi(struct uv_hub_nmi_s *hub_nmi) } /* - * If first cpu in on this hub, set hub_nmi "in_nmi" and "owner" values and - * return true. If first cpu in on the system, set global "in_nmi" flag. + * If first CPU in on this hub, set hub_nmi "in_nmi" and "owner" values and + * return true. If first CPU in on the system, set global "in_nmi" flag. */ static int uv_set_in_nmi(int cpu, struct uv_hub_nmi_s *hub_nmi) { @@ -496,7 +496,7 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) if (raw_spin_trylock(&hub_nmi->nmi_lock)) { nmi_detected = uv_test_nmi(hub_nmi); - /* check flag for UV external NMI */ + /* Check flag for UV external NMI */ if (nmi_detected > 0) { uv_set_in_nmi(cpu, hub_nmi); nmi = 1; @@ -516,7 +516,7 @@ static int uv_check_nmi(struct uv_hub_nmi_s *hub_nmi) slave_wait: cpu_relax(); udelay(uv_nmi_slave_delay); - /* re-check hub in_nmi flag */ + /* Re-check hub in_nmi flag */ nmi = atomic_read(&hub_nmi->in_nmi); if (nmi) break; @@ -560,7 +560,7 @@ static inline void uv_clear_nmi(int cpu) } } -/* Ping non-responding cpus attemping to force them into the NMI handler */ +/* Ping non-responding CPU's attemping to force them into the NMI handler */ static void uv_nmi_nr_cpus_ping(void) { int cpu; @@ -571,7 +571,7 @@ static void uv_nmi_nr_cpus_ping(void) apic->send_IPI_mask(uv_nmi_cpu_mask, APIC_DM_NMI); } -/* Clean up flags for cpus that ignored both NMI and ping */ +/* Clean up flags for CPU's that ignored both NMI and ping */ static void uv_nmi_cleanup_mask(void) { int cpu; @@ -583,7 +583,7 @@ static void uv_nmi_cleanup_mask(void) } } -/* Loop waiting as cpus enter NMI handler */ +/* Loop waiting as CPU's enter NMI handler */ static int uv_nmi_wait_cpus(int first) { int i, j, k, n = num_online_cpus(); @@ -597,7 +597,7 @@ static int uv_nmi_wait_cpus(int first) k = n - cpumask_weight(uv_nmi_cpu_mask); } - /* PCH NMI causes only one cpu to respond */ + /* PCH NMI causes only one CPU to respond */ if (first && uv_pch_intr_now_enabled) { cpumask_clear_cpu(cpu, uv_nmi_cpu_mask); return n - k - 1; @@ -618,13 +618,13 @@ static int uv_nmi_wait_cpus(int first) k = n; break; } - if (last_k != k) { /* abort if no new cpus coming in */ + if (last_k != k) { /* abort if no new CPU's coming in */ last_k = k; waiting = 0; } else if (++waiting > uv_nmi_wait_count) break; - /* extend delay if waiting only for cpu 0 */ + /* Extend delay if waiting only for CPU 0: */ if (waiting && (n - k) == 1 && cpumask_test_cpu(0, uv_nmi_cpu_mask)) loop_delay *= 100; @@ -635,29 +635,29 @@ static int uv_nmi_wait_cpus(int first) return n - k; } -/* Wait until all slave cpus have entered UV NMI handler */ +/* Wait until all slave CPU's have entered UV NMI handler */ static void uv_nmi_wait(int master) { - /* indicate this cpu is in */ + /* Indicate this CPU is in: */ this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_IN); - /* if not the first cpu in (the master), then we are a slave cpu */ + /* If not the first CPU in (the master), then we are a slave CPU */ if (!master) return; do { - /* wait for all other cpus to gather here */ + /* Wait for all other CPU's to gather here */ if (!uv_nmi_wait_cpus(1)) break; - /* if not all made it in, send IPI NMI to them */ + /* If not all made it in, send IPI NMI to them */ pr_alert("UV: Sending NMI IPI to %d CPUs: %*pbl\n", cpumask_weight(uv_nmi_cpu_mask), cpumask_pr_args(uv_nmi_cpu_mask)); uv_nmi_nr_cpus_ping(); - /* if all cpus are in, then done */ + /* If all CPU's are in, then done */ if (!uv_nmi_wait_cpus(0)) break; @@ -709,7 +709,7 @@ static void uv_nmi_dump_state_cpu(int cpu, struct pt_regs *regs) this_cpu_write(uv_cpu_nmi.state, UV_NMI_STATE_DUMP_DONE); } -/* Trigger a slave cpu to dump it's state */ +/* Trigger a slave CPU to dump it's state */ static void uv_nmi_trigger_dump(int cpu) { int retry = uv_nmi_trigger_delay; @@ -730,7 +730,7 @@ static void uv_nmi_trigger_dump(int cpu) uv_cpu_nmi_per(cpu).state = UV_NMI_STATE_DUMP_DONE; } -/* Wait until all cpus ready to exit */ +/* Wait until all CPU's ready to exit */ static void uv_nmi_sync_exit(int master) { atomic_dec(&uv_nmi_cpus_in_nmi); @@ -760,7 +760,7 @@ static void uv_nmi_action_health(int cpu, struct pt_regs *regs, int master) uv_nmi_sync_exit(master); } -/* Walk through cpu list and dump state of each */ +/* Walk through CPU list and dump state of each */ static void uv_nmi_dump_state(int cpu, struct pt_regs *regs, int master) { if (master) { @@ -872,7 +872,7 @@ static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) if (reason < 0) return; - /* call KGDB NMI handler as MASTER */ + /* Call KGDB NMI handler as MASTER */ ret = kgdb_nmicallin(cpu, X86_TRAP_NMI, regs, reason, &uv_nmi_slave_continue); if (ret) { @@ -880,7 +880,7 @@ static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) atomic_set(&uv_nmi_slave_continue, SLAVE_EXIT); } } else { - /* wait for KGDB signal that it's ready for slaves to enter */ + /* Wait for KGDB signal that it's ready for slaves to enter */ int sig; do { @@ -888,7 +888,7 @@ static void uv_call_kgdb_kdb(int cpu, struct pt_regs *regs, int master) sig = atomic_read(&uv_nmi_slave_continue); } while (!sig); - /* call KGDB as slave */ + /* Call KGDB as slave */ if (sig == SLAVE_CONTINUE) kgdb_nmicallback(cpu, regs); } @@ -932,7 +932,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) strncpy(uv_nmi_action, "dump", strlen(uv_nmi_action)); } - /* Pause as all cpus enter the NMI handler */ + /* Pause as all CPU's enter the NMI handler */ uv_nmi_wait(master); /* Process actions other than "kdump": */ @@ -972,7 +972,7 @@ int uv_handle_nmi(unsigned int reason, struct pt_regs *regs) } /* - * NMI handler for pulling in CPUs when perf events are grabbing our NMI + * NMI handler for pulling in CPU's when perf events are grabbing our NMI */ static int uv_handle_nmi_ping(unsigned int reason, struct pt_regs *regs) { @@ -1005,7 +1005,7 @@ void uv_nmi_init(void) unsigned int value; /* - * Unmask NMI on all cpus + * Unmask NMI on all CPU's */ value = apic_read(APIC_LVT1) | APIC_DM_NMI; value &= ~APIC_LVT_MASKED; -- cgit v1.2.3 From 5443624bedd0d23e112d5f2a919435182875bce9 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Fri, 27 Jan 2017 17:16:43 +0200 Subject: perf/x86/intel/pt: Add format strings for PTWRITE and power event tracing Commit: 8ee83b2ab3 ("perf/x86/intel/pt: Add support for PTWRITE and power event tracing") forgot to add format strings to the PT driver. So one could enable these features by setting corresponding bits in the event config, but not by their mnemonic names. This patch adds the format strings. Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Fixes: 8ee83b2ab3 ("perf/x86/intel/pt: Add support for PTWRITE...") Link: http://lkml.kernel.org/r/20170127151644.8585-2-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/pt.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/pt.c b/arch/x86/events/intel/pt.c index 1c1b9fe705c8..5900471ee508 100644 --- a/arch/x86/events/intel/pt.c +++ b/arch/x86/events/intel/pt.c @@ -99,18 +99,24 @@ static struct attribute_group pt_cap_group = { }; PMU_FORMAT_ATTR(cyc, "config:1" ); +PMU_FORMAT_ATTR(pwr_evt, "config:4" ); +PMU_FORMAT_ATTR(fup_on_ptw, "config:5" ); PMU_FORMAT_ATTR(mtc, "config:9" ); PMU_FORMAT_ATTR(tsc, "config:10" ); PMU_FORMAT_ATTR(noretcomp, "config:11" ); +PMU_FORMAT_ATTR(ptw, "config:12" ); PMU_FORMAT_ATTR(mtc_period, "config:14-17" ); PMU_FORMAT_ATTR(cyc_thresh, "config:19-22" ); PMU_FORMAT_ATTR(psb_period, "config:24-27" ); static struct attribute *pt_formats_attr[] = { &format_attr_cyc.attr, + &format_attr_pwr_evt.attr, + &format_attr_fup_on_ptw.attr, &format_attr_mtc.attr, &format_attr_tsc.attr, &format_attr_noretcomp.attr, + &format_attr_ptw.attr, &format_attr_mtc_period.attr, &format_attr_cyc_thresh.attr, &format_attr_psb_period.attr, -- cgit v1.2.3 From c26819900036f5b91608051a0fc7c76f6b4ffc7b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Wed, 1 Feb 2017 22:17:39 +0800 Subject: crypto: aesni - Fix failure when pcbc module is absent MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When aesni is built as a module together with pcbc, the pcbc module must be present for aesni to load. However, the pcbc module may not be present for reasons such as its absence on initramfs. This patch allows the aesni to function even if the pcbc module is enabled but not present. Reported-by: Arkadiusz Miśkiewicz Signed-off-by: Herbert Xu --- arch/x86/crypto/aesni-intel_glue.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/aesni-intel_glue.c b/arch/x86/crypto/aesni-intel_glue.c index 6ef688a1ef3e..7ff1b0c86a8e 100644 --- a/arch/x86/crypto/aesni-intel_glue.c +++ b/arch/x86/crypto/aesni-intel_glue.c @@ -1085,9 +1085,9 @@ static void aesni_free_simds(void) aesni_simd_skciphers[i]; i++) simd_skcipher_free(aesni_simd_skciphers[i]); - for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2) && - aesni_simd_skciphers2[i].simd; i++) - simd_skcipher_free(aesni_simd_skciphers2[i].simd); + for (i = 0; i < ARRAY_SIZE(aesni_simd_skciphers2); i++) + if (aesni_simd_skciphers2[i].simd) + simd_skcipher_free(aesni_simd_skciphers2[i].simd); } static int __init aesni_init(void) @@ -1168,7 +1168,7 @@ static int __init aesni_init(void) simd = simd_skcipher_create_compat(algname, drvname, basename); err = PTR_ERR(simd); if (IS_ERR(simd)) - goto unregister_simds; + continue; aesni_simd_skciphers2[i].simd = simd; } -- cgit v1.2.3 From 00c87e9a70a17b355b81c36adedf05e84f54e10d Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Wed, 1 Feb 2017 14:19:53 +0100 Subject: KVM: x86: do not save guest-unsupported XSAVE state MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Saving unsupported state prevents migration when the new host does not support a XSAVE feature of the original host, even if the feature is not exposed to the guest. We've masked host features with guest-visible features before, with 4344ee981e21 ("KVM: x86: only copy XSAVE state for the supported features") and dropped it when implementing XSAVES. Do it again. Fixes: df1daba7d1cb ("KVM: x86: support XSAVES usage in the host") Cc: stable@vger.kernel.org Reviewed-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d153be8929a6..e52c9088660f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -3182,6 +3182,7 @@ static void fill_xsave(u8 *dest, struct kvm_vcpu *vcpu) memcpy(dest, xsave, XSAVE_HDR_OFFSET); /* Set XSTATE_BV */ + xstate_bv &= vcpu->arch.guest_supported_xcr0 | XFEATURE_MASK_FPSSE; *(u64 *)(dest + XSAVE_HDR_OFFSET) = xstate_bv; /* -- cgit v1.2.3 From ae47eda905e61ef6ba0b6f79b967c9de15ca4f8e Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Fri, 20 Jan 2017 14:22:33 +0100 Subject: x86/msr: Add MSR_MISC_FEATURE_ENABLES and RING3MWAIT bit Define new MSR MISC_FEATURE_ENABLES (0x140). On supported CPUs if bit 1 of this MSR is set, then calling MONITOR and MWAIT instructions outside of ring 0 will not cause invalid-opcode exception. The MSR MISC_FEATURE_ENABLES is not yet documented in the SDM. Here is the relevant documentation: Hex Dec Name Scope 140H 320 MISC_FEATURE_ENABLES Thread 0 Reserved 1 If set to 1, the MONITOR and MWAIT instructions do not cause invalid-opcode exceptions when executed with CPL > 0 or in virtual-8086 mode. If MWAIT is executed when CPL > 0 or in virtual-8086 mode, and if EAX indicates a C-state other than C0 or C1, the instruction operates as if EAX indicated the C-state C1. 63:2 Reserved Signed-off-by: Grzegorz Andrejczuk Cc: Piotr.Luc@intel.com Cc: dave.hansen@linux.intel.com Link: http://lkml.kernel.org/r/1484918557-15481-2-git-send-email-grzegorz.andrejczuk@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/msr-index.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 710273c617b8..00293a94ffaf 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -543,6 +543,11 @@ #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT 39 #define MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE (1ULL << MSR_IA32_MISC_ENABLE_IP_PREF_DISABLE_BIT) +/* MISC_FEATURE_ENABLES non-architectural features */ +#define MSR_MISC_FEATURE_ENABLES 0x00000140 + +#define MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT 1 + #define MSR_IA32_TSC_DEADLINE 0x000006E0 /* P4/Xeon+ specific */ -- cgit v1.2.3 From 0274f9551eff55dbd63b5f5f3efe30fe5d4c801c Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Fri, 20 Jan 2017 14:22:34 +0100 Subject: x86/elf: Add HWCAP2 to expose ring 3 MONITOR/MWAIT Introduce ELF_HWCAP2 variable for x86 and reserve its bit 0 to expose the ring 3 MONITOR/MWAIT. HWCAP variables contain bitmasks which can be used by userspace applications to detect which instruction sets are supported by CPU. On x86 architecture information about CPU capabilities can be checked via CPUID instructions, unfortunately presence of ring 3 MONITOR/MWAIT feature cannot be checked this way. ELF_HWCAP cannot be used as well, because on x86 it is set to CPUID[1].EDX which means that all bits are reserved there. HWCAP2 approach was chosen because it reuses existing solution present in other architectures, so only minor modifications are required to the kernel and userspace applications. When ELF_HWCAP2 is defined kernel maps it to AT_HWCAP2 during the start of the application. This way the ring 3 MONITOR/MWAIT feature can be detected using getauxval() API in a simple and fast manner. ELF_HWCAP2 type is u32 to be consistent with x86 ELF_HWCAP type. Signed-off-by: Grzegorz Andrejczuk Cc: Piotr.Luc@intel.com Cc: dave.hansen@linux.intel.com Link: http://lkml.kernel.org/r/1484918557-15481-3-git-send-email-grzegorz.andrejczuk@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/elf.h | 9 +++++++++ arch/x86/include/uapi/asm/hwcap2.h | 7 +++++++ arch/x86/kernel/cpu/common.c | 3 +++ 3 files changed, 19 insertions(+) create mode 100644 arch/x86/include/uapi/asm/hwcap2.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index e7f155c3045e..9d49c18b5ea9 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -258,6 +258,15 @@ extern int force_personality32; #define ELF_HWCAP (boot_cpu_data.x86_capability[CPUID_1_EDX]) +extern u32 elf_hwcap2; + +/* + * HWCAP2 supplies mask with kernel enabled CPU features, so that + * the application can discover that it can safely use them. + * The bits are defined in uapi/asm/hwcap2.h. + */ +#define ELF_HWCAP2 (elf_hwcap2) + /* This yields a string that ld.so will use to load implementation specific libraries for optimization. This is more specific in intent than poking at uname or /proc/cpuinfo. diff --git a/arch/x86/include/uapi/asm/hwcap2.h b/arch/x86/include/uapi/asm/hwcap2.h new file mode 100644 index 000000000000..0bd2be5c7617 --- /dev/null +++ b/arch/x86/include/uapi/asm/hwcap2.h @@ -0,0 +1,7 @@ +#ifndef _ASM_X86_HWCAP2_H +#define _ASM_X86_HWCAP2_H + +/* MONITOR/MWAIT enabled in Ring 3 */ +#define HWCAP2_RING3MWAIT (1 << 0) + +#endif diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bab7a8a4293..f879429cfcaa 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include @@ -51,6 +52,8 @@ #include "cpu.h" +u32 elf_hwcap2 __read_mostly; + /* all of these masks are initialized in setup_cpu_local_masks() */ cpumask_var_t cpu_initialized_mask; cpumask_var_t cpu_callout_mask; -- cgit v1.2.3 From 1d12d0ef0194ccc4dcebed3d96bb2301b26fc3ee Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Fri, 20 Jan 2017 14:22:35 +0100 Subject: x86/cpufeature: Add RING3MWAIT to CPU features Add software-defined CPUID bit for the non-architectural ring 3 MONITOR/MWAIT feature. Signed-off-by: Grzegorz Andrejczuk Cc: Piotr.Luc@intel.com Cc: dave.hansen@linux.intel.com Link: http://lkml.kernel.org/r/1484918557-15481-4-git-send-email-grzegorz.andrejczuk@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index d9d7136edf05..56e5184514c6 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ +#define X86_FEATURE_RING3MWAIT ( 3*32+25) /* ring 3 MONITOR/MWAIT */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ -- cgit v1.2.3 From e16fd002afe2b16d828bbf738b8a81a185fe9272 Mon Sep 17 00:00:00 2001 From: Grzegorz Andrejczuk Date: Fri, 20 Jan 2017 14:22:36 +0100 Subject: x86/cpufeature: Enable RING3MWAIT for Knights Landing Enable ring 3 MONITOR/MWAIT for Intel Xeon Phi x200 codenamed Knights Landing. Presence of this feature cannot be detected automatically (by reading any other MSR) therefore it is required to explicitly check for the family and model of the CPU before attempting to enable it. Signed-off-by: Grzegorz Andrejczuk Cc: Piotr.Luc@intel.com Cc: dave.hansen@linux.intel.com Link: http://lkml.kernel.org/r/1484918557-15481-5-git-send-email-grzegorz.andrejczuk@intel.com Signed-off-by: Thomas Gleixner --- Documentation/admin-guide/kernel-parameters.txt | 4 +++ arch/x86/kernel/cpu/intel.c | 37 +++++++++++++++++++++++++ 2 files changed, 41 insertions(+) (limited to 'arch/x86') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be7c0d9506b1..cfbb3fc938f7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3563,6 +3563,10 @@ rhash_entries= [KNL,NET] Set number of hash buckets for route cache + ring3mwait=disable + [KNL] Disable ring 3 MONITOR/MWAIT feature on supported + CPUs. + ro [KNL] Mount root device read-only on boot rodata= [KNL] diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 203f860d2ab3..da2401a4b0f4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #ifdef CONFIG_X86_64 #include @@ -62,6 +64,39 @@ void check_mpx_erratum(struct cpuinfo_x86 *c) } } +static bool ring3mwait_disabled __read_mostly; + +static int __init ring3mwait_disable(char *__unused) +{ + ring3mwait_disabled = true; + return 0; +} +__setup("ring3mwait=disable", ring3mwait_disable); + +static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) +{ + /* + * Ring 3 MONITOR/MWAIT feature cannot be detected without + * cpu model and family comparison. + */ + if (c->x86 != 6 || c->x86_model != INTEL_FAM6_XEON_PHI_KNL) + return; + + if (ring3mwait_disabled) { + msr_clear_bit(MSR_MISC_FEATURE_ENABLES, + MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); + return; + } + + msr_set_bit(MSR_MISC_FEATURE_ENABLES, + MSR_MISC_FEATURE_ENABLES_RING3MWAIT_BIT); + + set_cpu_cap(c, X86_FEATURE_RING3MWAIT); + + if (c == &boot_cpu_data) + ELF_HWCAP2 |= HWCAP2_RING3MWAIT; +} + static void early_init_intel(struct cpuinfo_x86 *c) { u64 misc_enable; @@ -560,6 +595,8 @@ static void init_intel(struct cpuinfo_x86 *c) detect_vmx_virtcap(c); init_intel_energy_perf(c); + + probe_xeon_phi_r3mwait(c); } #ifdef CONFIG_X86_32 -- cgit v1.2.3 From 68dee8e2f2cacc54d038394e70d22411dee89da2 Mon Sep 17 00:00:00 2001 From: Nikola Pajkovsky Date: Tue, 15 Nov 2016 09:47:49 +0100 Subject: x86/pci-calgary: Fix iommu_free() comparison of unsigned expression >= 0 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit commit 8fd524b355da ("x86: Kill bad_dma_address variable") has killed bad_dma_address variable and used instead of macro DMA_ERROR_CODE which is always zero. Since dma_addr is unsigned, the statement dma_addr >= DMA_ERROR_CODE is always true, and not needed. arch/x86/kernel/pci-calgary_64.c: In function ‘iommu_free’: arch/x86/kernel/pci-calgary_64.c:299:2: warning: comparison of unsigned expression >= 0 is always true [-Wtype-limits] if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { Fixes: 8fd524b355da ("x86: Kill bad_dma_address variable") Signed-off-by: Nikola Pajkovsky Cc: iommu@lists.linux-foundation.org Cc: Jon Mason Cc: Muli Ben-Yehuda Link: http://lkml.kernel.org/r/7612c0f9dd7c1290407dbf8e809def922006920b.1479161177.git.npajkovsky@suse.cz Signed-off-by: Thomas Gleixner --- arch/x86/kernel/pci-calgary_64.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/pci-calgary_64.c b/arch/x86/kernel/pci-calgary_64.c index 5d400ba1349d..d47517941bbc 100644 --- a/arch/x86/kernel/pci-calgary_64.c +++ b/arch/x86/kernel/pci-calgary_64.c @@ -296,7 +296,7 @@ static void iommu_free(struct iommu_table *tbl, dma_addr_t dma_addr, /* were we called with bad_dma_address? */ badend = DMA_ERROR_CODE + (EMERGENCY_PAGES * PAGE_SIZE); - if (unlikely((dma_addr >= DMA_ERROR_CODE) && (dma_addr < badend))) { + if (unlikely(dma_addr < badend)) { WARN(1, KERN_ERR "Calgary: driver tried unmapping bad DMA " "address 0x%Lx\n", dma_addr); return; -- cgit v1.2.3 From 07d495dae20717b00881798ef812f7aa53ca0eb3 Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Mon, 28 Nov 2016 13:50:57 +0600 Subject: x86/traps: Get rid of unnecessary preempt_disable/preempt_enable_no_resched Exception handlers which may run on IST stack call ist_enter() at the start of execution and ist_exit() in the end. ist_enter() disables preemption unconditionally and ist_exit() enables it. So the extra preempt_disable/enable() pairs nested inside the ist_enter/exit() regions are pointless and can be removed. Signed-off-by: Alexander Kuleshov Cc: Tony Luck Cc: Jianyu Zhan Cc: Paul Gortmaker Cc: Andy Lutomirski Cc: Borislav Petkov Link: http://lkml.kernel.org/r/20161128075057.7724-1-kuleshovmail@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/traps.c | 5 ----- 1 file changed, 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index bf0c6d049080..1dc86ee60a03 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -563,11 +563,9 @@ dotraplinkage void notrace do_int3(struct pt_regs *regs, long error_code) * as we may switch to the interrupt stack. */ debug_stack_usage_inc(); - preempt_disable(); cond_local_irq_enable(regs); do_trap(X86_TRAP_BP, SIGTRAP, "int3", regs, error_code, NULL); cond_local_irq_disable(regs); - preempt_enable_no_resched(); debug_stack_usage_dec(); exit: ist_exit(regs); @@ -742,14 +740,12 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) debug_stack_usage_inc(); /* It's safe to allow irq's after DR6 has been saved */ - preempt_disable(); cond_local_irq_enable(regs); if (v8086_mode(regs)) { handle_vm86_trap((struct kernel_vm86_regs *) regs, error_code, X86_TRAP_DB); cond_local_irq_disable(regs); - preempt_enable_no_resched(); debug_stack_usage_dec(); goto exit; } @@ -769,7 +765,6 @@ dotraplinkage void do_debug(struct pt_regs *regs, long error_code) if (tsk->thread.debugreg6 & (DR_STEP | DR_TRAP_BITS) || user_icebp) send_sigtrap(tsk, regs, error_code, si_code); cond_local_irq_disable(regs); - preempt_enable_no_resched(); debug_stack_usage_dec(); exit: -- cgit v1.2.3 From 1013fe32a63d1139b1b32049ea46c0c462738d8b Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Mon, 19 Dec 2016 22:35:57 +0800 Subject: x86/mm/pat: Use rb_entry() To make the code clearer, use rb_entry() instead of open coding it Signed-off-by: Geliang Tang Cc: Masahiro Yamada Cc: Toshi Kani Cc: Paul Gortmaker Link: http://lkml.kernel.org/r/974a91cd4ed2d04c92e4faa4765077e38f248d6b.1482157956.git.geliangtang@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/pat_rbtree.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/pat_rbtree.c b/arch/x86/mm/pat_rbtree.c index 159b52ccd600..d76485b22824 100644 --- a/arch/x86/mm/pat_rbtree.c +++ b/arch/x86/mm/pat_rbtree.c @@ -47,7 +47,7 @@ static u64 get_subtree_max_end(struct rb_node *node) { u64 ret = 0; if (node) { - struct memtype *data = container_of(node, struct memtype, rb); + struct memtype *data = rb_entry(node, struct memtype, rb); ret = data->subtree_max_end; } return ret; @@ -79,7 +79,7 @@ static struct memtype *memtype_rb_lowest_match(struct rb_root *root, struct memtype *last_lower = NULL; while (node) { - struct memtype *data = container_of(node, struct memtype, rb); + struct memtype *data = rb_entry(node, struct memtype, rb); if (get_subtree_max_end(node->rb_left) > start) { /* Lowest overlap if any must be on left side */ @@ -121,7 +121,7 @@ static struct memtype *memtype_rb_match(struct rb_root *root, node = rb_next(&match->rb); if (node) - match = container_of(node, struct memtype, rb); + match = rb_entry(node, struct memtype, rb); else match = NULL; } @@ -150,7 +150,7 @@ static int memtype_rb_check_conflict(struct rb_root *root, node = rb_next(&match->rb); while (node) { - match = container_of(node, struct memtype, rb); + match = rb_entry(node, struct memtype, rb); if (match->start >= end) /* Checked all possible matches */ goto success; @@ -181,7 +181,7 @@ static void memtype_rb_insert(struct rb_root *root, struct memtype *newdata) struct rb_node *parent = NULL; while (*node) { - struct memtype *data = container_of(*node, struct memtype, rb); + struct memtype *data = rb_entry(*node, struct memtype, rb); parent = *node; if (data->subtree_max_end < newdata->end) @@ -270,7 +270,7 @@ int rbt_memtype_copy_nth_element(struct memtype *out, loff_t pos) } if (node) { /* pos == i */ - struct memtype *this = container_of(node, struct memtype, rb); + struct memtype *this = rb_entry(node, struct memtype, rb); *out = *this; return 0; } else { -- cgit v1.2.3 From 4d8bb00604b182b62e7786bae0e58e0befeeff85 Mon Sep 17 00:00:00 2001 From: Piotr Luc Date: Fri, 20 Jan 2017 14:22:37 +0100 Subject: x86/cpufeature: Enable RING3MWAIT for Knights Mill Enable ring 3 MONITOR/MWAIT for Intel Xeon Phi codenamed Knights Mill. We can't guarantee that this (KNM) will be the last CPU model that needs this hack. But, we do recognize that this is far from optimal, and there is an effort to ensure we don't keep doing extending this hack forever. Signed-off-by: Piotr Luc Cc: Piotr.Luc@intel.com Cc: dave.hansen@linux.intel.com Link: http://lkml.kernel.org/r/1484918557-15481-6-git-send-email-grzegorz.andrejczuk@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index da2401a4b0f4..a4c4ff9b27e4 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -79,8 +79,15 @@ static void probe_xeon_phi_r3mwait(struct cpuinfo_x86 *c) * Ring 3 MONITOR/MWAIT feature cannot be detected without * cpu model and family comparison. */ - if (c->x86 != 6 || c->x86_model != INTEL_FAM6_XEON_PHI_KNL) + if (c->x86 != 6) return; + switch (c->x86_model) { + case INTEL_FAM6_XEON_PHI_KNL: + case INTEL_FAM6_XEON_PHI_KNM: + break; + default: + return; + } if (ring3mwait_disabled) { msr_clear_bit(MSR_MISC_FEATURE_ENABLES, -- cgit v1.2.3 From 79a8b9aa388b0620cc1d525d7c0f0d9a8a85e08e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sun, 5 Feb 2017 11:50:21 +0100 Subject: x86/CPU/AMD: Bring back Compute Unit ID Commit: a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology") restored the initial approach we had with the Fam15h topology of enumerating CU (Compute Unit) threads as cores. And this is still correct - they're beefier than HT threads but still have some shared functionality. Our current approach has a problem with the Mad Max Steam game, for example. Yves Dionne reported a certain "choppiness" while playing on v4.9.5. That problem stems most likely from the fact that the CU threads share resources within one CU and when we schedule to a thread of a different compute unit, this incurs latency due to migrating the working set to a different CU through the caches. When the thread siblings mask mirrors that aspect of the CUs and threads, the scheduler pays attention to it and tries to schedule within one CU first. Which takes care of the latency, of course. Reported-by: Yves Dionne Signed-off-by: Borislav Petkov Cc: # 4.9 Cc: Brice Goglin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Yazen Ghannam Link: http://lkml.kernel.org/r/20170205105022.8705-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 1 + arch/x86/kernel/cpu/amd.c | 9 ++++++++- arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/smpboot.c | 12 +++++++++--- 4 files changed, 19 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1be64da0384e..e6cfe7ba2d65 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -104,6 +104,7 @@ struct cpuinfo_x86 { __u8 x86_phys_bits; /* CPUID returned core id bits: */ __u8 x86_coreid_bits; + __u8 cu_id; /* Max extended CPUID function supported: */ __u32 extended_cpuid_level; /* Maximum supported CPUID level, -1=no CPUID: */ diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 1d3167269a67..20dc44d1e6be 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -309,8 +309,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c) /* get information required for multi-node processors */ if (boot_cpu_has(X86_FEATURE_TOPOEXT)) { + u32 eax, ebx, ecx, edx; - node_id = cpuid_ecx(0x8000001e) & 7; + cpuid(0x8000001e, &eax, &ebx, &ecx, &edx); + + node_id = ecx & 0xff; + smp_num_siblings = ((ebx >> 8) & 0xff) + 1; + + if (c->x86 == 0x15) + c->cu_id = ebx & 0xff; /* * We may have multiple LLCs if L3 caches exist, so check if we diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index 9bab7a8a4293..ede03e849a8b 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1015,6 +1015,7 @@ static void identify_cpu(struct cpuinfo_x86 *c) c->x86_model_id[0] = '\0'; /* Unset */ c->x86_max_cores = 1; c->x86_coreid_bits = 0; + c->cu_id = 0xff; #ifdef CONFIG_X86_64 c->x86_clflush_size = 64; c->x86_phys_bits = 36; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 46732dc3b73c..99b920d0e516 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -433,9 +433,15 @@ static bool match_smt(struct cpuinfo_x86 *c, struct cpuinfo_x86 *o) int cpu1 = c->cpu_index, cpu2 = o->cpu_index; if (c->phys_proc_id == o->phys_proc_id && - per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2) && - c->cpu_core_id == o->cpu_core_id) - return topology_sane(c, o, "smt"); + per_cpu(cpu_llc_id, cpu1) == per_cpu(cpu_llc_id, cpu2)) { + if (c->cpu_core_id == o->cpu_core_id) + return topology_sane(c, o, "smt"); + + if ((c->cu_id != 0xff) && + (o->cu_id != 0xff) && + (c->cu_id == o->cu_id)) + return topology_sane(c, o, "smt"); + } } else if (c->phys_proc_id == o->phys_proc_id && c->cpu_core_id == o->cpu_core_id) { -- cgit v1.2.3 From 08b259631b5a1d912af4832847b5642f377d9101 Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Sun, 5 Feb 2017 11:50:22 +0100 Subject: x86/CPU/AMD: Fix Zen SMT topology After: a33d331761bc ("x86/CPU/AMD: Fix Bulldozer topology") our SMT scheduling topology for Fam17h systems is broken, because the ThreadId is included in the ApicId when SMT is enabled. So, without further decoding cpu_core_id is unique for each thread rather than the same for threads on the same core. This didn't affect systems with SMT disabled. Make cpu_core_id be what it is defined to be. Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Cc: # 4.9 Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170205105022.8705-2-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 20dc44d1e6be..2b4cf04239b6 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -319,6 +319,13 @@ static void amd_get_topology(struct cpuinfo_x86 *c) if (c->x86 == 0x15) c->cu_id = ebx & 0xff; + if (c->x86 >= 0x17) { + c->cpu_core_id = ebx & 0xff; + + if (smp_num_siblings > 1) + c->x86_max_cores /= smp_num_siblings; + } + /* * We may have multiple LLCs if L3 caches exist, so check if we * have an L3 cache by looking at the L3 cache CPUID leaf. -- cgit v1.2.3 From b6263178b8dbd9fe70d55f136c2a1da39309520e Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Mon, 6 Feb 2017 18:55:43 +0900 Subject: kprobes/x86: Use hlist_for_each_entry() instead of hlist_for_each_entry_safe() Use hlist_for_each_entry() in the first loop in the kretprobe trampoline_handler() function, because it doesn't change the hlist. Signed-off-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/148637493309.19245.12546866092052500584.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index eb3509338ae0..520b8dfe1640 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -745,7 +745,7 @@ __visible __used void *trampoline_handler(struct pt_regs *regs) * will be the real return address, and all the rest will * point to kretprobe_trampoline. */ - hlist_for_each_entry_safe(ri, tmp, head, hlist) { + hlist_for_each_entry(ri, head, hlist) { if (ri->task != current) /* another task is sharing our hash bucket */ continue; -- cgit v1.2.3 From 5773ebfee729acf93b330664eab4c8d77edc2193 Mon Sep 17 00:00:00 2001 From: Niklas Cassel Date: Mon, 6 Feb 2017 17:43:49 +0100 Subject: x86/kconfig: Remove misleading note regarding hibernation and KASLR There used to be a restriction with KASLR and hibernation, but this is no longer true, and since commit: 65fe935dd238 ("x86/KASLR, x86/power: Remove x86 hibernation restrictions") the parameter "kaslr" does no longer exist. Signed-off-by: Niklas Cassel Cc: Kees Cook Cc: Linus Torvalds Cc: Niklas Cassel Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1486399429-23078-1-git-send-email-niklass@axis.com Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 4 ---- 1 file changed, 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..4e6dbca03aed 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1994,10 +1994,6 @@ config RANDOMIZE_BASE theoretically possible, but the implementations are further limited due to memory layouts. - If CONFIG_HIBERNATE is also enabled, KASLR is disabled at boot - time. To enable it, boot with "kaslr" on the kernel command - line (which will also disable hibernation). - If unsure, say N. # Relocation on x86 needs some additional build support -- cgit v1.2.3 From 543113d2f4b5dd40d46a95502effe86b845dfe34 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 7 Feb 2017 12:44:48 +0800 Subject: x86/apic: Fix a typo in a comment line s/bringin /bringing Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: trivial@kernel.org Link: http://lkml.kernel.org/r/1486442688-24690-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index fdb9c46227cc..8567c851172c 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1246,7 +1246,7 @@ static void lapic_setup_esr(void) /** * setup_local_APIC - setup the local APIC * - * Used to setup local APIC while initializing BSP or bringin up APs. + * Used to setup local APIC while initializing BSP or bringing up APs. * Always called with preemption disabled. */ void setup_local_APIC(void) -- cgit v1.2.3 From a2cd2f3f29f26782b7484b32e2af172e29313717 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Feb 2017 11:22:40 +0000 Subject: x86/efi: Allow invocation of arbitrary runtime services Provide the ability to perform mixed-mode runtime service calls for x86 in the same way the following commit provided the ability to invoke for boot services: 0a637ee61247bd ("x86/efi: Allow invocation of arbitrary boot services") Suggested-by: Lukas Wunner Signed-off-by: David Howells Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1486380166-31868-2-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/boot/compressed/eboot.c | 1 + arch/x86/boot/compressed/head_32.S | 6 +++--- arch/x86/boot/compressed/head_64.S | 8 ++++---- arch/x86/include/asm/efi.h | 5 +++++ 4 files changed, 13 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index 6d3aeabbce68..f99978db6b6f 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -32,6 +32,7 @@ static void setup_boot_services##bits(struct efi_config *c) \ \ table = (typeof(table))sys_table; \ \ + c->runtime_services = table->runtime; \ c->boot_services = table->boottime; \ c->text_output = table->con_out; \ } diff --git a/arch/x86/boot/compressed/head_32.S b/arch/x86/boot/compressed/head_32.S index fd0b6a272dd5..d85b9625e836 100644 --- a/arch/x86/boot/compressed/head_32.S +++ b/arch/x86/boot/compressed/head_32.S @@ -82,7 +82,7 @@ ENTRY(efi_pe_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 32(%eax) + add %esi, 40(%eax) pushl %eax call make_boot_params @@ -108,7 +108,7 @@ ENTRY(efi32_stub_entry) /* Relocate efi_config->call() */ leal efi32_config(%esi), %eax - add %esi, 32(%eax) + add %esi, 40(%eax) pushl %eax 2: call efi_main @@ -264,7 +264,7 @@ relocated: #ifdef CONFIG_EFI_STUB .data efi32_config: - .fill 4,8,0 + .fill 5,8,0 .long efi_call_phys .long 0 .byte 0 diff --git a/arch/x86/boot/compressed/head_64.S b/arch/x86/boot/compressed/head_64.S index 4d85e600db78..d2ae1f821e0c 100644 --- a/arch/x86/boot/compressed/head_64.S +++ b/arch/x86/boot/compressed/head_64.S @@ -264,7 +264,7 @@ ENTRY(efi_pe_entry) /* * Relocate efi_config->call(). */ - addq %rbp, efi64_config+32(%rip) + addq %rbp, efi64_config+40(%rip) movq %rax, %rdi call make_boot_params @@ -284,7 +284,7 @@ handover_entry: * Relocate efi_config->call(). */ movq efi_config(%rip), %rax - addq %rbp, 32(%rax) + addq %rbp, 40(%rax) 2: movq efi_config(%rip), %rdi call efi_main @@ -456,14 +456,14 @@ efi_config: #ifdef CONFIG_EFI_MIXED .global efi32_config efi32_config: - .fill 4,8,0 + .fill 5,8,0 .quad efi64_thunk .byte 0 #endif .global efi64_config efi64_config: - .fill 4,8,0 + .fill 5,8,0 .quad efi_call .byte 1 #endif /* CONFIG_EFI_STUB */ diff --git a/arch/x86/include/asm/efi.h b/arch/x86/include/asm/efi.h index e99675b9c861..2f77bcefe6b4 100644 --- a/arch/x86/include/asm/efi.h +++ b/arch/x86/include/asm/efi.h @@ -191,6 +191,7 @@ static inline efi_status_t efi_thunk_set_virtual_address_map( struct efi_config { u64 image_handle; u64 table; + u64 runtime_services; u64 boot_services; u64 text_output; efi_status_t (*call)(unsigned long, ...); @@ -226,6 +227,10 @@ static inline bool efi_is_64bit(void) #define __efi_call_early(f, ...) \ __efi_early()->call((unsigned long)f, __VA_ARGS__); +#define efi_call_runtime(f, ...) \ + __efi_early()->call(efi_table_attr(efi_runtime_services, f, \ + __efi_early()->runtime_services), __VA_ARGS__) + extern bool efi_reboot_required(void); #else -- cgit v1.2.3 From de8cb458625c164bb3f93c4e415e479afce8fa9d Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Feb 2017 11:22:43 +0000 Subject: efi: Get and store the secure boot status Get the firmware's secure-boot status in the kernel boot wrapper and stash it somewhere that the main kernel image can find. The efi_get_secureboot() function is extracted from the ARM stub and (a) generalised so that it can be called from x86 and (b) made to use efi_call_runtime() so that it can be run in mixed-mode. For x86, it is stored in boot_params and can be overridden by the boot loader or kexec. This allows secure-boot mode to be passed on to a new kernel. Suggested-by: Lukas Wunner Signed-off-by: David Howells Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1486380166-31868-5-git-send-email-ard.biesheuvel@linaro.org [ Small readability edits. ] Signed-off-by: Ingo Molnar --- Documentation/x86/zero-page.txt | 2 + arch/x86/boot/compressed/eboot.c | 7 ++++ arch/x86/include/uapi/asm/bootparam.h | 3 +- arch/x86/kernel/asm-offsets.c | 1 + drivers/firmware/efi/libstub/Makefile | 2 +- drivers/firmware/efi/libstub/arm-stub.c | 63 +++---------------------------- drivers/firmware/efi/libstub/secureboot.c | 61 ++++++++++++++++++++++++++++++ include/linux/efi.h | 8 ++++ 8 files changed, 88 insertions(+), 59 deletions(-) create mode 100644 drivers/firmware/efi/libstub/secureboot.c (limited to 'arch/x86') diff --git a/Documentation/x86/zero-page.txt b/Documentation/x86/zero-page.txt index 95a4d34af3fd..b8527c6b7646 100644 --- a/Documentation/x86/zero-page.txt +++ b/Documentation/x86/zero-page.txt @@ -31,6 +31,8 @@ Offset Proto Name Meaning 1E9/001 ALL eddbuf_entries Number of entries in eddbuf (below) 1EA/001 ALL edd_mbr_sig_buf_entries Number of entries in edd_mbr_sig_buffer (below) +1EB/001 ALL kbd_status Numlock is enabled +1EC/001 ALL secure_boot Secure boot is enabled in the firmware 1EF/001 ALL sentinel Used to detect broken bootloaders 290/040 ALL edd_mbr_sig_buffer EDD MBR signatures 2D0/A00 ALL e820_map E820 memory map table diff --git a/arch/x86/boot/compressed/eboot.c b/arch/x86/boot/compressed/eboot.c index f99978db6b6f..801c7a158e55 100644 --- a/arch/x86/boot/compressed/eboot.c +++ b/arch/x86/boot/compressed/eboot.c @@ -988,6 +988,13 @@ struct boot_params *efi_main(struct efi_config *c, else setup_boot_services32(efi_early); + /* + * If the boot loader gave us a value for secure_boot then we use that, + * otherwise we ask the BIOS. + */ + if (boot_params->secure_boot == efi_secureboot_mode_unset) + boot_params->secure_boot = efi_get_secureboot(sys_table); + setup_graphics(boot_params); setup_efi_pci(boot_params); diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index b10bf319ed20..5138dacf8bb8 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -135,7 +135,8 @@ struct boot_params { __u8 eddbuf_entries; /* 0x1e9 */ __u8 edd_mbr_sig_buf_entries; /* 0x1ea */ __u8 kbd_status; /* 0x1eb */ - __u8 _pad5[3]; /* 0x1ec */ + __u8 secure_boot; /* 0x1ec */ + __u8 _pad5[2]; /* 0x1ed */ /* * The sentinel is set to a nonzero value (0xff) in header.S. * diff --git a/arch/x86/kernel/asm-offsets.c b/arch/x86/kernel/asm-offsets.c index c62e015b126c..de827d6ac8c2 100644 --- a/arch/x86/kernel/asm-offsets.c +++ b/arch/x86/kernel/asm-offsets.c @@ -81,6 +81,7 @@ void common(void) { BLANK(); OFFSET(BP_scratch, boot_params, scratch); + OFFSET(BP_secure_boot, boot_params, secure_boot); OFFSET(BP_loadflags, boot_params, hdr.loadflags); OFFSET(BP_hardware_subarch, boot_params, hdr.hardware_subarch); OFFSET(BP_version, boot_params, hdr.version); diff --git a/drivers/firmware/efi/libstub/Makefile b/drivers/firmware/efi/libstub/Makefile index 33e0e2f1a730..f7425960f6a5 100644 --- a/drivers/firmware/efi/libstub/Makefile +++ b/drivers/firmware/efi/libstub/Makefile @@ -28,7 +28,7 @@ OBJECT_FILES_NON_STANDARD := y # Prevents link failures: __sanitizer_cov_trace_pc() is not linked in. KCOV_INSTRUMENT := n -lib-y := efi-stub-helper.o gop.o +lib-y := efi-stub-helper.o gop.o secureboot.o # include the stub's generic dependencies from lib/ when building for ARM/arm64 arm-deps := fdt_rw.c fdt_ro.c fdt_wip.c fdt.c fdt_empty_tree.c fdt_sw.c sort.c diff --git a/drivers/firmware/efi/libstub/arm-stub.c b/drivers/firmware/efi/libstub/arm-stub.c index 6fca48c9e054..d4056c6be1ec 100644 --- a/drivers/firmware/efi/libstub/arm-stub.c +++ b/drivers/firmware/efi/libstub/arm-stub.c @@ -20,52 +20,6 @@ bool __nokaslr; -static int efi_get_secureboot(efi_system_table_t *sys_table_arg) -{ - static efi_char16_t const sb_var_name[] = { - 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 }; - static efi_char16_t const sm_var_name[] = { - 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 }; - - efi_guid_t var_guid = EFI_GLOBAL_VARIABLE_GUID; - efi_get_variable_t *f_getvar = sys_table_arg->runtime->get_variable; - u8 val; - unsigned long size = sizeof(val); - efi_status_t status; - - status = f_getvar((efi_char16_t *)sb_var_name, (efi_guid_t *)&var_guid, - NULL, &size, &val); - - if (status != EFI_SUCCESS) - goto out_efi_err; - - if (val == 0) - return 0; - - status = f_getvar((efi_char16_t *)sm_var_name, (efi_guid_t *)&var_guid, - NULL, &size, &val); - - if (status != EFI_SUCCESS) - goto out_efi_err; - - if (val == 1) - return 0; - - return 1; - -out_efi_err: - switch (status) { - case EFI_NOT_FOUND: - return 0; - case EFI_DEVICE_ERROR: - return -EIO; - case EFI_SECURITY_VIOLATION: - return -EACCES; - default: - return -EINVAL; - } -} - efi_status_t efi_open_volume(efi_system_table_t *sys_table_arg, void *__image, void **__fh) { @@ -157,7 +111,7 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, efi_guid_t loaded_image_proto = LOADED_IMAGE_PROTOCOL_GUID; unsigned long reserve_addr = 0; unsigned long reserve_size = 0; - int secure_boot = 0; + enum efi_secureboot_mode secure_boot; struct screen_info *si; /* Check if we were booted by the EFI firmware */ @@ -227,19 +181,14 @@ unsigned long efi_entry(void *handle, efi_system_table_t *sys_table, pr_efi_err(sys_table, "Failed to parse EFI cmdline options\n"); secure_boot = efi_get_secureboot(sys_table); - if (secure_boot > 0) - pr_efi(sys_table, "UEFI Secure Boot is enabled.\n"); - - if (secure_boot < 0) { - pr_efi_err(sys_table, - "could not determine UEFI Secure Boot status.\n"); - } /* - * Unauthenticated device tree data is a security hazard, so - * ignore 'dtb=' unless UEFI Secure Boot is disabled. + * Unauthenticated device tree data is a security hazard, so ignore + * 'dtb=' unless UEFI Secure Boot is disabled. We assume that secure + * boot is enabled if we can't determine its state. */ - if (secure_boot != 0 && strstr(cmdline_ptr, "dtb=")) { + if (secure_boot != efi_secureboot_mode_disabled && + strstr(cmdline_ptr, "dtb=")) { pr_efi(sys_table, "Ignoring DTB from command line.\n"); } else { status = handle_cmdline_files(sys_table, image, cmdline_ptr, diff --git a/drivers/firmware/efi/libstub/secureboot.c b/drivers/firmware/efi/libstub/secureboot.c new file mode 100644 index 000000000000..b20b8b460d77 --- /dev/null +++ b/drivers/firmware/efi/libstub/secureboot.c @@ -0,0 +1,61 @@ +/* + * Secure boot handling. + * + * Copyright (C) 2013,2014 Linaro Limited + * Roy Franz + * + * This file is part of the Linux kernel, and is made available under the + * terms of the GNU General Public License version 2. + */ +#include +#include + +/* BIOS variables */ +static const efi_guid_t efi_variable_guid = EFI_GLOBAL_VARIABLE_GUID; +static const efi_char16_t const efi_SecureBoot_name[] = { + 'S', 'e', 'c', 'u', 'r', 'e', 'B', 'o', 'o', 't', 0 +}; +static const efi_char16_t const efi_SetupMode_name[] = { + 'S', 'e', 't', 'u', 'p', 'M', 'o', 'd', 'e', 0 +}; + +#define get_efi_var(name, vendor, ...) \ + efi_call_runtime(get_variable, \ + (efi_char16_t *)(name), (efi_guid_t *)(vendor), \ + __VA_ARGS__); + +/* + * Determine whether we're in secure boot mode. + */ +enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table_arg) +{ + u8 secboot, setupmode; + unsigned long size; + efi_status_t status; + + size = sizeof(secboot); + status = get_efi_var(efi_SecureBoot_name, &efi_variable_guid, + NULL, &size, &secboot); + if (status != EFI_SUCCESS) + goto out_efi_err; + + size = sizeof(setupmode); + status = get_efi_var(efi_SetupMode_name, &efi_variable_guid, + NULL, &size, &setupmode); + if (status != EFI_SUCCESS) + goto out_efi_err; + + if (secboot == 0 || setupmode == 1) + return efi_secureboot_mode_disabled; + + pr_efi(sys_table_arg, "UEFI Secure Boot is enabled.\n"); + return efi_secureboot_mode_enabled; + +out_efi_err: + pr_efi_err(sys_table_arg, "Could not determine UEFI Secure Boot status.\n"); + if (status == EFI_NOT_FOUND) + return efi_secureboot_mode_disabled; + return efi_secureboot_mode_unknown; +} diff --git a/include/linux/efi.h b/include/linux/efi.h index d00538a65899..94d34e0be24f 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1480,6 +1480,14 @@ efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, bool efi_runtime_disabled(void); extern void efi_call_virt_check_flags(unsigned long flags, const char *call); +enum efi_secureboot_mode { + efi_secureboot_mode_unset, + efi_secureboot_mode_unknown, + efi_secureboot_mode_disabled, + efi_secureboot_mode_enabled, +}; +enum efi_secureboot_mode efi_get_secureboot(efi_system_table_t *sys_table); + /* * Arch code can implement the following three template macros, avoiding * reptition for the void/non-void return cases of {__,}efi_call_virt(): -- cgit v1.2.3 From 9661b332041dab63ba2e5222b40a9f916c1368a9 Mon Sep 17 00:00:00 2001 From: David Howells Date: Mon, 6 Feb 2017 11:22:45 +0000 Subject: efi: Print the secure boot status in x86 setup_arch() Print the secure boot status in the x86 setup_arch() function, but otherwise do nothing more for now. More functionality will be added later, but this at least allows for testing. Signed-off-by: David Howells [ Use efi_enabled() instead of IS_ENABLED(CONFIG_EFI). ] Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1486380166-31868-7-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- arch/x86/kernel/setup.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c index 4cfba947d774..69780edf0dde 100644 --- a/arch/x86/kernel/setup.c +++ b/arch/x86/kernel/setup.c @@ -1176,6 +1176,20 @@ void __init setup_arch(char **cmdline_p) /* Allocate bigger log buffer */ setup_log_buf(1); + if (efi_enabled(EFI_BOOT)) { + switch (boot_params.secure_boot) { + case efi_secureboot_mode_disabled: + pr_info("Secure boot disabled\n"); + break; + case efi_secureboot_mode_enabled: + pr_info("Secure boot enabled\n"); + break; + default: + pr_info("Secure boot could not be determined\n"); + break; + } + } + reserve_initrd(); acpi_table_upgrade(); -- cgit v1.2.3 From febf2407418a2d6c042fcd77b206040449cb9a70 Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Mon, 6 Feb 2017 18:01:51 +0100 Subject: x86/ACPI: keep x86_cpu_to_acpiid mapping valid on CPU hotplug We may or may not have all possible CPUs in MADT on boot but in any case we're overwriting x86_cpu_to_acpiid mapping with U32_MAX when acpi_register_lapic() is called again on the CPU hotplug path: acpi_processor_hotadd_init() -> acpi_map_cpu() -> acpi_register_lapic() As we have the required acpi_id information in acpi_processor_hotadd_init() propagate it to acpi_map_cpu() to always keep x86_cpu_to_acpiid mapping valid. Reported-by: Andrew Jones Signed-off-by: Vitaly Kuznetsov Signed-off-by: Rafael J. Wysocki --- arch/ia64/kernel/acpi.c | 3 ++- arch/x86/kernel/acpi/boot.c | 5 +++-- drivers/acpi/acpi_processor.c | 4 ++-- include/linux/acpi.h | 3 ++- 4 files changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/acpi.c b/arch/ia64/kernel/acpi.c index 9273e034b730..7508c306aa9e 100644 --- a/arch/ia64/kernel/acpi.c +++ b/arch/ia64/kernel/acpi.c @@ -887,7 +887,8 @@ static int _acpi_map_lsapic(acpi_handle handle, int physid, int *pcpu) } /* wrapper to silence section mismatch warning */ -int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) +int __ref acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, + int *pcpu) { return _acpi_map_lsapic(handle, physid, pcpu); } diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 64422f850e95..04bc5f3f9aa1 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -723,11 +723,12 @@ int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) return 0; } -int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu) +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, + int *pcpu) { int cpu; - cpu = acpi_register_lapic(physid, U32_MAX, ACPI_MADT_ENABLED); + cpu = acpi_register_lapic(physid, acpi_id, ACPI_MADT_ENABLED); if (cpu < 0) { pr_info(PREFIX "Unable to map lapic to logical cpu number\n"); return cpu; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 3de3b6b8f0f1..4467a8089ab8 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -165,7 +165,7 @@ static int acpi_processor_errata(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU int __weak acpi_map_cpu(acpi_handle handle, - phys_cpuid_t physid, int *pcpu) + phys_cpuid_t physid, u32 acpi_id, int *pcpu) { return -ENODEV; } @@ -203,7 +203,7 @@ static int acpi_processor_hotadd_init(struct acpi_processor *pr) cpu_maps_update_begin(); cpu_hotplug_begin(); - ret = acpi_map_cpu(pr->handle, pr->phys_id, &pr->id); + ret = acpi_map_cpu(pr->handle, pr->phys_id, pr->acpi_id, &pr->id); if (ret) goto out; diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 5b36974ed60a..6ab47e92c65a 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -291,7 +291,8 @@ bool acpi_processor_validate_proc_id(int proc_id); #ifdef CONFIG_ACPI_HOTPLUG_CPU /* Arch dependent functions for cpu hotplug support */ -int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, int *pcpu); +int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, + int *pcpu); int acpi_unmap_cpu(int cpu); int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -- cgit v1.2.3 From 5a7670ee23f2c07a639c263b70140eaf1da9f68f Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 3 Feb 2017 16:57:22 -0500 Subject: x86/boot/32: Convert the 32-bit pgtable setup code from assembly to C The new Xen PVH entry point requires page tables to be setup by the kernel since it is entered with paging disabled. Pull the common code out of head_32.S so that mk_early_pgtbl_32() can be invoked from both the new Xen entry point and the existing startup_32() code. Convert resulting common code to C. Signed-off-by: Boris Ostrovsky Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: matt@codeblueprint.co.uk Cc: xen-devel@lists.xenproject.org Link: http://lkml.kernel.org/r/1481215471-9639-1-git-send-email-boris.ostrovsky@oracle.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/pgtable_32.h | 32 ++++++++++ arch/x86/kernel/head32.c | 62 +++++++++++++++++++ arch/x86/kernel/head_32.S | 121 +++----------------------------------- 3 files changed, 101 insertions(+), 114 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable_32.h b/arch/x86/include/asm/pgtable_32.h index b6c0b404898a..fbc73360aea0 100644 --- a/arch/x86/include/asm/pgtable_32.h +++ b/arch/x86/include/asm/pgtable_32.h @@ -27,6 +27,7 @@ struct vm_area_struct; extern pgd_t swapper_pg_dir[1024]; extern pgd_t initial_page_table[1024]; +extern pmd_t initial_pg_pmd[]; static inline void pgtable_cache_init(void) { } static inline void check_pgt_cache(void) { } @@ -75,4 +76,35 @@ do { \ #define kern_addr_valid(kaddr) (0) #endif +/* + * This is how much memory in addition to the memory covered up to + * and including _end we need mapped initially. + * We need: + * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) + * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) + * + * Modulo rounding, each megabyte assigned here requires a kilobyte of + * memory, which is currently unreclaimed. + * + * This should be a multiple of a page. + * + * KERNEL_IMAGE_SIZE should be greater than pa(_end) + * and small than max_low_pfn, otherwise will waste some page table entries + */ +#if PTRS_PER_PMD > 1 +#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) +#else +#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) +#endif + +/* + * Number of possible pages in the lowmem region. + * + * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a + * gas warning about overflowing shift count when gas has been compiled + * with only a host target support using a 32-bit type for internal + * representation. + */ +#define LOWMEM_PAGES ((((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT)) + #endif /* _ASM_X86_PGTABLE_32_H */ diff --git a/arch/x86/kernel/head32.c b/arch/x86/kernel/head32.c index f16c55bfc090..e5fb436a6548 100644 --- a/arch/x86/kernel/head32.c +++ b/arch/x86/kernel/head32.c @@ -49,3 +49,65 @@ asmlinkage __visible void __init i386_start_kernel(void) start_kernel(); } + +/* + * Initialize page tables. This creates a PDE and a set of page + * tables, which are located immediately beyond __brk_base. The variable + * _brk_end is set up to point to the first "safe" location. + * Mappings are created both at virtual address 0 (identity mapping) + * and PAGE_OFFSET for up to _end. + * + * In PAE mode initial_page_table is statically defined to contain + * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 + * entries). The identity mapping is handled by pointing two PGD entries + * to the first kernel PMD. Note the upper half of each PMD or PTE are + * always zero at this stage. + */ +void __init mk_early_pgtbl_32(void) +{ +#ifdef __pa +#undef __pa +#endif +#define __pa(x) ((unsigned long)(x) - PAGE_OFFSET) + pte_t pte, *ptep; + int i; + unsigned long *ptr; + /* Enough space to fit pagetables for the low memory linear map */ + const unsigned long limit = __pa(_end) + + (PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT); +#ifdef CONFIG_X86_PAE + pmd_t pl2, *pl2p = (pmd_t *)__pa(initial_pg_pmd); +#define SET_PL2(pl2, val) { (pl2).pmd = (val); } +#else + pgd_t pl2, *pl2p = (pgd_t *)__pa(initial_page_table); +#define SET_PL2(pl2, val) { (pl2).pgd = (val); } +#endif + + ptep = (pte_t *)__pa(__brk_base); + pte.pte = PTE_IDENT_ATTR; + + while ((pte.pte & PTE_PFN_MASK) < limit) { + + SET_PL2(pl2, (unsigned long)ptep | PDE_IDENT_ATTR); + *pl2p = pl2; +#ifndef CONFIG_X86_PAE + /* Kernel PDE entry */ + *(pl2p + ((PAGE_OFFSET >> PGDIR_SHIFT))) = pl2; +#endif + for (i = 0; i < PTRS_PER_PTE; i++) { + *ptep = pte; + pte.pte += PAGE_SIZE; + ptep++; + } + + pl2p++; + } + + ptr = (unsigned long *)__pa(&max_pfn_mapped); + /* Can't use pte_pfn() since it's a call with CONFIG_PARAVIRT */ + *ptr = (pte.pte & PTE_PFN_MASK) >> PAGE_SHIFT; + + ptr = (unsigned long *)__pa(&_brk_end); + *ptr = (unsigned long)ptep + PAGE_OFFSET; +} + diff --git a/arch/x86/kernel/head_32.S b/arch/x86/kernel/head_32.S index 4e8577d03372..1f85ee8f9439 100644 --- a/arch/x86/kernel/head_32.S +++ b/arch/x86/kernel/head_32.S @@ -24,6 +24,7 @@ #include #include #include +#include /* Physical address */ #define pa(X) ((X) - __PAGE_OFFSET) @@ -41,43 +42,9 @@ #define X86_CAPABILITY new_cpu_data+CPUINFO_x86_capability #define X86_VENDOR_ID new_cpu_data+CPUINFO_x86_vendor_id -/* - * This is how much memory in addition to the memory covered up to - * and including _end we need mapped initially. - * We need: - * (KERNEL_IMAGE_SIZE/4096) / 1024 pages (worst case, non PAE) - * (KERNEL_IMAGE_SIZE/4096) / 512 + 4 pages (worst case for PAE) - * - * Modulo rounding, each megabyte assigned here requires a kilobyte of - * memory, which is currently unreclaimed. - * - * This should be a multiple of a page. - * - * KERNEL_IMAGE_SIZE should be greater than pa(_end) - * and small than max_low_pfn, otherwise will waste some page table entries - */ - -#if PTRS_PER_PMD > 1 -#define PAGE_TABLE_SIZE(pages) (((pages) / PTRS_PER_PMD) + PTRS_PER_PGD) -#else -#define PAGE_TABLE_SIZE(pages) ((pages) / PTRS_PER_PGD) -#endif #define SIZEOF_PTREGS 17*4 -/* - * Number of possible pages in the lowmem region. - * - * We shift 2 by 31 instead of 1 by 32 to the left in order to avoid a - * gas warning about overflowing shift count when gas has been compiled - * with only a host target support using a 32-bit type for internal - * representation. - */ -LOWMEM_PAGES = (((2<<31) - __PAGE_OFFSET) >> PAGE_SHIFT) - -/* Enough space to fit pagetables for the low memory linear map */ -MAPPING_BEYOND_END = PAGE_TABLE_SIZE(LOWMEM_PAGES) << PAGE_SHIFT - /* * Worst-case size of the kernel mapping we need to make: * a relocatable kernel can live anywhere in lowmem, so we need to be able @@ -160,90 +127,15 @@ ENTRY(startup_32) call load_ucode_bsp #endif -/* - * Initialize page tables. This creates a PDE and a set of page - * tables, which are located immediately beyond __brk_base. The variable - * _brk_end is set up to point to the first "safe" location. - * Mappings are created both at virtual address 0 (identity mapping) - * and PAGE_OFFSET for up to _end. - */ -#ifdef CONFIG_X86_PAE - - /* - * In PAE mode initial_page_table is statically defined to contain - * enough entries to cover the VMSPLIT option (that is the top 1, 2 or 3 - * entries). The identity mapping is handled by pointing two PGD entries - * to the first kernel PMD. - * - * Note the upper half of each PMD or PTE are always zero at this stage. - */ - -#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ - - xorl %ebx,%ebx /* %ebx is kept at zero */ - - movl $pa(__brk_base), %edi - movl $pa(initial_pg_pmd), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PMD entry */ - movl %ecx,(%edx) /* Store PMD entry */ - /* Upper half already zero */ - addl $8,%edx - movl $512,%ecx -11: - stosl - xchgl %eax,%ebx - stosl - xchgl %eax,%ebx - addl $0x1000,%eax - loop 11b - - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b -1: - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) + /* Create early pagetables. */ + call mk_early_pgtbl_32 /* Do early initialization of the fixmap area */ movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#ifdef CONFIG_X86_PAE +#define KPMDS (((-__PAGE_OFFSET) >> 30) & 3) /* Number of kernel PMDs */ movl %eax,pa(initial_pg_pmd+0x1000*KPMDS-8) -#else /* Not PAE */ - -page_pde_offset = (__PAGE_OFFSET >> 20); - - movl $pa(__brk_base), %edi - movl $pa(initial_page_table), %edx - movl $PTE_IDENT_ATTR, %eax -10: - leal PDE_IDENT_ATTR(%edi),%ecx /* Create PDE entry */ - movl %ecx,(%edx) /* Store identity PDE entry */ - movl %ecx,page_pde_offset(%edx) /* Store kernel PDE entry */ - addl $4,%edx - movl $1024, %ecx -11: - stosl - addl $0x1000,%eax - loop 11b - /* - * End condition: we must map up to the end + MAPPING_BEYOND_END. - */ - movl $pa(_end) + MAPPING_BEYOND_END + PTE_IDENT_ATTR, %ebp - cmpl %ebp,%eax - jb 10b - addl $__PAGE_OFFSET, %edi - movl %edi, pa(_brk_end) - shrl $12, %eax - movl %eax, pa(max_pfn_mapped) - - /* Do early initialization of the fixmap area */ - movl $pa(initial_pg_fixmap)+PDE_IDENT_ATTR,%eax +#else movl %eax,pa(initial_page_table+0xffc) #endif @@ -666,6 +558,7 @@ ENTRY(setup_once_ref) __PAGE_ALIGNED_BSS .align PAGE_SIZE #ifdef CONFIG_X86_PAE +.globl initial_pg_pmd initial_pg_pmd: .fill 1024*KPMDS,4,0 #else -- cgit v1.2.3 From 063334f30543597430f172bd7690d21e3590e148 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Fri, 3 Feb 2017 16:57:22 -0500 Subject: xen/x86: Remove PVH support We are replacing existing PVH guests with new implementation. We are keeping xen_pvh_domain() macro (for now set to zero) because when we introduce new PVH implementation later in this series we will reuse current PVH-specific code (xen_pvh_gnttab_setup()), and that code is conditioned by 'if (xen_pvh_domain())'. (We will also need a noop xen_pvh_domain() for !CONFIG_XEN_PVH). Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/xen/enlighten.c | 140 ++++++--------------------------------- arch/x86/xen/mmu.c | 21 +----- arch/x86/xen/setup.c | 37 +---------- arch/x86/xen/smp.c | 78 ++++++++-------------- arch/x86/xen/smp.h | 8 --- arch/x86/xen/xen-head.S | 62 ++--------------- arch/x86/xen/xen-ops.h | 1 - drivers/xen/events/events_base.c | 1 - include/xen/xen.h | 13 +--- 9 files changed, 54 insertions(+), 307 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 51ef95232725..828f1b226f56 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1138,10 +1138,11 @@ void xen_setup_vcpu_info_placement(void) xen_vcpu_setup(cpu); } - /* xen_vcpu_setup managed to place the vcpu_info within the - * percpu area for all cpus, so make use of it. Note that for - * PVH we want to use native IRQ mechanism. */ - if (have_vcpu_info_placement && !xen_pvh_domain()) { + /* + * xen_vcpu_setup managed to place the vcpu_info within the + * percpu area for all cpus, so make use of it. + */ + if (have_vcpu_info_placement) { pv_irq_ops.save_fl = __PV_IS_CALLEE_SAVE(xen_save_fl_direct); pv_irq_ops.restore_fl = __PV_IS_CALLEE_SAVE(xen_restore_fl_direct); pv_irq_ops.irq_disable = __PV_IS_CALLEE_SAVE(xen_irq_disable_direct); @@ -1413,49 +1414,9 @@ static void __init xen_boot_params_init_edd(void) * Set up the GDT and segment registers for -fstack-protector. Until * we do this, we have to be careful not to call any stack-protected * function, which is most of the kernel. - * - * Note, that it is __ref because the only caller of this after init - * is PVH which is not going to use xen_load_gdt_boot or other - * __init functions. */ -static void __ref xen_setup_gdt(int cpu) +static void xen_setup_gdt(int cpu) { - if (xen_feature(XENFEAT_auto_translated_physmap)) { -#ifdef CONFIG_X86_64 - unsigned long dummy; - - load_percpu_segment(cpu); /* We need to access per-cpu area */ - switch_to_new_gdt(cpu); /* GDT and GS set */ - - /* We are switching of the Xen provided GDT to our HVM mode - * GDT. The new GDT has __KERNEL_CS with CS.L = 1 - * and we are jumping to reload it. - */ - asm volatile ("pushq %0\n" - "leaq 1f(%%rip),%0\n" - "pushq %0\n" - "lretq\n" - "1:\n" - : "=&r" (dummy) : "0" (__KERNEL_CS)); - - /* - * While not needed, we also set the %es, %ds, and %fs - * to zero. We don't care about %ss as it is NULL. - * Strictly speaking this is not needed as Xen zeros those - * out (and also MSR_FS_BASE, MSR_GS_BASE, MSR_KERNEL_GS_BASE) - * - * Linux zeros them in cpu_init() and in secondary_startup_64 - * (for BSP). - */ - loadsegment(es, 0); - loadsegment(ds, 0); - loadsegment(fs, 0); -#else - /* PVH: TODO Implement. */ - BUG(); -#endif - return; /* PVH does not need any PV GDT ops. */ - } pv_cpu_ops.write_gdt_entry = xen_write_gdt_entry_boot; pv_cpu_ops.load_gdt = xen_load_gdt_boot; @@ -1466,59 +1427,6 @@ static void __ref xen_setup_gdt(int cpu) pv_cpu_ops.load_gdt = xen_load_gdt; } -#ifdef CONFIG_XEN_PVH -/* - * A PV guest starts with default flags that are not set for PVH, set them - * here asap. - */ -static void xen_pvh_set_cr_flags(int cpu) -{ - - /* Some of these are setup in 'secondary_startup_64'. The others: - * X86_CR0_TS, X86_CR0_PE, X86_CR0_ET are set by Xen for HVM guests - * (which PVH shared codepaths), while X86_CR0_PG is for PVH. */ - write_cr0(read_cr0() | X86_CR0_MP | X86_CR0_NE | X86_CR0_WP | X86_CR0_AM); - - if (!cpu) - return; - /* - * For BSP, PSE PGE are set in probe_page_size_mask(), for APs - * set them here. For all, OSFXSR OSXMMEXCPT are set in fpu__init_cpu(). - */ - if (boot_cpu_has(X86_FEATURE_PSE)) - cr4_set_bits_and_update_boot(X86_CR4_PSE); - - if (boot_cpu_has(X86_FEATURE_PGE)) - cr4_set_bits_and_update_boot(X86_CR4_PGE); -} - -/* - * Note, that it is ref - because the only caller of this after init - * is PVH which is not going to use xen_load_gdt_boot or other - * __init functions. - */ -void __ref xen_pvh_secondary_vcpu_init(int cpu) -{ - xen_setup_gdt(cpu); - xen_pvh_set_cr_flags(cpu); -} - -static void __init xen_pvh_early_guest_init(void) -{ - if (!xen_feature(XENFEAT_auto_translated_physmap)) - return; - - BUG_ON(!xen_feature(XENFEAT_hvm_callback_vector)); - - xen_pvh_early_cpu_init(0, false); - xen_pvh_set_cr_flags(0); - -#ifdef CONFIG_X86_32 - BUG(); /* PVH: Implement proper support. */ -#endif -} -#endif /* CONFIG_XEN_PVH */ - static void __init xen_dom0_set_legacy_features(void) { x86_platform.legacy.rtc = 1; @@ -1555,24 +1463,17 @@ asmlinkage __visible void __init xen_start_kernel(void) xen_domain_type = XEN_PV_DOMAIN; xen_setup_features(); -#ifdef CONFIG_XEN_PVH - xen_pvh_early_guest_init(); -#endif + xen_setup_machphys_mapping(); /* Install Xen paravirt ops */ pv_info = xen_info; pv_init_ops = xen_init_ops; - if (!xen_pvh_domain()) { - pv_cpu_ops = xen_cpu_ops; + pv_cpu_ops = xen_cpu_ops; - x86_platform.get_nmi_reason = xen_get_nmi_reason; - } + x86_platform.get_nmi_reason = xen_get_nmi_reason; - if (xen_feature(XENFEAT_auto_translated_physmap)) - x86_init.resources.memory_setup = xen_auto_xlated_memory_setup; - else - x86_init.resources.memory_setup = xen_memory_setup; + x86_init.resources.memory_setup = xen_memory_setup; x86_init.oem.arch_setup = xen_arch_setup; x86_init.oem.banner = xen_banner; @@ -1665,18 +1566,15 @@ asmlinkage __visible void __init xen_start_kernel(void) /* set the limit of our address space */ xen_reserve_top(); - /* PVH: runs at default kernel iopl of 0 */ - if (!xen_pvh_domain()) { - /* - * We used to do this in xen_arch_setup, but that is too late - * on AMD were early_cpu_init (run before ->arch_setup()) calls - * early_amd_init which pokes 0xcf8 port. - */ - set_iopl.iopl = 1; - rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); - if (rc != 0) - xen_raw_printk("physdev_op failed %d\n", rc); - } + /* + * We used to do this in xen_arch_setup, but that is too late + * on AMD were early_cpu_init (run before ->arch_setup()) calls + * early_amd_init which pokes 0xcf8 port. + */ + set_iopl.iopl = 1; + rc = HYPERVISOR_physdev_op(PHYSDEVOP_set_iopl, &set_iopl); + if (rc != 0) + xen_raw_printk("physdev_op failed %d\n", rc); #ifdef CONFIG_X86_32 /* set up basic CPUID stuff */ diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index 7d5afdb417cc..f6740b5b1738 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -1792,10 +1792,6 @@ static void __init set_page_prot_flags(void *addr, pgprot_t prot, unsigned long pfn = __pa(addr) >> PAGE_SHIFT; pte_t pte = pfn_pte(pfn, prot); - /* For PVH no need to set R/O or R/W to pin them or unpin them. */ - if (xen_feature(XENFEAT_auto_translated_physmap)) - return; - if (HYPERVISOR_update_va_mapping((unsigned long)addr, pte, flags)) BUG(); } @@ -1902,8 +1898,7 @@ static void __init check_pt_base(unsigned long *pt_base, unsigned long *pt_end, * level2_ident_pgt, and level2_kernel_pgt. This means that only the * kernel has a physical mapping to start with - but that's enough to * get __va working. We need to fill in the rest of the physical - * mapping once some sort of allocator has been set up. NOTE: for - * PVH, the page tables are native. + * mapping once some sort of allocator has been set up. */ void __init xen_setup_kernel_pagetable(pgd_t *pgd, unsigned long max_pfn) { @@ -2812,16 +2807,6 @@ static int do_remap_gfn(struct vm_area_struct *vma, BUG_ON(!((vma->vm_flags & (VM_PFNMAP | VM_IO)) == (VM_PFNMAP | VM_IO))); - if (xen_feature(XENFEAT_auto_translated_physmap)) { -#ifdef CONFIG_XEN_PVH - /* We need to update the local page tables and the xen HAP */ - return xen_xlate_remap_gfn_array(vma, addr, gfn, nr, err_ptr, - prot, domid, pages); -#else - return -EINVAL; -#endif - } - rmd.mfn = gfn; rmd.prot = prot; /* We use the err_ptr to indicate if there we are doing a contiguous @@ -2915,10 +2900,6 @@ int xen_unmap_domain_gfn_range(struct vm_area_struct *vma, if (!pages || !xen_feature(XENFEAT_auto_translated_physmap)) return 0; -#ifdef CONFIG_XEN_PVH - return xen_xlate_unmap_gfn_range(vma, numpgs, pages); -#else return -EINVAL; -#endif } EXPORT_SYMBOL_GPL(xen_unmap_domain_gfn_range); diff --git a/arch/x86/xen/setup.c b/arch/x86/xen/setup.c index f3f7b41116f7..a8c306cf8868 100644 --- a/arch/x86/xen/setup.c +++ b/arch/x86/xen/setup.c @@ -914,39 +914,6 @@ char * __init xen_memory_setup(void) return "Xen"; } -/* - * Machine specific memory setup for auto-translated guests. - */ -char * __init xen_auto_xlated_memory_setup(void) -{ - struct xen_memory_map memmap; - int i; - int rc; - - memmap.nr_entries = ARRAY_SIZE(xen_e820_map); - set_xen_guest_handle(memmap.buffer, xen_e820_map); - - rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); - if (rc < 0) - panic("No memory map (%d)\n", rc); - - xen_e820_map_entries = memmap.nr_entries; - - sanitize_e820_map(xen_e820_map, ARRAY_SIZE(xen_e820_map), - &xen_e820_map_entries); - - for (i = 0; i < xen_e820_map_entries; i++) - e820_add_region(xen_e820_map[i].addr, xen_e820_map[i].size, - xen_e820_map[i].type); - - /* Remove p2m info, it is not needed. */ - xen_start_info->mfn_list = 0; - xen_start_info->first_p2m_pfn = 0; - xen_start_info->nr_p2m_frames = 0; - - return "Xen"; -} - /* * Set the bit indicating "nosegneg" library variants should be used. * We only need to bother in pure 32-bit mode; compat 32-bit processes @@ -1032,8 +999,8 @@ void __init xen_pvmmu_arch_setup(void) void __init xen_arch_setup(void) { xen_panic_handler_init(); - if (!xen_feature(XENFEAT_auto_translated_physmap)) - xen_pvmmu_arch_setup(); + + xen_pvmmu_arch_setup(); #ifdef CONFIG_ACPI if (!(xen_start_info->flags & SIF_INITDOMAIN)) { diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 311acad7dad2..0dee6f59ea82 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -99,18 +99,8 @@ static void cpu_bringup(void) local_irq_enable(); } -/* - * Note: cpu parameter is only relevant for PVH. The reason for passing it - * is we can't do smp_processor_id until the percpu segments are loaded, for - * which we need the cpu number! So we pass it in rdi as first parameter. - */ -asmlinkage __visible void cpu_bringup_and_idle(int cpu) +asmlinkage __visible void cpu_bringup_and_idle(void) { -#ifdef CONFIG_XEN_PVH - if (xen_feature(XENFEAT_auto_translated_physmap) && - xen_feature(XENFEAT_supervisor_mode_kernel)) - xen_pvh_secondary_vcpu_init(cpu); -#endif cpu_bringup(); cpu_startup_entry(CPUHP_AP_ONLINE_IDLE); } @@ -404,61 +394,47 @@ cpu_initialize_context(unsigned int cpu, struct task_struct *idle) gdt = get_cpu_gdt_table(cpu); #ifdef CONFIG_X86_32 - /* Note: PVH is not yet supported on x86_32. */ ctxt->user_regs.fs = __KERNEL_PERCPU; ctxt->user_regs.gs = __KERNEL_STACK_CANARY; #endif memset(&ctxt->fpu_ctxt, 0, sizeof(ctxt->fpu_ctxt)); - if (!xen_feature(XENFEAT_auto_translated_physmap)) { - ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; - ctxt->flags = VGCF_IN_KERNEL; - ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ - ctxt->user_regs.ds = __USER_DS; - ctxt->user_regs.es = __USER_DS; - ctxt->user_regs.ss = __KERNEL_DS; + ctxt->user_regs.eip = (unsigned long)cpu_bringup_and_idle; + ctxt->flags = VGCF_IN_KERNEL; + ctxt->user_regs.eflags = 0x1000; /* IOPL_RING1 */ + ctxt->user_regs.ds = __USER_DS; + ctxt->user_regs.es = __USER_DS; + ctxt->user_regs.ss = __KERNEL_DS; - xen_copy_trap_info(ctxt->trap_ctxt); + xen_copy_trap_info(ctxt->trap_ctxt); - ctxt->ldt_ents = 0; + ctxt->ldt_ents = 0; - BUG_ON((unsigned long)gdt & ~PAGE_MASK); + BUG_ON((unsigned long)gdt & ~PAGE_MASK); - gdt_mfn = arbitrary_virt_to_mfn(gdt); - make_lowmem_page_readonly(gdt); - make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); + gdt_mfn = arbitrary_virt_to_mfn(gdt); + make_lowmem_page_readonly(gdt); + make_lowmem_page_readonly(mfn_to_virt(gdt_mfn)); - ctxt->gdt_frames[0] = gdt_mfn; - ctxt->gdt_ents = GDT_ENTRIES; + ctxt->gdt_frames[0] = gdt_mfn; + ctxt->gdt_ents = GDT_ENTRIES; - ctxt->kernel_ss = __KERNEL_DS; - ctxt->kernel_sp = idle->thread.sp0; + ctxt->kernel_ss = __KERNEL_DS; + ctxt->kernel_sp = idle->thread.sp0; #ifdef CONFIG_X86_32 - ctxt->event_callback_cs = __KERNEL_CS; - ctxt->failsafe_callback_cs = __KERNEL_CS; + ctxt->event_callback_cs = __KERNEL_CS; + ctxt->failsafe_callback_cs = __KERNEL_CS; #else - ctxt->gs_base_kernel = per_cpu_offset(cpu); -#endif - ctxt->event_callback_eip = - (unsigned long)xen_hypervisor_callback; - ctxt->failsafe_callback_eip = - (unsigned long)xen_failsafe_callback; - ctxt->user_regs.cs = __KERNEL_CS; - per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); - } -#ifdef CONFIG_XEN_PVH - else { - /* - * The vcpu comes on kernel page tables which have the NX pte - * bit set. This means before DS/SS is touched, NX in - * EFER must be set. Hence the following assembly glue code. - */ - ctxt->user_regs.eip = (unsigned long)xen_pvh_early_cpu_init; - ctxt->user_regs.rdi = cpu; - ctxt->user_regs.rsi = true; /* entry == true */ - } + ctxt->gs_base_kernel = per_cpu_offset(cpu); #endif + ctxt->event_callback_eip = + (unsigned long)xen_hypervisor_callback; + ctxt->failsafe_callback_eip = + (unsigned long)xen_failsafe_callback; + ctxt->user_regs.cs = __KERNEL_CS; + per_cpu(xen_cr3, cpu) = __pa(swapper_pg_dir); + ctxt->user_regs.esp = idle->thread.sp0 - sizeof(struct pt_regs); ctxt->ctrlreg[3] = xen_pfn_to_cr3(virt_to_gfn(swapper_pg_dir)); if (HYPERVISOR_vcpu_op(VCPUOP_initialise, xen_vcpu_nr(cpu), ctxt)) diff --git a/arch/x86/xen/smp.h b/arch/x86/xen/smp.h index c5c16dc4f694..9beef333584a 100644 --- a/arch/x86/xen/smp.h +++ b/arch/x86/xen/smp.h @@ -21,12 +21,4 @@ static inline int xen_smp_intr_init(unsigned int cpu) static inline void xen_smp_intr_free(unsigned int cpu) {} #endif /* CONFIG_SMP */ -#ifdef CONFIG_XEN_PVH -extern void xen_pvh_early_cpu_init(int cpu, bool entry); -#else -static inline void xen_pvh_early_cpu_init(int cpu, bool entry) -{ -} -#endif - #endif diff --git a/arch/x86/xen/xen-head.S b/arch/x86/xen/xen-head.S index 7f8d8abf4c1a..37794e42b67d 100644 --- a/arch/x86/xen/xen-head.S +++ b/arch/x86/xen/xen-head.S @@ -16,25 +16,6 @@ #include #include -#ifdef CONFIG_XEN_PVH -#define PVH_FEATURES_STR "|writable_descriptor_tables|auto_translated_physmap|supervisor_mode_kernel" -/* Note the lack of 'hvm_callback_vector'. Older hypervisor will - * balk at this being part of XEN_ELFNOTE_FEATURES, so we put it in - * XEN_ELFNOTE_SUPPORTED_FEATURES which older hypervisors will ignore. - */ -#define PVH_FEATURES ((1 << XENFEAT_writable_page_tables) | \ - (1 << XENFEAT_auto_translated_physmap) | \ - (1 << XENFEAT_supervisor_mode_kernel) | \ - (1 << XENFEAT_hvm_callback_vector)) -/* The XENFEAT_writable_page_tables is not stricly necessary as we set that - * up regardless whether this CONFIG option is enabled or not, but it - * clarifies what the right flags need to be. - */ -#else -#define PVH_FEATURES_STR "" -#define PVH_FEATURES (0) -#endif - __INIT ENTRY(startup_xen) cld @@ -54,41 +35,6 @@ ENTRY(startup_xen) __FINIT -#ifdef CONFIG_XEN_PVH -/* - * xen_pvh_early_cpu_init() - early PVH VCPU initialization - * @cpu: this cpu number (%rdi) - * @entry: true if this is a secondary vcpu coming up on this entry - * point, false if this is the boot CPU being initialized for - * the first time (%rsi) - * - * Note: This is called as a function on the boot CPU, and is the entry point - * on the secondary CPU. - */ -ENTRY(xen_pvh_early_cpu_init) - mov %rsi, %r11 - - /* Gather features to see if NX implemented. */ - mov $0x80000001, %eax - cpuid - mov %edx, %esi - - mov $MSR_EFER, %ecx - rdmsr - bts $_EFER_SCE, %eax - - bt $20, %esi - jnc 1f /* No NX, skip setting it */ - bts $_EFER_NX, %eax -1: wrmsr -#ifdef CONFIG_SMP - cmp $0, %r11b - jne cpu_bringup_and_idle -#endif - ret - -#endif /* CONFIG_XEN_PVH */ - .pushsection .text .balign PAGE_SIZE ENTRY(hypercall_page) @@ -114,10 +60,10 @@ ENTRY(hypercall_page) #endif ELFNOTE(Xen, XEN_ELFNOTE_ENTRY, _ASM_PTR startup_xen) ELFNOTE(Xen, XEN_ELFNOTE_HYPERCALL_PAGE, _ASM_PTR hypercall_page) - ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, .ascii "!writable_page_tables|pae_pgdir_above_4gb"; .asciz PVH_FEATURES_STR) - ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, .long (PVH_FEATURES) | - (1 << XENFEAT_writable_page_tables) | - (1 << XENFEAT_dom0)) + ELFNOTE(Xen, XEN_ELFNOTE_FEATURES, + .ascii "!writable_page_tables|pae_pgdir_above_4gb") + ELFNOTE(Xen, XEN_ELFNOTE_SUPPORTED_FEATURES, + .long (1 << XENFEAT_writable_page_tables) | (1 << XENFEAT_dom0)) ELFNOTE(Xen, XEN_ELFNOTE_PAE_MODE, .asciz "yes") ELFNOTE(Xen, XEN_ELFNOTE_LOADER, .asciz "generic") ELFNOTE(Xen, XEN_ELFNOTE_L1_MFN_VALID, diff --git a/arch/x86/xen/xen-ops.h b/arch/x86/xen/xen-ops.h index ac0a2b0f9e62..f6a41c41ebc7 100644 --- a/arch/x86/xen/xen-ops.h +++ b/arch/x86/xen/xen-ops.h @@ -146,5 +146,4 @@ __visible void xen_adjust_exception_frame(void); extern int xen_panic_handler_init(void); -void xen_pvh_secondary_vcpu_init(int cpu); #endif /* XEN_OPS_H */ diff --git a/drivers/xen/events/events_base.c b/drivers/xen/events/events_base.c index fd8e872d2943..6a53577772c9 100644 --- a/drivers/xen/events/events_base.c +++ b/drivers/xen/events/events_base.c @@ -1704,7 +1704,6 @@ void __init xen_init_IRQ(void) pirq_eoi_map = (void *)__get_free_page(GFP_KERNEL|__GFP_ZERO); eoi_gmfn.gmfn = virt_to_gfn(pirq_eoi_map); rc = HYPERVISOR_physdev_op(PHYSDEVOP_pirq_eoi_gmfn_v2, &eoi_gmfn); - /* TODO: No PVH support for PIRQ EOI */ if (rc != 0) { free_page((unsigned long) pirq_eoi_map); pirq_eoi_map = NULL; diff --git a/include/xen/xen.h b/include/xen/xen.h index f0f0252cff9a..d0f96840f71f 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -29,17 +29,6 @@ extern enum xen_domain_type xen_domain_type; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ -#ifdef CONFIG_XEN_PVH -/* This functionality exists only for x86. The XEN_PVHVM support exists - * only in x86 world - hence on ARM it will be always disabled. - * N.B. ARM guests are neither PV nor HVM nor PVHVM. - * It's a bit like PVH but is different also (it's further towards the H - * end of the spectrum than even PVH). - */ -#include -#define xen_pvh_domain() (xen_pv_domain() && \ - xen_feature(XENFEAT_auto_translated_physmap)) -#else #define xen_pvh_domain() (0) -#endif + #endif /* _XEN_XEN_H */ -- cgit v1.2.3 From 7243b93345f7f8de260e8f5b4670803e64fcbb00 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sun, 5 Feb 2017 19:50:52 -0500 Subject: xen/pvh: Bootstrap PVH guest Start PVH guest at XEN_ELFNOTE_PHYS32_ENTRY address. Setup hypercall page, initialize boot_params, enable early page tables. Since this stub is executed before kernel entry point we cannot use variables in .bss which is cleared by kernel. We explicitly place variables that are initialized here into .data. While adjusting xen_hvm_init_shared_info() make it use cpuid_e?x() instead of cpuid() (wherever possible). Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/Kconfig | 2 +- arch/x86/xen/Makefile | 1 + arch/x86/xen/enlighten.c | 124 +++++++++++++++++++++++++++++++++--- arch/x86/xen/xen-pvh.S | 161 +++++++++++++++++++++++++++++++++++++++++++++++ include/xen/xen.h | 5 ++ 5 files changed, 282 insertions(+), 11 deletions(-) create mode 100644 arch/x86/xen/xen-pvh.S (limited to 'arch/x86') diff --git a/arch/x86/xen/Kconfig b/arch/x86/xen/Kconfig index c7b15f3e2cf3..76b6dbd627df 100644 --- a/arch/x86/xen/Kconfig +++ b/arch/x86/xen/Kconfig @@ -53,5 +53,5 @@ config XEN_DEBUG_FS config XEN_PVH bool "Support for running as a PVH guest" - depends on X86_64 && XEN && XEN_PVHVM + depends on XEN && XEN_PVHVM && ACPI def_bool n diff --git a/arch/x86/xen/Makefile b/arch/x86/xen/Makefile index e47e52787d32..cb0164aee156 100644 --- a/arch/x86/xen/Makefile +++ b/arch/x86/xen/Makefile @@ -23,3 +23,4 @@ obj-$(CONFIG_XEN_DEBUG_FS) += debugfs.o obj-$(CONFIG_XEN_DOM0) += vga.o obj-$(CONFIG_SWIOTLB_XEN) += pci-swiotlb-xen.o obj-$(CONFIG_XEN_EFI) += efi.o +obj-$(CONFIG_XEN_PVH) += xen-pvh.o diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 828f1b226f56..d2144f7c8fab 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include @@ -176,6 +177,20 @@ struct tls_descs { */ static DEFINE_PER_CPU(struct tls_descs, shadow_tls_desc); +#ifdef CONFIG_XEN_PVH +/* + * PVH variables. + * + * xen_pvh and pvh_bootparams need to live in data segment since they + * are used after startup_{32|64}, which clear .bss, are invoked. + */ +bool xen_pvh __attribute__((section(".data"))) = 0; +struct boot_params pvh_bootparams __attribute__((section(".data"))); + +struct hvm_start_info pvh_start_info; +unsigned int pvh_start_info_sz = sizeof(pvh_start_info); +#endif + static void clamp_max_cpus(void) { #ifdef CONFIG_SMP @@ -1656,6 +1671,90 @@ asmlinkage __visible void __init xen_start_kernel(void) #endif } +#ifdef CONFIG_XEN_PVH +static void __init init_pvh_bootparams(void) +{ + struct xen_memory_map memmap; + unsigned int i; + int rc; + + memset(&pvh_bootparams, 0, sizeof(pvh_bootparams)); + + memmap.nr_entries = ARRAY_SIZE(pvh_bootparams.e820_map); + set_xen_guest_handle(memmap.buffer, pvh_bootparams.e820_map); + rc = HYPERVISOR_memory_op(XENMEM_memory_map, &memmap); + if (rc) { + xen_raw_printk("XENMEM_memory_map failed (%d)\n", rc); + BUG(); + } + + if (memmap.nr_entries < E820MAX - 1) { + pvh_bootparams.e820_map[memmap.nr_entries].addr = + ISA_START_ADDRESS; + pvh_bootparams.e820_map[memmap.nr_entries].size = + ISA_END_ADDRESS - ISA_START_ADDRESS; + pvh_bootparams.e820_map[memmap.nr_entries].type = + E820_RESERVED; + memmap.nr_entries++; + } else + xen_raw_printk("Warning: Can fit ISA range into e820\n"); + + sanitize_e820_map(pvh_bootparams.e820_map, + ARRAY_SIZE(pvh_bootparams.e820_map), + &memmap.nr_entries); + + pvh_bootparams.e820_entries = memmap.nr_entries; + for (i = 0; i < pvh_bootparams.e820_entries; i++) + e820_add_region(pvh_bootparams.e820_map[i].addr, + pvh_bootparams.e820_map[i].size, + pvh_bootparams.e820_map[i].type); + + pvh_bootparams.hdr.cmd_line_ptr = + pvh_start_info.cmdline_paddr; + + /* The first module is always ramdisk. */ + if (pvh_start_info.nr_modules) { + struct hvm_modlist_entry *modaddr = + __va(pvh_start_info.modlist_paddr); + pvh_bootparams.hdr.ramdisk_image = modaddr->paddr; + pvh_bootparams.hdr.ramdisk_size = modaddr->size; + } + + /* + * See Documentation/x86/boot.txt. + * + * Version 2.12 supports Xen entry point but we will use default x86/PC + * environment (i.e. hardware_subarch 0). + */ + pvh_bootparams.hdr.version = 0x212; + pvh_bootparams.hdr.type_of_loader = (9 << 4) | 0; /* Xen loader */ +} + +/* + * This routine (and those that it might call) should not use + * anything that lives in .bss since that segment will be cleared later. + */ +void __init xen_prepare_pvh(void) +{ + u32 msr; + u64 pfn; + + if (pvh_start_info.magic != XEN_HVM_START_MAGIC_VALUE) { + xen_raw_printk("Error: Unexpected magic value (0x%08x)\n", + pvh_start_info.magic); + BUG(); + } + + xen_pvh = 1; + + msr = cpuid_ebx(xen_cpuid_base() + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + + init_pvh_bootparams(); +} +#endif + void __ref xen_hvm_init_shared_info(void) { int cpu; @@ -1695,20 +1794,29 @@ void __ref xen_hvm_init_shared_info(void) static void __init init_hvm_pv_info(void) { int major, minor; - uint32_t eax, ebx, ecx, edx, pages, msr, base; - u64 pfn; + uint32_t eax, ebx, ecx, edx, base; base = xen_cpuid_base(); - cpuid(base + 1, &eax, &ebx, &ecx, &edx); + eax = cpuid_eax(base + 1); major = eax >> 16; minor = eax & 0xffff; printk(KERN_INFO "Xen version %d.%d.\n", major, minor); - cpuid(base + 2, &pages, &msr, &ecx, &edx); + xen_domain_type = XEN_HVM_DOMAIN; - pfn = __pa(hypercall_page); - wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + /* PVH set up hypercall page in xen_prepare_pvh(). */ + if (xen_pvh_domain()) + pv_info.name = "Xen PVH"; + else { + u64 pfn; + uint32_t msr; + + pv_info.name = "Xen HVM"; + msr = cpuid_ebx(base + 2); + pfn = __pa(hypercall_page); + wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); + } xen_setup_features(); @@ -1717,10 +1825,6 @@ static void __init init_hvm_pv_info(void) this_cpu_write(xen_vcpu_id, ebx); else this_cpu_write(xen_vcpu_id, smp_processor_id()); - - pv_info.name = "Xen HVM"; - - xen_domain_type = XEN_HVM_DOMAIN; } #endif diff --git a/arch/x86/xen/xen-pvh.S b/arch/x86/xen/xen-pvh.S new file mode 100644 index 000000000000..5e246716d58f --- /dev/null +++ b/arch/x86/xen/xen-pvh.S @@ -0,0 +1,161 @@ +/* + * Copyright C 2016, Oracle and/or its affiliates. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program. If not, see . + */ + + .code32 + .text +#define _pa(x) ((x) - __START_KERNEL_map) + +#include +#include +#include +#include +#include +#include +#include +#include +#include + + __HEAD + +/* + * Entry point for PVH guests. + * + * Xen ABI specifies the following register state when we come here: + * + * - `ebx`: contains the physical memory address where the loader has placed + * the boot start info structure. + * - `cr0`: bit 0 (PE) must be set. All the other writeable bits are cleared. + * - `cr4`: all bits are cleared. + * - `cs `: must be a 32-bit read/execute code segment with a base of ‘0’ + * and a limit of ‘0xFFFFFFFF’. The selector value is unspecified. + * - `ds`, `es`: must be a 32-bit read/write data segment with a base of + * ‘0’ and a limit of ‘0xFFFFFFFF’. The selector values are all + * unspecified. + * - `tr`: must be a 32-bit TSS (active) with a base of '0' and a limit + * of '0x67'. + * - `eflags`: bit 17 (VM) must be cleared. Bit 9 (IF) must be cleared. + * Bit 8 (TF) must be cleared. Other bits are all unspecified. + * + * All other processor registers and flag bits are unspecified. The OS is in + * charge of setting up it's own stack, GDT and IDT. + */ + +ENTRY(pvh_start_xen) + cld + + lgdt (_pa(gdt)) + + mov $(__BOOT_DS),%eax + mov %eax,%ds + mov %eax,%es + mov %eax,%ss + + /* Stash hvm_start_info. */ + mov $_pa(pvh_start_info), %edi + mov %ebx, %esi + mov _pa(pvh_start_info_sz), %ecx + shr $2,%ecx + rep + movsl + + mov $_pa(early_stack_end), %esp + + /* Enable PAE mode. */ + mov %cr4, %eax + orl $X86_CR4_PAE, %eax + mov %eax, %cr4 + +#ifdef CONFIG_X86_64 + /* Enable Long mode. */ + mov $MSR_EFER, %ecx + rdmsr + btsl $_EFER_LME, %eax + wrmsr + + /* Enable pre-constructed page tables. */ + mov $_pa(init_level4_pgt), %eax + mov %eax, %cr3 + mov $(X86_CR0_PG | X86_CR0_PE), %eax + mov %eax, %cr0 + + /* Jump to 64-bit mode. */ + ljmp $__KERNEL_CS, $_pa(1f) + + /* 64-bit entry point. */ + .code64 +1: + call xen_prepare_pvh + + /* startup_64 expects boot_params in %rsi. */ + mov $_pa(pvh_bootparams), %rsi + mov $_pa(startup_64), %rax + jmp *%rax + +#else /* CONFIG_X86_64 */ + + call mk_early_pgtbl_32 + + mov $_pa(initial_page_table), %eax + mov %eax, %cr3 + + mov %cr0, %eax + or $(X86_CR0_PG | X86_CR0_PE), %eax + mov %eax, %cr0 + + ljmp $__BOOT_CS, $1f +1: + call xen_prepare_pvh + mov $_pa(pvh_bootparams), %esi + + /* startup_32 doesn't expect paging and PAE to be on. */ + ljmp $__BOOT_CS, $_pa(2f) +2: + mov %cr0, %eax + and $~X86_CR0_PG, %eax + mov %eax, %cr0 + mov %cr4, %eax + and $~X86_CR4_PAE, %eax + mov %eax, %cr4 + + ljmp $__BOOT_CS, $_pa(startup_32) +#endif +END(pvh_start_xen) + + .section ".init.data","aw" + .balign 8 +gdt: + .word gdt_end - gdt_start + .long _pa(gdt_start) + .word 0 +gdt_start: + .quad 0x0000000000000000 /* NULL descriptor */ + .quad 0x0000000000000000 /* reserved */ +#ifdef CONFIG_X86_64 + .quad GDT_ENTRY(0xa09a, 0, 0xfffff) /* __KERNEL_CS */ +#else + .quad GDT_ENTRY(0xc09a, 0, 0xfffff) /* __KERNEL_CS */ +#endif + .quad GDT_ENTRY(0xc092, 0, 0xfffff) /* __KERNEL_DS */ +gdt_end: + + .balign 4 +early_stack: + .fill 256, 1, 0 +early_stack_end: + + ELFNOTE(Xen, XEN_ELFNOTE_PHYS32_ENTRY, + _ASM_PTR (pvh_start_xen - __START_KERNEL_map)) diff --git a/include/xen/xen.h b/include/xen/xen.h index d0f96840f71f..6e8b7fc79801 100644 --- a/include/xen/xen.h +++ b/include/xen/xen.h @@ -29,6 +29,11 @@ extern enum xen_domain_type xen_domain_type; #define xen_initial_domain() (0) #endif /* CONFIG_XEN_DOM0 */ +#ifdef CONFIG_XEN_PVH +extern bool xen_pvh; +#define xen_pvh_domain() (xen_hvm_domain() && xen_pvh) +#else #define xen_pvh_domain() (0) +#endif #endif /* _XEN_XEN_H */ -- cgit v1.2.3 From 5adad168e586cb381633f45d181bb729b04393a5 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Sun, 5 Feb 2017 19:50:58 -0500 Subject: xen/pvh: Make sure we don't use ACPI_IRQ_MODEL_PIC for SCI Since we are not using PIC and (at least currently) don't have IOAPIC we want to make sure that acpi_irq_model doesn't stay set to ACPI_IRQ_MODEL_PIC (which is the default value). If we allowed it to stay then acpi_os_install_interrupt_handler() would try (and fail) to request_irq() for PIC. Instead we set the model to ACPI_IRQ_MODEL_PLATFORM which will prevent this from happening. Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/enlighten.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index d2144f7c8fab..6d406f3465bc 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1672,6 +1672,16 @@ asmlinkage __visible void __init xen_start_kernel(void) } #ifdef CONFIG_XEN_PVH + +static void xen_pvh_arch_setup(void) +{ +#ifdef CONFIG_ACPI + /* Make sure we don't fall back to (default) ACPI_IRQ_MODEL_PIC. */ + if (nr_ioapics == 0) + acpi_irq_model = ACPI_IRQ_MODEL_PLATFORM; +#endif +} + static void __init init_pvh_bootparams(void) { struct xen_memory_map memmap; @@ -1752,6 +1762,8 @@ void __init xen_prepare_pvh(void) wrmsr_safe(msr, (u32)pfn, (u32)(pfn >> 32)); init_pvh_bootparams(); + + x86_init.oem.arch_setup = xen_pvh_arch_setup; } #endif -- cgit v1.2.3 From bcc57df281d93dfa502c824e9f73e0191c3f7c34 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 6 Feb 2017 10:57:15 -0500 Subject: xen/pvh: PVH guests always have PV devices Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross Reviewed-by: Konrad Rzeszutek Wilk --- arch/x86/xen/platform-pci-unplug.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/xen/platform-pci-unplug.c b/arch/x86/xen/platform-pci-unplug.c index 90d1b83cf35f..33a783c77d96 100644 --- a/arch/x86/xen/platform-pci-unplug.c +++ b/arch/x86/xen/platform-pci-unplug.c @@ -73,8 +73,8 @@ bool xen_has_pv_devices(void) if (!xen_domain()) return false; - /* PV domains always have them. */ - if (xen_pv_domain()) + /* PV and PVH domains always have them. */ + if (xen_pv_domain() || xen_pvh_domain()) return true; /* And user has xen_platform_pci=0 set in guest config as -- cgit v1.2.3 From 7a1c44ebc5ac2e2c28d95b0da6060728c334e7e4 Mon Sep 17 00:00:00 2001 From: Boris Ostrovsky Date: Mon, 6 Feb 2017 10:58:06 -0500 Subject: xen/pvh: Use Xen's emergency_restart op for PVH guests Using native_machine_emergency_restart (called during reboot) will lead PVH guests to machine_real_restart() where we try to use real_mode_header which is not initialized. Signed-off-by: Boris Ostrovsky Reviewed-by: Juergen Gross --- arch/x86/xen/enlighten.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/xen/enlighten.c b/arch/x86/xen/enlighten.c index 6d406f3465bc..ec1d5c46e58f 100644 --- a/arch/x86/xen/enlighten.c +++ b/arch/x86/xen/enlighten.c @@ -1924,6 +1924,9 @@ static void __init xen_hvm_guest_init(void) x86_init.irqs.intr_init = xen_init_IRQ; xen_hvm_init_time_ops(); xen_hvm_init_mmu_ops(); + + if (xen_pvh_domain()) + machine_ops.emergency_restart = xen_emergency_restart; #ifdef CONFIG_KEXEC_CORE machine_ops.shutdown = xen_hvm_shutdown; machine_ops.crash_shutdown = xen_hvm_crash_shutdown; -- cgit v1.2.3 From 42cf014d38d8822cce63703a467e00f65d000952 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Jan 2017 11:58:57 +0100 Subject: KVM: nVMX: kmap() can't fail kmap() can't fail, therefore it will always return a valid pointer. Let's just get rid of the unnecessary checks. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 --------- 1 file changed, 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d850d5d36182..693e4203b666 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4973,10 +4973,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) return 0; vapic_page = kmap(vmx->nested.virtual_apic_page); - if (!vapic_page) { - WARN_ON(1); - return -ENOMEM; - } __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); kunmap(vmx->nested.virtual_apic_page); @@ -9738,11 +9734,6 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; } msr_bitmap_l1 = (unsigned long *)kmap(page); - if (!msr_bitmap_l1) { - nested_release_page_clean(page); - WARN_ON(1); - return false; - } memset(msr_bitmap_l0, 0xff, PAGE_SIZE); -- cgit v1.2.3 From 6342c50ad12e8ce0736e722184a7dbdea4a3477f Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Wed, 25 Jan 2017 11:58:58 +0100 Subject: KVM: nVMX: vmx_complete_nested_posted_interrupt() can't fail vmx_complete_nested_posted_interrupt() can't fail, let's turn it into a void function. Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 693e4203b666..7c3e42623090 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4953,7 +4953,7 @@ static bool vmx_get_enable_apicv(void) return enable_apicv; } -static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) +static void vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); int max_irr; @@ -4964,13 +4964,13 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmx->nested.pi_pending) { vmx->nested.pi_pending = false; if (!pi_test_and_clear_on(vmx->nested.pi_desc)) - return 0; + return; max_irr = find_last_bit( (unsigned long *)vmx->nested.pi_desc->pir, 256); if (max_irr == 256) - return 0; + return; vapic_page = kmap(vmx->nested.virtual_apic_page); __kvm_apic_update_irr(vmx->nested.pi_desc->pir, vapic_page); @@ -4983,7 +4983,6 @@ static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu) vmcs_write16(GUEST_INTR_STATUS, status); } } - return 0; } static inline bool kvm_vcpu_trigger_posted_interrupt(struct kvm_vcpu *vcpu) @@ -10695,7 +10694,8 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) return 0; } - return vmx_complete_nested_posted_interrupt(vcpu); + vmx_complete_nested_posted_interrupt(vcpu); + return 0; } static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 55dd00a73a518281bc846dc5de1a718349431eb2 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 24 Jan 2017 15:09:39 -0200 Subject: KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall Add a hypercall to retrieve the host realtime clock and the TSC value used to calculate that clock read. Used to implement clock synchronization between host and guest. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/hypercalls.txt | 35 +++++++++++++++++ arch/x86/include/uapi/asm/kvm_para.h | 9 +++++ arch/x86/kvm/x86.c | 66 ++++++++++++++++++++++++++++++++ include/uapi/linux/kvm_para.h | 2 + 4 files changed, 112 insertions(+) (limited to 'arch/x86') diff --git a/Documentation/virtual/kvm/hypercalls.txt b/Documentation/virtual/kvm/hypercalls.txt index c8d040e27046..feaaa634f154 100644 --- a/Documentation/virtual/kvm/hypercalls.txt +++ b/Documentation/virtual/kvm/hypercalls.txt @@ -81,3 +81,38 @@ the vcpu to sleep until occurrence of an appropriate event. Another vcpu of the same guest can wakeup the sleeping vcpu by issuing KVM_HC_KICK_CPU hypercall, specifying APIC ID (a1) of the vcpu to be woken up. An additional argument (a0) is used in the hypercall for future use. + + +6. KVM_HC_CLOCK_PAIRING +------------------------ +Architecture: x86 +Status: active +Purpose: Hypercall used to synchronize host and guest clocks. +Usage: + +a0: guest physical address where host copies +"struct kvm_clock_offset" structure. + +a1: clock_type, ATM only KVM_CLOCK_PAIRING_WALLCLOCK (0) +is supported (corresponding to the host's CLOCK_REALTIME clock). + + struct kvm_clock_pairing { + __s64 sec; + __s64 nsec; + __u64 tsc; + __u32 flags; + __u32 pad[9]; + }; + + Where: + * sec: seconds from clock_type clock. + * nsec: nanoseconds from clock_type clock. + * tsc: guest TSC value used to calculate sec/nsec pair + * flags: flags, unused (0) at the moment. + +The hypercall lets a guest compute a precise timestamp across +host and guest. The guest can use the returned TSC value to +compute the CLOCK_REALTIME for its clock, at the same instant. + +Returns KVM_EOPNOTSUPP if the host does not use TSC clocksource, +or if clock type is different than KVM_CLOCK_PAIRING_WALLCLOCK. diff --git a/arch/x86/include/uapi/asm/kvm_para.h b/arch/x86/include/uapi/asm/kvm_para.h index 1421a6585126..cff0bb6556f8 100644 --- a/arch/x86/include/uapi/asm/kvm_para.h +++ b/arch/x86/include/uapi/asm/kvm_para.h @@ -50,6 +50,15 @@ struct kvm_steal_time { __u32 pad[11]; }; +#define KVM_CLOCK_PAIRING_WALLCLOCK 0 +struct kvm_clock_pairing { + __s64 sec; + __s64 nsec; + __u64 tsc; + __u32 flags; + __u32 pad[9]; +}; + #define KVM_STEAL_ALIGNMENT_BITS 5 #define KVM_STEAL_VALID_BITS ((-1ULL << (KVM_STEAL_ALIGNMENT_BITS + 1))) #define KVM_STEAL_RESERVED_MASK (((1 << KVM_STEAL_ALIGNMENT_BITS) - 1 ) << 1) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 4fd4d4f35caf..09e5d31dac98 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1142,6 +1142,7 @@ struct pvclock_gtod_data { u64 boot_ns; u64 nsec_base; + u64 wall_time_sec; }; static struct pvclock_gtod_data pvclock_gtod_data; @@ -1165,6 +1166,8 @@ static void update_pvclock_gtod(struct timekeeper *tk) vdata->boot_ns = boot_ns; vdata->nsec_base = tk->tkr_mono.xtime_nsec; + vdata->wall_time_sec = tk->xtime_sec; + write_seqcount_end(&vdata->seq); } #endif @@ -1626,6 +1629,28 @@ static int do_monotonic_boot(s64 *t, u64 *cycle_now) return mode; } +static int do_realtime(struct timespec *ts, u64 *cycle_now) +{ + struct pvclock_gtod_data *gtod = &pvclock_gtod_data; + unsigned long seq; + int mode; + u64 ns; + + do { + seq = read_seqcount_begin(>od->seq); + mode = gtod->clock.vclock_mode; + ts->tv_sec = gtod->wall_time_sec; + ns = gtod->nsec_base; + ns += vgettsc(cycle_now); + ns >>= gtod->clock.shift; + } while (unlikely(read_seqcount_retry(>od->seq, seq))); + + ts->tv_sec += __iter_div_u64_rem(ns, NSEC_PER_SEC, &ns); + ts->tv_nsec = ns; + + return mode; +} + /* returns true if host is using tsc clocksource */ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) { @@ -1635,6 +1660,17 @@ static bool kvm_get_time_and_clockread(s64 *kernel_ns, u64 *cycle_now) return do_monotonic_boot(kernel_ns, cycle_now) == VCLOCK_TSC; } + +/* returns true if host is using tsc clocksource */ +static bool kvm_get_walltime_and_clockread(struct timespec *ts, + u64 *cycle_now) +{ + /* checked again under seqlock below */ + if (pvclock_gtod_data.clock.vclock_mode != VCLOCK_TSC) + return false; + + return do_realtime(ts, cycle_now) == VCLOCK_TSC; +} #endif /* @@ -6112,6 +6148,33 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, + unsigned long clock_type) +{ + struct kvm_clock_pairing clock_pairing; + struct timespec ts; + cycle_t cycle; + int ret; + + if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) + return -KVM_EOPNOTSUPP; + + if (kvm_get_walltime_and_clockread(&ts, &cycle) == false) + return -KVM_EOPNOTSUPP; + + clock_pairing.sec = ts.tv_sec; + clock_pairing.nsec = ts.tv_nsec; + clock_pairing.tsc = kvm_read_l1_tsc(vcpu, cycle); + clock_pairing.flags = 0; + + ret = 0; + if (kvm_write_guest(vcpu->kvm, paddr, &clock_pairing, + sizeof(struct kvm_clock_pairing))) + ret = -KVM_EFAULT; + + return ret; +} + /* * kvm_pv_kick_cpu_op: Kick a vcpu. * @@ -6176,6 +6239,9 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); ret = 0; break; + case KVM_HC_CLOCK_PAIRING: + ret = kvm_pv_clock_pairing(vcpu, a0, a1); + break; default: ret = -KVM_ENOSYS; break; diff --git a/include/uapi/linux/kvm_para.h b/include/uapi/linux/kvm_para.h index bf6cd7d5cac2..fed506aeff62 100644 --- a/include/uapi/linux/kvm_para.h +++ b/include/uapi/linux/kvm_para.h @@ -14,6 +14,7 @@ #define KVM_EFAULT EFAULT #define KVM_E2BIG E2BIG #define KVM_EPERM EPERM +#define KVM_EOPNOTSUPP 95 #define KVM_HC_VAPIC_POLL_IRQ 1 #define KVM_HC_MMU_OP 2 @@ -23,6 +24,7 @@ #define KVM_HC_MIPS_GET_CLOCK_FREQ 6 #define KVM_HC_MIPS_EXIT_VM 7 #define KVM_HC_MIPS_CONSOLE_OUTPUT 8 +#define KVM_HC_CLOCK_PAIRING 9 /* * hypercalls use architecture specific -- cgit v1.2.3 From ad21fc4faa2a1f919bac1073b885df9310dbc581 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 6 Feb 2017 16:31:57 -0800 Subject: arch: Move CONFIG_DEBUG_RODATA and CONFIG_SET_MODULE_RONX to be common There are multiple architectures that support CONFIG_DEBUG_RODATA and CONFIG_SET_MODULE_RONX. These options also now have the ability to be turned off at runtime. Move these to an architecture independent location and make these options def_bool y for almost all of those arches. Signed-off-by: Laura Abbott Acked-by: Ingo Molnar Acked-by: Heiko Carstens Signed-off-by: Kees Cook --- Documentation/security/self-protection.txt | 6 ++++++ arch/Kconfig | 34 ++++++++++++++++++++++++++++++ arch/arm/Kconfig | 4 ++++ arch/arm/Kconfig.debug | 11 ---------- arch/arm/mm/Kconfig | 12 ----------- arch/arm64/Kconfig | 5 ++--- arch/arm64/Kconfig.debug | 11 ---------- arch/parisc/Kconfig | 1 + arch/parisc/Kconfig.debug | 11 ---------- arch/s390/Kconfig | 5 ++--- arch/s390/Kconfig.debug | 3 --- arch/x86/Kconfig | 5 ++--- arch/x86/Kconfig.debug | 11 ---------- 13 files changed, 51 insertions(+), 68 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/security/self-protection.txt b/Documentation/security/self-protection.txt index 3010576c9fca..f41dd00e8b98 100644 --- a/Documentation/security/self-protection.txt +++ b/Documentation/security/self-protection.txt @@ -56,6 +56,12 @@ CONFIG_DEBUG_SET_MODULE_RONX, which seek to make sure that code is not writable, data is not executable, and read-only data is neither writable nor executable. +Most architectures have these options on by default and not user selectable. +For some architectures like arm that wish to have these be selectable, +the architecture Kconfig can select ARCH_OPTIONAL_KERNEL_RWX to enable +a Kconfig prompt. CONFIG_ARCH_OPTIONAL_KERNEL_RWX_DEFAULT determines +the default setting when ARCH_OPTIONAL_KERNEL_RWX is enabled. + #### Function pointers and sensitive variables must not be writable Vast areas of kernel memory contain function pointers that are looked diff --git a/arch/Kconfig b/arch/Kconfig index 99839c23d453..3f8b8be3036f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -781,4 +781,38 @@ config VMAP_STACK the stack to map directly to the KASAN shadow map using a formula that is incorrect if the stack is in vmalloc space. +config ARCH_OPTIONAL_KERNEL_RWX + def_bool n + +config ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + def_bool n + +config ARCH_HAS_STRICT_KERNEL_RWX + def_bool n + +config DEBUG_RODATA + bool "Make kernel text and rodata read-only" if ARCH_OPTIONAL_KERNEL_RWX + depends on ARCH_HAS_STRICT_KERNEL_RWX + default !ARCH_OPTIONAL_KERNEL_RWX || ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + help + If this is set, kernel text and rodata memory will be made read-only, + and non-text memory will be made non-executable. This provides + protection against certain security exploits (e.g. executing the heap + or modifying text) + + These features are considered standard security practice these days. + You should say Y here in almost all cases. + +config ARCH_HAS_STRICT_MODULE_RWX + def_bool n + +config DEBUG_SET_MODULE_RONX + bool "Set loadable kernel module data as NX and text as RO" if ARCH_OPTIONAL_KERNEL_RWX + depends on ARCH_HAS_STRICT_MODULE_RWX && MODULES + default !ARCH_OPTIONAL_KERNEL_RWX || ARCH_OPTIONAL_KERNEL_RWX_DEFAULT + help + If this is set, module text and rodata memory will be made read-only, + and non-text memory will be made non-executable. This provides + protection against certain security exploits (e.g. writing to text) + source "kernel/gcov/Kconfig" diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 5fab553fd03a..8c88c8ad064b 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,10 +4,14 @@ config ARM select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_STRICT_KERNEL_RWX if MMU && !XIP_KERNEL + select ARCH_HAS_STRICT_MODULE_RWX if MMU select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_MIGHT_HAVE_PC_PARPORT + select ARCH_OPTIONAL_KERNEL_RWX if ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_OPTIONAL_KERNEL_RWX_DEFAULT if CPU_V7 select ARCH_SUPPORTS_ATOMIC_RMW select ARCH_USE_BUILTIN_BSWAP select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/arm/Kconfig.debug b/arch/arm/Kconfig.debug index d83f7c369e51..426d2716f55d 100644 --- a/arch/arm/Kconfig.debug +++ b/arch/arm/Kconfig.debug @@ -1738,17 +1738,6 @@ config PID_IN_CONTEXTIDR additional instructions during context switch. Say Y here only if you are planning to use hardware trace tools with this kernel. -config DEBUG_SET_MODULE_RONX - bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES && MMU - ---help--- - This option helps catch unintended modifications to loadable - kernel module's text and read-only data. It also prevents execution - of module data. Such protection may interfere with run-time code - patching and dynamic kernel tracing - and they might also protect - against certain classes of kernel exploits. - If in doubt, say "N". - source "drivers/hwtracing/coresight/Kconfig" endmenu diff --git a/arch/arm/mm/Kconfig b/arch/arm/mm/Kconfig index f68e8ec29447..419a0355d4e4 100644 --- a/arch/arm/mm/Kconfig +++ b/arch/arm/mm/Kconfig @@ -1051,18 +1051,6 @@ config ARCH_SUPPORTS_BIG_ENDIAN This option specifies the architecture can support big endian operation. -config DEBUG_RODATA - bool "Make kernel text and rodata read-only" - depends on MMU && !XIP_KERNEL - default y if CPU_V7 - help - If this is set, kernel text and rodata memory will be made - read-only, and non-text kernel memory will be made non-executable. - The tradeoff is that each region is padded to section-size (1MiB) - boundaries (because their permissions are different and splitting - the 1M pages into 4K ones causes TLB performance problems), which - can waste memory. - config DEBUG_ALIGN_RODATA bool "Make rodata strictly non-executable" depends on DEBUG_RODATA diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 111742126897..e1efbcc9de32 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -13,6 +13,8 @@ config ARM64 select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF select ARCH_SUPPORTS_ATOMIC_RMW @@ -123,9 +125,6 @@ config ARCH_PHYS_ADDR_T_64BIT config MMU def_bool y -config DEBUG_RODATA - def_bool y - config ARM64_PAGE_SHIFT int default 16 if ARM64_64K_PAGES diff --git a/arch/arm64/Kconfig.debug b/arch/arm64/Kconfig.debug index d1ebd46872fd..939815e8d695 100644 --- a/arch/arm64/Kconfig.debug +++ b/arch/arm64/Kconfig.debug @@ -71,17 +71,6 @@ config DEBUG_WX If in doubt, say "Y". -config DEBUG_SET_MODULE_RONX - bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES - default y - help - Is this is set, kernel module text and rodata will be made read-only. - This is to help catch accidental or malicious attempts to change the - kernel's executable code. - - If in doubt, say Y. - config DEBUG_ALIGN_RODATA depends on DEBUG_RODATA bool "Align linker sections up to SECTION_SIZE" diff --git a/arch/parisc/Kconfig b/arch/parisc/Kconfig index 3a71f38cdc05..ad294b3fb90b 100644 --- a/arch/parisc/Kconfig +++ b/arch/parisc/Kconfig @@ -8,6 +8,7 @@ config PARISC select HAVE_SYSCALL_TRACEPOINTS select ARCH_WANT_FRAME_POINTERS select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_STRICT_KERNEL_RWX select RTC_CLASS select RTC_DRV_GENERIC select INIT_ALL_POSSIBLE diff --git a/arch/parisc/Kconfig.debug b/arch/parisc/Kconfig.debug index 68b7cbd0810a..0d856b94c9b1 100644 --- a/arch/parisc/Kconfig.debug +++ b/arch/parisc/Kconfig.debug @@ -5,15 +5,4 @@ source "lib/Kconfig.debug" config TRACE_IRQFLAGS_SUPPORT def_bool y -config DEBUG_RODATA - bool "Write protect kernel read-only data structures" - depends on DEBUG_KERNEL - default y - help - Mark the kernel read-only data as write-protected in the pagetables, - in order to catch accidental (and incorrect) writes to such const - data. This option may have a slight performance impact because a - portion of the kernel code won't be covered by a TLB anymore. - If in doubt, say "N". - endmenu diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c6722112527d..53bb0e3e0db3 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -62,9 +62,6 @@ config PCI_QUIRKS config ARCH_SUPPORTS_UPROBES def_bool y -config DEBUG_RODATA - def_bool y - config S390 def_bool y select ARCH_HAS_DEVMEM_IS_ALLOWED @@ -73,6 +70,8 @@ config S390 select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_INLINE_READ_LOCK diff --git a/arch/s390/Kconfig.debug b/arch/s390/Kconfig.debug index 26c5d5beb4be..57f8ea9c49e3 100644 --- a/arch/s390/Kconfig.debug +++ b/arch/s390/Kconfig.debug @@ -17,7 +17,4 @@ config S390_PTDUMP kernel. If in doubt, say "N" -config DEBUG_SET_MODULE_RONX - def_bool y - depends on MODULES endmenu diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..13e1bf4b0fe5 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -54,6 +54,8 @@ config X86 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_PMEM_API if X86_64 select ARCH_HAS_SG_CHAIN + select ARCH_HAS_STRICT_KERNEL_RWX + select ARCH_HAS_STRICT_MODULE_RWX select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI @@ -309,9 +311,6 @@ config ARCH_SUPPORTS_UPROBES config FIX_EARLYCON_MEM def_bool y -config DEBUG_RODATA - def_bool y - config PGTABLE_LEVELS int default 4 if X86_64 diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index 67eec55093a5..69cdd0b2176b 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -109,17 +109,6 @@ config DEBUG_WX If in doubt, say "Y". -config DEBUG_SET_MODULE_RONX - bool "Set loadable kernel module data as NX and text as RO" - depends on MODULES - ---help--- - This option helps catch unintended modifications to loadable - kernel module's text and read-only data. It also prevents execution - of module data. Such protection may interfere with run-time code - patching and dynamic kernel tracing - and they might also protect - against certain classes of kernel exploits. - If in doubt, say "N". - config DEBUG_NX_TEST tristate "Testcase for the NX non-executable stack feature" depends on DEBUG_KERNEL && m -- cgit v1.2.3 From 80fbd89cbd07287a7013006c14ddec923b7a4ff6 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Feb 2017 10:57:24 +0100 Subject: KVM: x86: fix compilation Fix rebase breakage from commit 55dd00a73a51 ("KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall", 2017-01-24), courtesy of the "I could have sworn I had pushed the right branch" department. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 09e5d31dac98..96dd7dd13ee6 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6153,7 +6153,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, { struct kvm_clock_pairing clock_pairing; struct timespec ts; - cycle_t cycle; + u64 cycle; int ret; if (clock_type != KVM_CLOCK_PAIRING_WALLCLOCK) -- cgit v1.2.3 From f4066c2bc4d0de4e5dcbff21dae41e89fe8f38c0 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Tue, 24 Jan 2017 15:09:41 -0200 Subject: kvmclock: export kvmclock clocksource and data pointers To be used by KVM PTP driver. Signed-off-by: Marcelo Tosatti Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvmclock.h | 6 ++++++ arch/x86/kernel/kvmclock.c | 5 ++++- 2 files changed, 10 insertions(+), 1 deletion(-) create mode 100644 arch/x86/include/asm/kvmclock.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvmclock.h b/arch/x86/include/asm/kvmclock.h new file mode 100644 index 000000000000..f260bef63591 --- /dev/null +++ b/arch/x86/include/asm/kvmclock.h @@ -0,0 +1,6 @@ +#ifndef _ASM_X86_KVM_CLOCK_H +#define _ASM_X86_KVM_CLOCK_H + +extern struct clocksource kvm_clock; + +#endif /* _ASM_X86_KVM_CLOCK_H */ diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index 2a5cafdf8808..995fa260a6da 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -28,6 +28,7 @@ #include #include +#include static int kvmclock __ro_after_init = 1; static int msr_kvm_system_time = MSR_KVM_SYSTEM_TIME; @@ -49,6 +50,7 @@ struct pvclock_vsyscall_time_info *pvclock_pvti_cpu0_va(void) { return hv_clock; } +EXPORT_SYMBOL_GPL(pvclock_pvti_cpu0_va); /* * The wallclock is the time of day when we booted. Since then, some time may @@ -174,13 +176,14 @@ bool kvm_check_and_clear_guest_paused(void) return ret; } -static struct clocksource kvm_clock = { +struct clocksource kvm_clock = { .name = "kvm-clock", .read = kvm_clock_get_cycles, .rating = 400, .mask = CLOCKSOURCE_MASK(64), .flags = CLOCK_SOURCE_IS_CONTINUOUS, }; +EXPORT_SYMBOL_GPL(kvm_clock); int kvm_register_clock(char *txt) { -- cgit v1.2.3 From d966564fcdc19e13eb6ba1fbe6b8101070339c3d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 8 Feb 2017 18:08:29 -0800 Subject: Revert "x86/ioapic: Restore IO-APIC irq_chip retrigger callback" This reverts commit 020eb3daaba2857b32c4cf4c82f503d6a00a67de. Gabriel C reports that it causes his machine to not boot, and we haven't tracked down the reason for it yet. Since the bug it fixes has been around for a longish time, we're better off reverting the fix for now. Gabriel says: "It hangs early and freezes with a lot RCU warnings. I bisected it down to : > Ruslan Ruslichenko (1): > x86/ioapic: Restore IO-APIC irq_chip retrigger callback Reverting this one fixes the problem for me.. The box is a PRIMERGY TX200 S5 , 2 socket , 2 x E5520 CPU(s) installed" and Ruslan and Thomas are currently stumped. Reported-and-bisected-by: Gabriel C Cc: Ruslan Ruslichenko Cc: Thomas Gleixner Cc: stable@kernel.org # for the backport of the original commit Signed-off-by: Linus Torvalds --- arch/x86/kernel/apic/io_apic.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/io_apic.c b/arch/x86/kernel/apic/io_apic.c index 52f352b063fd..bd6b8c270c24 100644 --- a/arch/x86/kernel/apic/io_apic.c +++ b/arch/x86/kernel/apic/io_apic.c @@ -1875,7 +1875,6 @@ static struct irq_chip ioapic_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ack_level, .irq_set_affinity = ioapic_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; @@ -1887,7 +1886,6 @@ static struct irq_chip ioapic_ir_chip __read_mostly = { .irq_ack = irq_chip_ack_parent, .irq_eoi = ioapic_ir_ack_level, .irq_set_affinity = ioapic_set_affinity, - .irq_retrigger = irq_chip_retrigger_hierarchy, .flags = IRQCHIP_SKIP_SET_WAKE, }; -- cgit v1.2.3 From 8ef81a9a453f9048c1683e40b540a4221986a2d1 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 9 Feb 2017 16:10:42 +0100 Subject: KVM: x86: hide KVM_HC_CLOCK_PAIRING on 32 bit The newly added hypercall doesn't work on x86-32: arch/x86/kvm/x86.c: In function 'kvm_pv_clock_pairing': arch/x86/kvm/x86.c:6163:6: error: implicit declaration of function 'kvm_get_walltime_and_clockread';did you mean 'kvm_get_time_scale'? [-Werror=implicit-function-declaration] This adds an #ifdef around it, matching the one around the related functions that are also only implemented on 64-bit systems. Fixes: 55dd00a73a51 ("KVM: x86: add KVM_HC_CLOCK_PAIRING hypercall") Signed-off-by: Arnd Bergmann Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 500008f800dc..2f64e5d0ae53 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6148,6 +6148,7 @@ int kvm_emulate_halt(struct kvm_vcpu *vcpu) } EXPORT_SYMBOL_GPL(kvm_emulate_halt); +#ifdef CONFIG_X86_64 static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, unsigned long clock_type) { @@ -6174,6 +6175,7 @@ static int kvm_pv_clock_pairing(struct kvm_vcpu *vcpu, gpa_t paddr, return ret; } +#endif /* * kvm_pv_kick_cpu_op: Kick a vcpu. @@ -6239,9 +6241,11 @@ int kvm_emulate_hypercall(struct kvm_vcpu *vcpu) kvm_pv_kick_cpu_op(vcpu->kvm, a0, a1); ret = 0; break; +#ifdef CONFIG_X86_64 case KVM_HC_CLOCK_PAIRING: ret = kvm_pv_clock_pairing(vcpu, a0, a1); break; +#endif default: ret = -KVM_ENOSYS; break; -- cgit v1.2.3 From f2e04214ef7f7e49d1e06109ad1b2718155dab25 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Feb 2017 16:08:41 +0100 Subject: x86/tsc: Avoid the large time jump when sanitizing TSC ADJUST Olof reported that on a machine which has a BIOS wreckaged TSC the timestamps in dmesg are making a large jump because the TSC value is jumping forward after resetting the TSC ADJUST register to a sane value. This can be avoided by calling the TSC ADJUST saniziting function before initializing the per cpu sched clock machinery. That takes the offset into account and avoid the time jump. What cannot be avoided is that the 'Firmware Bug' warnings on the secondary CPUs are printed with the large time offsets because it would be too much effort and ugly hackery to print those warnings into a buffer and emit them after the adjustemt on the starting CPUs. It's a firmware bug and should be fixed in firmware. The weird timestamps are collateral damage and just illustrate the sillyness of the BIOS folks: [ 0.397445] smp: Bringing up secondary CPUs ... [ 0.402100] x86: Booting SMP configuration: [ 0.406343] .... node #0, CPUs: #1 [1265776479.930667] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU1: -2978888639183101 [1265776479.944664] TSC ADJUST synchronize: Reference CPU0: 0 CPU1: -2978888639183101 [ 0.508119] #2 [1265776480.032346] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU2: -2978888639183677 [1265776480.044192] TSC ADJUST synchronize: Reference CPU0: 0 CPU2: -2978888639183677 [ 0.607643] #3 [1265776480.131874] [Firmware Bug]: TSC ADJUST differs: Reference CPU0: -2978888639075328 CPU3: -2978888639184530 [1265776480.143720] TSC ADJUST synchronize: Reference CPU0: 0 CPU3: -2978888639184530 [ 0.707108] smp: Brought up 1 node, 4 CPUs [ 0.711271] smpboot: Total of 4 processors activated (21698.88 BogoMIPS) Reported-by: Olof Johansson Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170209151231.411460506@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index e41af597aed8..37e7cf544e51 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1356,6 +1356,9 @@ void __init tsc_init(void) (unsigned long)cpu_khz / 1000, (unsigned long)cpu_khz % 1000); + /* Sanitize TSC ADJUST before cyc2ns gets initialized */ + tsc_store_and_check_tsc_adjust(true); + /* * Secondary CPUs do not run through tsc_init(), so set up * all the scale factors for all CPUs, assuming the same @@ -1386,8 +1389,6 @@ void __init tsc_init(void) if (unsynchronized_tsc()) mark_tsc_unstable("TSCs unsynchronized"); - else - tsc_store_and_check_tsc_adjust(true); check_system_tsc_reliable(); -- cgit v1.2.3 From 5f2e71e71410ecb858cfec184ba092adaca61626 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 9 Feb 2017 16:08:42 +0100 Subject: x86/tsc: Make the TSC ADJUST sanitizing work for tsc_reliable When the TSC is marked reliable then the synchronization check is skipped, but that also skips the TSC ADJUST sanitizing code. So on a machine with a wreckaged BIOS the TSC deviation between CPUs might go unnoticed. Let the TSC adjust sanitizing code run unconditionally and just skip the expensive synchronization checks when TSC is marked reliable. Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Olof Johansson Link: http://lkml.kernel.org/r/20170209151231.491189912@linutronix.de Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc_sync.c | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc_sync.c b/arch/x86/kernel/tsc_sync.c index d0db011051a5..728f75378475 100644 --- a/arch/x86/kernel/tsc_sync.c +++ b/arch/x86/kernel/tsc_sync.c @@ -286,13 +286,6 @@ void check_tsc_sync_source(int cpu) if (unsynchronized_tsc()) return; - if (tsc_clocksource_reliable) { - if (cpu == (nr_cpu_ids-1) || system_state != SYSTEM_BOOTING) - pr_info( - "Skipped synchronization checks as TSC is reliable.\n"); - return; - } - /* * Set the maximum number of test runs to * 1 if the CPU does not provide the TSC_ADJUST MSR @@ -380,14 +373,19 @@ void check_tsc_sync_target(void) int cpus = 2; /* Also aborts if there is no TSC. */ - if (unsynchronized_tsc() || tsc_clocksource_reliable) + if (unsynchronized_tsc()) return; /* * Store, verify and sanitize the TSC adjust register. If * successful skip the test. + * + * The test is also skipped when the TSC is marked reliable. This + * is true for SoCs which have no fallback clocksource. On these + * SoCs the TSC is frequency synchronized, but still the TSC ADJUST + * register might have been wreckaged by the BIOS.. */ - if (tsc_store_and_check_tsc_adjust(false)) { + if (tsc_store_and_check_tsc_adjust(false) || tsc_clocksource_reliable) { atomic_inc(&skip_test); return; } -- cgit v1.2.3 From 146fbb766934dc003fcbf755b519acef683576bf Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Fri, 10 Feb 2017 12:54:05 +0300 Subject: x86/mm/ptdump: Fix soft lockup in page table walker CONFIG_KASAN=y needs a lot of virtual memory mapped for its shadow. In that case ptdump_walk_pgd_level_core() takes a lot of time to walk across all page tables and doing this without a rescheduling causes soft lockups: NMI watchdog: BUG: soft lockup - CPU#3 stuck for 23s! [swapper/0:1] ... Call Trace: ptdump_walk_pgd_level_core+0x40c/0x550 ptdump_walk_pgd_level_checkwx+0x17/0x20 mark_rodata_ro+0x13b/0x150 kernel_init+0x2f/0x120 ret_from_fork+0x2c/0x40 I guess that this issue might arise even without KASAN on huge machines with several terabytes of RAM. Stick cond_resched() in pgd loop to fix this. Reported-by: Tobias Regnery Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: "Paul E . McKenney" Cc: Dmitry Vyukov Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/20170210095405.31802-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/dump_pagetables.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index ea9c49adaa1f..8aa6bea1cd6c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -406,6 +407,7 @@ static void ptdump_walk_pgd_level_core(struct seq_file *m, pgd_t *pgd, } else note_page(m, &st, __pgprot(0), 1); + cond_resched(); start++; } -- cgit v1.2.3 From 9b06e1018abc65585b07c75c5b3f406dbabe7005 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Sat, 4 Feb 2017 08:46:23 -0700 Subject: Drivers: hv: Fix the bug in generating the guest ID Fix the bug in the generation of the guest ID. Without this fix the host side telemetry code is broken. Signed-off-by: K. Y. Srinivasan Fixes: 352c9624242d ("Drivers: hv: vmbus: Move the definition of generate_guest_id()") Signed-off-by: Greg Kroah-Hartman --- arch/x86/include/asm/mshyperv.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index f8dc3700de67..56407c6d2397 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -80,7 +80,7 @@ struct ms_hyperv_tsc_page { * */ -#define HV_LINUX_VENDOR_ID 0x8800 +#define HV_LINUX_VENDOR_ID 0x8100 /* * Generate the guest ID based on the guideline described above. @@ -91,7 +91,7 @@ static inline __u64 generate_guest_id(__u64 d_info1, __u64 kernel_version, { __u64 guest_id = 0; - guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 56); + guest_id = (((__u64)HV_LINUX_VENDOR_ID) << 48); guest_id |= (d_info1 << 48); guest_id |= (kernel_version << 16); guest_id |= d_info2; -- cgit v1.2.3 From dee863b571b0a76e9c549ee99e8782bb4bc6502b Mon Sep 17 00:00:00 2001 From: Vitaly Kuznetsov Date: Sat, 4 Feb 2017 09:57:13 -0700 Subject: hv: export current Hyper-V clocksource As a preparation to implementing Hyper-V PTP device supporting .getcrosststamp we need to export a reference to the current Hyper-V clocksource in use (MSR or TSC page). Signed-off-by: Vitaly Kuznetsov Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 13 +++++++++---- arch/x86/include/asm/mshyperv.h | 3 +++ 2 files changed, 12 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index b371d0e984a9..c224b7df4d21 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -99,6 +99,9 @@ static struct clocksource hyperv_cs_msr = { }; static void *hypercall_pg; +struct clocksource *hyperv_cs; +EXPORT_SYMBOL_GPL(hyperv_cs); + /* * This function is to be invoked early in the boot sequence after the * hypervisor has been detected. @@ -141,10 +144,10 @@ void hyperv_init(void) union hv_x64_msr_hypercall_contents tsc_msr; tsc_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL); - if (!tsc_pg) { - clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); - return; - } + if (!tsc_pg) + goto register_msr_cs; + + hyperv_cs = &hyperv_cs_tsc; rdmsrl(HV_X64_MSR_REFERENCE_TSC, tsc_msr.as_uint64); @@ -161,6 +164,8 @@ void hyperv_init(void) * the partition counter. */ +register_msr_cs: + hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); } diff --git a/arch/x86/include/asm/mshyperv.h b/arch/x86/include/asm/mshyperv.h index 56407c6d2397..7c9c895432a9 100644 --- a/arch/x86/include/asm/mshyperv.h +++ b/arch/x86/include/asm/mshyperv.h @@ -3,6 +3,7 @@ #include #include +#include #include /* @@ -168,6 +169,8 @@ void hv_setup_crash_handler(void (*handler)(struct pt_regs *regs)); void hv_remove_crash_handler(void); #if IS_ENABLED(CONFIG_HYPERV) +extern struct clocksource *hyperv_cs; + void hyperv_init(void); void hyperv_report_panic(struct pt_regs *regs); bool hv_is_hypercall_page_setup(void); -- cgit v1.2.3 From 372b1e91343e657a7cc5e2e2bcecd5140ac28119 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 8 Feb 2017 18:30:56 -0700 Subject: drivers: hv: Turn off write permission on the hypercall page The hypercall page only needs to be executable but currently it is setup to be writable as well. Fix the issue. Signed-off-by: K. Y. Srinivasan Cc: Acked-by: Kees Cook Reported-by: Stephen Hemminger Tested-by: Stephen Hemminger Signed-off-by: Greg Kroah-Hartman --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index c224b7df4d21..db64baf0e500 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -125,7 +125,7 @@ void hyperv_init(void) guest_id = generate_guest_id(0, LINUX_VERSION_CODE, 0); wrmsrl(HV_X64_MSR_GUEST_OS_ID, guest_id); - hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_EXEC); + hypercall_pg = __vmalloc(PAGE_SIZE, GFP_KERNEL, PAGE_KERNEL_RX); if (hypercall_pg == NULL) { wrmsrl(HV_X64_MSR_GUEST_OS_ID, 0); return; -- cgit v1.2.3 From 699c4cec238731a4c466f73fe6e9e45ab6f49a41 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Feb 2017 18:17:44 +0100 Subject: PCI/MSI: Remove pci_msi_domain_{alloc,free}_irqs() Just call the msi_* version directly instead of having trivial wrappers for one or two callsites. Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner --- arch/x86/kernel/apic/msi.c | 2 +- drivers/pci/msi.c | 30 ++---------------------------- include/linux/msi.h | 3 --- 3 files changed, 3 insertions(+), 32 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/msi.c b/arch/x86/kernel/apic/msi.c index 015bbf30e3e3..c61aec7e65f4 100644 --- a/arch/x86/kernel/apic/msi.c +++ b/arch/x86/kernel/apic/msi.c @@ -82,7 +82,7 @@ int native_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) if (domain == NULL) return -ENOSYS; - return pci_msi_domain_alloc_irqs(domain, dev, nvec, type); + return msi_domain_alloc_irqs(domain, &dev->dev, nvec); } void native_teardown_msi_irq(unsigned int irq) diff --git a/drivers/pci/msi.c b/drivers/pci/msi.c index 79f20e4cb7bf..b44ad7c21b29 100644 --- a/drivers/pci/msi.c +++ b/drivers/pci/msi.c @@ -38,7 +38,7 @@ static int pci_msi_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) domain = dev_get_msi_domain(&dev->dev); if (domain && irq_domain_is_hierarchy(domain)) - return pci_msi_domain_alloc_irqs(domain, dev, nvec, type); + return msi_domain_alloc_irqs(domain, &dev->dev, nvec); return arch_setup_msi_irqs(dev, nvec, type); } @@ -49,7 +49,7 @@ static void pci_msi_teardown_msi_irqs(struct pci_dev *dev) domain = dev_get_msi_domain(&dev->dev); if (domain && irq_domain_is_hierarchy(domain)) - pci_msi_domain_free_irqs(domain, dev); + msi_domain_free_irqs(domain, &dev->dev); else arch_teardown_msi_irqs(dev); } @@ -1454,32 +1454,6 @@ struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, } EXPORT_SYMBOL_GPL(pci_msi_create_irq_domain); -/** - * pci_msi_domain_alloc_irqs - Allocate interrupts for @dev in @domain - * @domain: The interrupt domain to allocate from - * @dev: The device for which to allocate - * @nvec: The number of interrupts to allocate - * @type: Unused to allow simpler migration from the arch_XXX interfaces - * - * Returns: - * A virtual interrupt number or an error code in case of failure - */ -int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev, - int nvec, int type) -{ - return msi_domain_alloc_irqs(domain, &dev->dev, nvec); -} - -/** - * pci_msi_domain_free_irqs - Free interrupts for @dev in @domain - * @domain: The interrupt domain - * @dev: The device for which to free interrupts - */ -void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev) -{ - msi_domain_free_irqs(domain, &dev->dev); -} - static int get_msi_id_cb(struct pci_dev *pdev, u16 alias, void *data) { u32 *pa = data; diff --git a/include/linux/msi.h b/include/linux/msi.h index 18b8566b3ce3..1b6f3ebbe876 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -316,9 +316,6 @@ void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); -int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev, - int nvec, int type); -void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev); irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, -- cgit v1.2.3 From c459bd7beda0295ea67db0ce2004a49addb2f765 Mon Sep 17 00:00:00 2001 From: Tim Chen Date: Wed, 1 Feb 2017 10:45:02 -0800 Subject: crypto: sha512-mb - Protect sha512 mb ctx mgr access The flusher and regular multi-buffer computation via mcryptd may race with another. Add here a lock and turn off interrupt to to access multi-buffer computation state cstate->mgr before a round of computation. This should prevent the flusher code jumping in. Signed-off-by: Tim Chen Signed-off-by: Herbert Xu --- arch/x86/crypto/sha512-mb/sha512_mb.c | 64 +++++++++++++++++++++++------------ 1 file changed, 42 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/crypto/sha512-mb/sha512_mb.c b/arch/x86/crypto/sha512-mb/sha512_mb.c index 9c1bb6d58141..2dd3674b5a1e 100644 --- a/arch/x86/crypto/sha512-mb/sha512_mb.c +++ b/arch/x86/crypto/sha512-mb/sha512_mb.c @@ -221,7 +221,7 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_resubmit } static struct sha512_hash_ctx - *sha512_ctx_mgr_get_comp_ctx(struct sha512_ctx_mgr *mgr) + *sha512_ctx_mgr_get_comp_ctx(struct mcryptd_alg_cstate *cstate) { /* * If get_comp_job returns NULL, there are no jobs complete. @@ -233,11 +233,17 @@ static struct sha512_hash_ctx * Otherwise, all jobs currently being managed by the hash_ctx_mgr * still need processing. */ + struct sha512_ctx_mgr *mgr; struct sha512_hash_ctx *ctx; + unsigned long flags; + mgr = cstate->mgr; + spin_lock_irqsave(&cstate->work_lock, flags); ctx = (struct sha512_hash_ctx *) sha512_job_mgr_get_comp_job(&mgr->mgr); - return sha512_ctx_mgr_resubmit(mgr, ctx); + ctx = sha512_ctx_mgr_resubmit(mgr, ctx); + spin_unlock_irqrestore(&cstate->work_lock, flags); + return ctx; } static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) @@ -246,12 +252,17 @@ static void sha512_ctx_mgr_init(struct sha512_ctx_mgr *mgr) } static struct sha512_hash_ctx - *sha512_ctx_mgr_submit(struct sha512_ctx_mgr *mgr, + *sha512_ctx_mgr_submit(struct mcryptd_alg_cstate *cstate, struct sha512_hash_ctx *ctx, const void *buffer, uint32_t len, int flags) { + struct sha512_ctx_mgr *mgr; + unsigned long irqflags; + + mgr = cstate->mgr; + spin_lock_irqsave(&cstate->work_lock, irqflags); if (flags & (~HASH_ENTIRE)) { /* * User should not pass anything other than FIRST, UPDATE, or @@ -351,20 +362,26 @@ static struct sha512_hash_ctx } } - return sha512_ctx_mgr_resubmit(mgr, ctx); + ctx = sha512_ctx_mgr_resubmit(mgr, ctx); + spin_unlock_irqrestore(&cstate->work_lock, irqflags); + return ctx; } -static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr) +static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct mcryptd_alg_cstate *cstate) { + struct sha512_ctx_mgr *mgr; struct sha512_hash_ctx *ctx; + unsigned long flags; + mgr = cstate->mgr; + spin_lock_irqsave(&cstate->work_lock, flags); while (1) { ctx = (struct sha512_hash_ctx *) sha512_job_mgr_flush(&mgr->mgr); /* If flush returned 0, there are no more jobs in flight. */ if (!ctx) - return NULL; + break; /* * If flush returned a job, resubmit the job to finish @@ -378,8 +395,10 @@ static struct sha512_hash_ctx *sha512_ctx_mgr_flush(struct sha512_ctx_mgr *mgr) * the sha512_ctx_mgr still need processing. Loop. */ if (ctx) - return ctx; + break; } + spin_unlock_irqrestore(&cstate->work_lock, flags); + return ctx; } static int sha512_mb_init(struct ahash_request *areq) @@ -439,11 +458,11 @@ static int sha_finish_walk(struct mcryptd_hash_request_ctx **ret_rctx, sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(&rctx->areq); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, nbytes, flag); if (!sha_ctx) { if (flush) - sha_ctx = sha512_ctx_mgr_flush(cstate->mgr); + sha_ctx = sha512_ctx_mgr_flush(cstate); } kernel_fpu_end(); if (sha_ctx) @@ -471,11 +490,12 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, struct sha512_hash_ctx *sha_ctx; struct mcryptd_hash_request_ctx *req_ctx; int ret; + unsigned long flags; /* remove from work list */ - spin_lock(&cstate->work_lock); + spin_lock_irqsave(&cstate->work_lock, flags); list_del(&rctx->waiter); - spin_unlock(&cstate->work_lock); + spin_unlock_irqrestore(&cstate->work_lock, flags); if (irqs_disabled()) rctx->complete(&req->base, err); @@ -486,14 +506,14 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, } /* check to see if there are other jobs that are done */ - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); while (sha_ctx) { req_ctx = cast_hash_to_mcryptd_ctx(sha_ctx); ret = sha_finish_walk(&req_ctx, cstate, false); if (req_ctx) { - spin_lock(&cstate->work_lock); + spin_lock_irqsave(&cstate->work_lock, flags); list_del(&req_ctx->waiter); - spin_unlock(&cstate->work_lock); + spin_unlock_irqrestore(&cstate->work_lock, flags); req = cast_mcryptd_ctx_to_req(req_ctx); if (irqs_disabled()) @@ -504,7 +524,7 @@ static int sha_complete_job(struct mcryptd_hash_request_ctx *rctx, local_bh_enable(); } } - sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate->mgr); + sha_ctx = sha512_ctx_mgr_get_comp_ctx(cstate); } return 0; @@ -515,6 +535,7 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, { unsigned long next_flush; unsigned long delay = usecs_to_jiffies(FLUSH_INTERVAL); + unsigned long flags; /* initialize tag */ rctx->tag.arrival = jiffies; /* tag the arrival time */ @@ -522,9 +543,9 @@ static void sha512_mb_add_list(struct mcryptd_hash_request_ctx *rctx, next_flush = rctx->tag.arrival + delay; rctx->tag.expire = next_flush; - spin_lock(&cstate->work_lock); + spin_lock_irqsave(&cstate->work_lock, flags); list_add_tail(&rctx->waiter, &cstate->work_list); - spin_unlock(&cstate->work_lock); + spin_unlock_irqrestore(&cstate->work_lock, flags); mcryptd_arm_flusher(cstate, delay); } @@ -565,7 +586,7 @@ static int sha512_mb_update(struct ahash_request *areq) sha_ctx = (struct sha512_hash_ctx *) ahash_request_ctx(areq); sha512_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, nbytes, HASH_UPDATE); kernel_fpu_end(); @@ -628,7 +649,7 @@ static int sha512_mb_finup(struct ahash_request *areq) sha512_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, rctx->walk.data, + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, rctx->walk.data, nbytes, flag); kernel_fpu_end(); @@ -677,8 +698,7 @@ static int sha512_mb_final(struct ahash_request *areq) /* flag HASH_FINAL and 0 data size */ sha512_mb_add_list(rctx, cstate); kernel_fpu_begin(); - sha_ctx = sha512_ctx_mgr_submit(cstate->mgr, sha_ctx, &data, 0, - HASH_LAST); + sha_ctx = sha512_ctx_mgr_submit(cstate, sha_ctx, &data, 0, HASH_LAST); kernel_fpu_end(); /* check if anything is returned */ @@ -940,7 +960,7 @@ static unsigned long sha512_mb_flusher(struct mcryptd_alg_cstate *cstate) break; kernel_fpu_begin(); sha_ctx = (struct sha512_hash_ctx *) - sha512_ctx_mgr_flush(cstate->mgr); + sha512_ctx_mgr_flush(cstate); kernel_fpu_end(); if (!sha_ctx) { pr_err("sha512_mb error: nothing got flushed for" -- cgit v1.2.3 From f2029b1e47b607619d1dd2cb0bbb77f64ec6b7c2 Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 10 Feb 2017 11:38:37 -0800 Subject: perf/x86/intel: Add Kaby Lake support Add Kaby Lake mobile and desktop models for RAPL, CSTATE and UNCORE matching Skylake. Signed-off-by: Srinivas Pandruvada Cc: peterz@infradead.org Cc: kan.liang@intel.com Cc: bigeasy@linutronix.de Cc: dave.hansen@linux.intel.com Cc: piotr.luc@intel.com Cc: davidcc@google.com Cc: bp@suse.de Link: http://lkml.kernel.org/r/1486755517-17812-1-git-send-email-srinivas.pandruvada@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/events/intel/cstate.c | 3 +++ arch/x86/events/intel/rapl.c | 3 +++ arch/x86/events/intel/uncore.c | 2 ++ 3 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index 1076c9a77292..aff4b5b69d40 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -541,6 +541,9 @@ static const struct x86_cpu_id intel_cstates_match[] __initconst = { X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_MOBILE, snb_cstates), X86_CSTATES_MODEL(INTEL_FAM6_SKYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_MOBILE, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_KABYLAKE_DESKTOP, snb_cstates), + X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNL, knl_cstates), X86_CSTATES_MODEL(INTEL_FAM6_XEON_PHI_KNM, knl_cstates), { }, diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22ef4f72cf32..22054ca49026 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -771,6 +771,9 @@ static const struct x86_cpu_id rapl_cpu_match[] __initconst = { X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP, skl_rapl_init), X86_RAPL_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, hsx_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_rapl_init), + X86_RAPL_MODEL_MATCH(INTEL_FAM6_ATOM_GOLDMONT, hsw_rapl_init), {}, }; diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index 1ab45976474d..758c1aa5009d 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -1328,6 +1328,8 @@ static const struct x86_cpu_id intel_uncore_match[] __initconst = { X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_DESKTOP,skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_MOBILE, skl_uncore_init), X86_UNCORE_MODEL_MATCH(INTEL_FAM6_SKYLAKE_X, skx_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_MOBILE, skl_uncore_init), + X86_UNCORE_MODEL_MATCH(INTEL_FAM6_KABYLAKE_DESKTOP, skl_uncore_init), {}, }; -- cgit v1.2.3 From 3ba5b5ea7dc3a10ef50819b43a9f8de2705f4eec Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 13 Feb 2017 15:52:28 +0300 Subject: x86/vm86: Fix unused variable warning if THP is disabled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit GCC complains about unused variable 'vma' in mark_screen_rdonly() if THP is disabled: arch/x86/kernel/vm86_32.c: In function ‘mark_screen_rdonly’: arch/x86/kernel/vm86_32.c:180:26: warning: unused variable ‘vma’ [-Wunused-variable] struct vm_area_struct *vma = find_vma(mm, 0xA0000); That's silly. pmd_trans_huge() resolves to 0 when THP is disabled, so the whole block should be eliminated. Moving the variable declaration outside the if() block shuts GCC up. Reported-by: Jérémy Lefaure Signed-off-by: Kirill A. Shutemov Tested-by: Borislav Petkov Cc: Carlos O'Donell Link: http://lkml.kernel.org/r/20170213125228.63645-1-kirill.shutemov@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/vm86_32.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index ec5d7545e6dc..0442d98367ae 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -160,11 +160,12 @@ void save_v86_state(struct kernel_vm86_regs *regs, int retval) static void mark_screen_rdonly(struct mm_struct *mm) { + struct vm_area_struct *vma; + spinlock_t *ptl; pgd_t *pgd; pud_t *pud; pmd_t *pmd; pte_t *pte; - spinlock_t *ptl; int i; down_write(&mm->mmap_sem); @@ -177,7 +178,7 @@ static void mark_screen_rdonly(struct mm_struct *mm) pmd = pmd_offset(pud, 0xA0000); if (pmd_trans_huge(*pmd)) { - struct vm_area_struct *vma = find_vma(mm, 0xA0000); + vma = find_vma(mm, 0xA0000); split_huge_pmd(vma, pmd, 0xA0000); } if (pmd_none_or_clear_bad(pmd)) -- cgit v1.2.3 From ab520be8cd5d56867fc95cfbc34b90880faf1f9d Mon Sep 17 00:00:00 2001 From: Paul Durrant Date: Mon, 13 Feb 2017 17:03:23 +0000 Subject: xen/privcmd: Add IOCTL_PRIVCMD_DM_OP Recently a new dm_op[1] hypercall was added to Xen to provide a mechanism for restricting device emulators (such as QEMU) to a limited set of hypervisor operations, and being able to audit those operations in the kernel of the domain in which they run. This patch adds IOCTL_PRIVCMD_DM_OP as gateway for __HYPERVISOR_dm_op. NOTE: There is no requirement for user-space code to bounce data through locked memory buffers (as with IOCTL_PRIVCMD_HYPERCALL) since privcmd has enough information to lock the original buffers directly. [1] http://xenbits.xen.org/gitweb/?p=xen.git;a=commit;h=524a98c2 Signed-off-by: Paul Durrant Acked-by: Stefano Stabellini Signed-off-by: Boris Ostrovsky --- arch/arm/xen/enlighten.c | 1 + arch/arm/xen/hypercall.S | 1 + arch/arm64/xen/hypercall.S | 1 + arch/x86/include/asm/xen/hypercall.h | 7 ++ drivers/xen/privcmd.c | 139 +++++++++++++++++++++++++++++++++++ include/uapi/xen/privcmd.h | 13 ++++ include/xen/arm/hypercall.h | 1 + include/xen/interface/hvm/dm_op.h | 32 ++++++++ include/xen/interface/xen.h | 1 + 9 files changed, 196 insertions(+) create mode 100644 include/xen/interface/hvm/dm_op.h (limited to 'arch/x86') diff --git a/arch/arm/xen/enlighten.c b/arch/arm/xen/enlighten.c index 11d9f2898b16..81e3217b12d3 100644 --- a/arch/arm/xen/enlighten.c +++ b/arch/arm/xen/enlighten.c @@ -457,4 +457,5 @@ EXPORT_SYMBOL_GPL(HYPERVISOR_tmem_op); EXPORT_SYMBOL_GPL(HYPERVISOR_platform_op); EXPORT_SYMBOL_GPL(HYPERVISOR_multicall); EXPORT_SYMBOL_GPL(HYPERVISOR_vm_assist); +EXPORT_SYMBOL_GPL(HYPERVISOR_dm_op); EXPORT_SYMBOL_GPL(privcmd_call); diff --git a/arch/arm/xen/hypercall.S b/arch/arm/xen/hypercall.S index a648dfc3be30..b0b80c0f09f3 100644 --- a/arch/arm/xen/hypercall.S +++ b/arch/arm/xen/hypercall.S @@ -92,6 +92,7 @@ HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); +HYPERCALL3(dm_op); ENTRY(privcmd_call) stmdb sp!, {r4} diff --git a/arch/arm64/xen/hypercall.S b/arch/arm64/xen/hypercall.S index 947830a459d2..401ceb71540c 100644 --- a/arch/arm64/xen/hypercall.S +++ b/arch/arm64/xen/hypercall.S @@ -84,6 +84,7 @@ HYPERCALL1(tmem_op); HYPERCALL1(platform_op_raw); HYPERCALL2(multicall); HYPERCALL2(vm_assist); +HYPERCALL3(dm_op); ENTRY(privcmd_call) mov x16, x0 diff --git a/arch/x86/include/asm/xen/hypercall.h b/arch/x86/include/asm/xen/hypercall.h index a12a047184ee..f6d20f6cca12 100644 --- a/arch/x86/include/asm/xen/hypercall.h +++ b/arch/x86/include/asm/xen/hypercall.h @@ -472,6 +472,13 @@ HYPERVISOR_xenpmu_op(unsigned int op, void *arg) return _hypercall2(int, xenpmu_op, op, arg); } +static inline int +HYPERVISOR_dm_op( + domid_t dom, unsigned int nr_bufs, void *bufs) +{ + return _hypercall3(int, dm_op, dom, nr_bufs, bufs); +} + static inline void MULTI_fpu_taskswitch(struct multicall_entry *mcl, int set) { diff --git a/drivers/xen/privcmd.c b/drivers/xen/privcmd.c index 5e5c7aef0c9f..1a6f1860e008 100644 --- a/drivers/xen/privcmd.c +++ b/drivers/xen/privcmd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -32,6 +33,7 @@ #include #include #include +#include #include #include #include @@ -43,6 +45,17 @@ MODULE_LICENSE("GPL"); #define PRIV_VMA_LOCKED ((void *)1) +static unsigned int privcmd_dm_op_max_num = 16; +module_param_named(dm_op_max_nr_bufs, privcmd_dm_op_max_num, uint, 0644); +MODULE_PARM_DESC(dm_op_max_nr_bufs, + "Maximum number of buffers per dm_op hypercall"); + +static unsigned int privcmd_dm_op_buf_max_size = 4096; +module_param_named(dm_op_buf_max_size, privcmd_dm_op_buf_max_size, uint, + 0644); +MODULE_PARM_DESC(dm_op_buf_max_size, + "Maximum size of a dm_op hypercall buffer"); + static int privcmd_vma_range_is_mapped( struct vm_area_struct *vma, unsigned long addr, @@ -548,6 +561,128 @@ out_unlock: goto out; } +static int lock_pages( + struct privcmd_dm_op_buf kbufs[], unsigned int num, + struct page *pages[], unsigned int nr_pages) +{ + unsigned int i; + + for (i = 0; i < num; i++) { + unsigned int requested; + int pinned; + + requested = DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, + PAGE_SIZE); + if (requested > nr_pages) + return -ENOSPC; + + pinned = get_user_pages_fast( + (unsigned long) kbufs[i].uptr, + requested, FOLL_WRITE, pages); + if (pinned < 0) + return pinned; + + nr_pages -= pinned; + pages += pinned; + } + + return 0; +} + +static void unlock_pages(struct page *pages[], unsigned int nr_pages) +{ + unsigned int i; + + if (!pages) + return; + + for (i = 0; i < nr_pages; i++) { + if (pages[i]) + put_page(pages[i]); + } +} + +static long privcmd_ioctl_dm_op(void __user *udata) +{ + struct privcmd_dm_op kdata; + struct privcmd_dm_op_buf *kbufs; + unsigned int nr_pages = 0; + struct page **pages = NULL; + struct xen_dm_op_buf *xbufs = NULL; + unsigned int i; + long rc; + + if (copy_from_user(&kdata, udata, sizeof(kdata))) + return -EFAULT; + + if (kdata.num == 0) + return 0; + + if (kdata.num > privcmd_dm_op_max_num) + return -E2BIG; + + kbufs = kcalloc(kdata.num, sizeof(*kbufs), GFP_KERNEL); + if (!kbufs) + return -ENOMEM; + + if (copy_from_user(kbufs, kdata.ubufs, + sizeof(*kbufs) * kdata.num)) { + rc = -EFAULT; + goto out; + } + + for (i = 0; i < kdata.num; i++) { + if (kbufs[i].size > privcmd_dm_op_buf_max_size) { + rc = -E2BIG; + goto out; + } + + if (!access_ok(VERIFY_WRITE, kbufs[i].uptr, + kbufs[i].size)) { + rc = -EFAULT; + goto out; + } + + nr_pages += DIV_ROUND_UP( + offset_in_page(kbufs[i].uptr) + kbufs[i].size, + PAGE_SIZE); + } + + pages = kcalloc(nr_pages, sizeof(*pages), GFP_KERNEL); + if (!pages) { + rc = -ENOMEM; + goto out; + } + + xbufs = kcalloc(kdata.num, sizeof(*xbufs), GFP_KERNEL); + if (!xbufs) { + rc = -ENOMEM; + goto out; + } + + rc = lock_pages(kbufs, kdata.num, pages, nr_pages); + if (rc) + goto out; + + for (i = 0; i < kdata.num; i++) { + set_xen_guest_handle(xbufs[i].h, kbufs[i].uptr); + xbufs[i].size = kbufs[i].size; + } + + xen_preemptible_hcall_begin(); + rc = HYPERVISOR_dm_op(kdata.dom, kdata.num, xbufs); + xen_preemptible_hcall_end(); + +out: + unlock_pages(pages, nr_pages); + kfree(xbufs); + kfree(pages); + kfree(kbufs); + + return rc; +} + static long privcmd_ioctl(struct file *file, unsigned int cmd, unsigned long data) { @@ -571,6 +706,10 @@ static long privcmd_ioctl(struct file *file, ret = privcmd_ioctl_mmap_batch(udata, 2); break; + case IOCTL_PRIVCMD_DM_OP: + ret = privcmd_ioctl_dm_op(udata); + break; + default: break; } diff --git a/include/uapi/xen/privcmd.h b/include/uapi/xen/privcmd.h index 7ddeeda93809..f8c5d75b99e1 100644 --- a/include/uapi/xen/privcmd.h +++ b/include/uapi/xen/privcmd.h @@ -77,6 +77,17 @@ struct privcmd_mmapbatch_v2 { int __user *err; /* array of error codes */ }; +struct privcmd_dm_op_buf { + void __user *uptr; + size_t size; +}; + +struct privcmd_dm_op { + domid_t dom; + __u16 num; + const struct privcmd_dm_op_buf __user *ubufs; +}; + /* * @cmd: IOCTL_PRIVCMD_HYPERCALL * @arg: &privcmd_hypercall_t @@ -98,5 +109,7 @@ struct privcmd_mmapbatch_v2 { _IOC(_IOC_NONE, 'P', 3, sizeof(struct privcmd_mmapbatch)) #define IOCTL_PRIVCMD_MMAPBATCH_V2 \ _IOC(_IOC_NONE, 'P', 4, sizeof(struct privcmd_mmapbatch_v2)) +#define IOCTL_PRIVCMD_DM_OP \ + _IOC(_IOC_NONE, 'P', 5, sizeof(struct privcmd_dm_op)) #endif /* __LINUX_PUBLIC_PRIVCMD_H__ */ diff --git a/include/xen/arm/hypercall.h b/include/xen/arm/hypercall.h index 9d874db13c0e..73db4b2eeb89 100644 --- a/include/xen/arm/hypercall.h +++ b/include/xen/arm/hypercall.h @@ -53,6 +53,7 @@ int HYPERVISOR_physdev_op(int cmd, void *arg); int HYPERVISOR_vcpu_op(int cmd, int vcpuid, void *extra_args); int HYPERVISOR_tmem_op(void *arg); int HYPERVISOR_vm_assist(unsigned int cmd, unsigned int type); +int HYPERVISOR_dm_op(domid_t domid, unsigned int nr_bufs, void *bufs); int HYPERVISOR_platform_op_raw(void *arg); static inline int HYPERVISOR_platform_op(struct xen_platform_op *op) { diff --git a/include/xen/interface/hvm/dm_op.h b/include/xen/interface/hvm/dm_op.h new file mode 100644 index 000000000000..ee9e480bc559 --- /dev/null +++ b/include/xen/interface/hvm/dm_op.h @@ -0,0 +1,32 @@ +/* + * Copyright (c) 2016, Citrix Systems Inc + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifndef __XEN_PUBLIC_HVM_DM_OP_H__ +#define __XEN_PUBLIC_HVM_DM_OP_H__ + +struct xen_dm_op_buf { + GUEST_HANDLE(void) h; + xen_ulong_t size; +}; +DEFINE_GUEST_HANDLE_STRUCT(xen_dm_op_buf); + +#endif /* __XEN_PUBLIC_HVM_DM_OP_H__ */ diff --git a/include/xen/interface/xen.h b/include/xen/interface/xen.h index 1b0d189cd3d3..4f4830ef8f93 100644 --- a/include/xen/interface/xen.h +++ b/include/xen/interface/xen.h @@ -81,6 +81,7 @@ #define __HYPERVISOR_tmem_op 38 #define __HYPERVISOR_xc_reserved_op 39 /* reserved for XenClient */ #define __HYPERVISOR_xenpmu_op 40 +#define __HYPERVISOR_dm_op 41 /* Architecture-specific hypercall definitions. */ #define __HYPERVISOR_arch_0 48 -- cgit v1.2.3 From db1c056cee59f4a7670c3bd9ee1657468cf0b4c4 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 8 Dec 2016 15:31:41 +0100 Subject: kvm: vmx: Use the hardware provided GPA instead of page walk As in the SVM patch, the guest physical address is passed by VMX to x86_emulate_instruction already, so mark the GPA as available in vcpu->arch. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 7c3e42623090..0828b02b5af2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6382,6 +6382,7 @@ static int handle_ept_violation(struct kvm_vcpu *vcpu) EPT_VIOLATION_EXECUTABLE)) ? PFERR_PRESENT_MASK : 0; + vcpu->arch.gpa_available = true; vcpu->arch.exit_qualification = exit_qualification; return kvm_mmu_page_fault(vcpu, gpa, error_code, NULL, 0); @@ -6399,6 +6400,7 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) } ret = handle_mmio_page_fault(vcpu, gpa, true); + vcpu->arch.gpa_available = true; if (likely(ret == RET_MMIO_PF_EMULATE)) return x86_emulate_instruction(vcpu, gpa, 0, NULL, 0) == EMULATE_DONE; @@ -8517,6 +8519,7 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) u32 vectoring_info = vmx->idt_vectoring_info; trace_kvm_exit(exit_reason, vcpu, KVM_ISA_VMX); + vcpu->arch.gpa_available = false; /* * Flush logged GPAs PML buffer, this will make dirty_bitmap more -- cgit v1.2.3 From 967235d320329e4a7a2bd1a36b04293063e985ae Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 14:03:45 +0100 Subject: KVM: vmx: clear pending interrupts on KVM_SET_LAPIC Pending interrupts might be in the PI descriptor when the LAPIC is restored from an external state; we do not want them to be injected. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 3 +-- arch/x86/kvm/vmx.c | 9 +++++++++ 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 33b799fd3a6e..8ddd0ed03880 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -2204,8 +2204,7 @@ int kvm_apic_set_state(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) 1 : count_vectors(apic->regs + APIC_ISR); apic->highest_isr_cache = -1; if (vcpu->arch.apicv_active) { - if (kvm_x86_ops->apicv_post_state_restore) - kvm_x86_ops->apicv_post_state_restore(vcpu); + kvm_x86_ops->apicv_post_state_restore(vcpu); kvm_x86_ops->hwapic_irr_update(vcpu, apic_find_highest_irr(apic)); kvm_x86_ops->hwapic_isr_update(vcpu, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0828b02b5af2..8d2e0cc8e83e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8749,6 +8749,14 @@ static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) vmcs_write64(EOI_EXIT_BITMAP3, eoi_exit_bitmap[3]); } +static void vmx_apicv_post_state_restore(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + pi_clear_on(&vmx->pi_desc); + memset(vmx->pi_desc.pir, 0, sizeof(vmx->pi_desc.pir)); +} + static void vmx_complete_atomic_exit(struct vcpu_vmx *vmx) { u32 exit_intr_info; @@ -11574,6 +11582,7 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_enable_apicv = vmx_get_enable_apicv, .refresh_apicv_exec_ctrl = vmx_refresh_apicv_exec_ctrl, .load_eoi_exitmap = vmx_load_eoi_exitmap, + .apicv_post_state_restore = vmx_apicv_post_state_restore, .hwapic_irr_update = vmx_hwapic_irr_update, .hwapic_isr_update = vmx_hwapic_isr_update, .sync_pir_to_irr = vmx_sync_pir_to_irr, -- cgit v1.2.3 From 0ad3bed6c5ec6dbb093a26802c85088a85fb9757 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 15:23:54 +0100 Subject: kvm: nVMX: move nested events check to kvm_vcpu_running vcpu_run calls kvm_vcpu_running, not kvm_arch_vcpu_runnable, and the former does not call check_nested_events. Once KVM_REQ_EVENT is removed from the APICv interrupt injection path, however, this would leave no place to trigger a vmexit from L2 to L1, causing a missed interrupt delivery while in guest mode. This is caught by the "ack interrupt on exit" test in vmx.flat. [This does not change the calls to check_nested_events in inject_pending_event. That is material for a separate cleanup.] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 2f64e5d0ae53..204793f0f0e2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -7023,6 +7023,9 @@ static inline int vcpu_block(struct kvm *kvm, struct kvm_vcpu *vcpu) static inline bool kvm_vcpu_running(struct kvm_vcpu *vcpu) { + if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) + kvm_x86_ops->check_nested_events(vcpu, false); + return (vcpu->arch.mp_state == KVM_MP_STATE_RUNNABLE && !vcpu->arch.apf.halted); } @@ -8389,9 +8392,6 @@ static inline bool kvm_vcpu_has_events(struct kvm_vcpu *vcpu) int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu) && kvm_x86_ops->check_nested_events) - kvm_x86_ops->check_nested_events(vcpu, false); - return kvm_vcpu_running(vcpu) || kvm_vcpu_has_events(vcpu); } -- cgit v1.2.3 From 810e6defcca4d05275aa15c2872c0a4949178fcb Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 13:05:46 +0100 Subject: KVM: x86: preparatory changes for APICv cleanups Add return value to __kvm_apic_update_irr/kvm_apic_update_irr. Move vmx_sync_pir_to_irr around. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 25 +++++++++++++++++-------- arch/x86/kvm/lapic.h | 4 ++-- arch/x86/kvm/vmx.c | 32 ++++++++++++++++---------------- 3 files changed, 35 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8ddd0ed03880..120afc2bcfd3 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -341,7 +341,7 @@ static int find_highest_vector(void *bitmap) vec >= 0; vec -= APIC_VECTORS_PER_REG) { reg = bitmap + REG_POS(vec); if (*reg) - return fls(*reg) - 1 + vec; + return __fls(*reg) + vec; } return -1; @@ -361,27 +361,36 @@ static u8 count_vectors(void *bitmap) return count; } -void __kvm_apic_update_irr(u32 *pir, void *regs) +int __kvm_apic_update_irr(u32 *pir, void *regs) { - u32 i, pir_val; + u32 i, vec; + u32 pir_val, irr_val; + int max_irr = -1; - for (i = 0; i <= 7; i++) { + for (i = vec = 0; i <= 7; i++, vec += 32) { pir_val = READ_ONCE(pir[i]); + irr_val = *((u32 *)(regs + APIC_IRR + i * 0x10)); if (pir_val) { - pir_val = xchg(&pir[i], 0); - *((u32 *)(regs + APIC_IRR + i * 0x10)) |= pir_val; + irr_val |= xchg(&pir[i], 0); + *((u32 *)(regs + APIC_IRR + i * 0x10)) = irr_val; } + if (irr_val) + max_irr = __fls(irr_val) + vec; } + + return max_irr; } EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); -void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) +int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) { struct kvm_lapic *apic = vcpu->arch.apic; + int max_irr; - __kvm_apic_update_irr(pir, apic->regs); + max_irr = __kvm_apic_update_irr(pir, apic->regs); kvm_make_request(KVM_REQ_EVENT, vcpu); + return max_irr; } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); diff --git a/arch/x86/kvm/lapic.h b/arch/x86/kvm/lapic.h index 05abd837b78a..bcbe811f3b97 100644 --- a/arch/x86/kvm/lapic.h +++ b/arch/x86/kvm/lapic.h @@ -71,8 +71,8 @@ int kvm_lapic_reg_read(struct kvm_lapic *apic, u32 offset, int len, bool kvm_apic_match_dest(struct kvm_vcpu *vcpu, struct kvm_lapic *source, int short_hand, unsigned int dest, int dest_mode); -void __kvm_apic_update_irr(u32 *pir, void *regs); -void kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); +int __kvm_apic_update_irr(u32 *pir, void *regs); +int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir); void kvm_apic_update_ppr(struct kvm_vcpu *vcpu); int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, struct dest_map *dest_map); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 8d2e0cc8e83e..4ac9b484e244 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5057,22 +5057,6 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) kvm_vcpu_kick(vcpu); } -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) -{ - struct vcpu_vmx *vmx = to_vmx(vcpu); - - if (!pi_test_on(&vmx->pi_desc)) - return; - - pi_clear_on(&vmx->pi_desc); - /* - * IOMMU can write to PIR.ON, so the barrier matters even on UP. - * But on x86 this is just a compiler barrier anyway. - */ - smp_mb__after_atomic(); - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); -} - /* * Set up the vmcs's constant host-state fields, i.e., host-state fields that * will not change in the lifetime of the guest. @@ -8738,6 +8722,22 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } +static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + + if (!pi_test_on(&vmx->pi_desc)) + return; + + pi_clear_on(&vmx->pi_desc); + /* + * IOMMU can write to PIR.ON, so the barrier matters even on UP. + * But on x86 this is just a compiler barrier anyway. + */ + smp_mb__after_atomic(); + kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); +} + static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) { if (!kvm_vcpu_apicv_active(vcpu)) -- cgit v1.2.3 From 3d92789f69162ee5689f3766e5f50bb46b7e1d97 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 13:29:03 +0100 Subject: KVM: vmx: move sync_pir_to_irr from apic_find_highest_irr to callers Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 7 ++++--- arch/x86/kvm/x86.c | 4 +++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 120afc2bcfd3..8af6db9b64aa 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -410,8 +410,6 @@ static inline int apic_find_highest_irr(struct kvm_lapic *apic) if (!apic->irr_pending) return -1; - if (apic->vcpu->arch.apicv_active) - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); result = apic_search_irr(apic); ASSERT(result == -1 || result >= 16); @@ -581,7 +579,10 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) { - int highest_irr = apic_find_highest_irr(apic); + int highest_irr; + if (apic->vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + highest_irr = apic_find_highest_irr(apic); if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) return -1; return highest_irr; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 204793f0f0e2..8f80da161e80 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6822,9 +6822,11 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * Update architecture specific hints for APIC * virtual interrupt delivery. */ - if (vcpu->arch.apicv_active) + if (vcpu->arch.apicv_active) { + kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_x86_ops->hwapic_irr_update(vcpu, kvm_lapic_find_highest_irr(vcpu)); + } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { -- cgit v1.2.3 From 76dfafd536730ef9b9d99b1cf596916d52be76d1 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 17:17:11 +0100 Subject: KVM: x86: do not scan IRR twice on APICv vmentry Calls to apic_find_highest_irr are scanning IRR twice, once in vmx_sync_pir_from_irr and once in apic_search_irr. Change sync_pir_from_irr to get the new maximum IRR from kvm_apic_update_irr; now that it does the computation, it can also do the RVI write. In order to avoid complications in svm.c, make the callback optional. Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 2 +- arch/x86/kvm/lapic.c | 8 +++++--- arch/x86/kvm/svm.c | 6 ------ arch/x86/kvm/vmx.c | 31 +++++++++++++++++++------------ arch/x86/kvm/x86.c | 9 +++------ 5 files changed, 28 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index 417502cf42b6..e4f13e714bcf 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -969,7 +969,7 @@ struct kvm_x86_ops { void (*set_virtual_x2apic_mode)(struct kvm_vcpu *vcpu, bool set); void (*set_apic_access_page_addr)(struct kvm_vcpu *vcpu, hpa_t hpa); void (*deliver_posted_interrupt)(struct kvm_vcpu *vcpu, int vector); - void (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); + int (*sync_pir_to_irr)(struct kvm_vcpu *vcpu); int (*set_tss_addr)(struct kvm *kvm, unsigned int addr); int (*get_tdp_level)(void); u64 (*get_mt_mask)(struct kvm_vcpu *vcpu, gfn_t gfn, bool is_mmio); diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 8af6db9b64aa..7ed2400b2777 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -515,6 +515,7 @@ int kvm_lapic_find_highest_irr(struct kvm_vcpu *vcpu) */ return apic_find_highest_irr(vcpu->arch.apic); } +EXPORT_SYMBOL_GPL(kvm_lapic_find_highest_irr); static int __apic_accept_irq(struct kvm_lapic *apic, int delivery_mode, int vector, int level, int trig_mode, @@ -580,9 +581,10 @@ static void pv_eoi_clr_pending(struct kvm_vcpu *vcpu) static int apic_has_interrupt_for_ppr(struct kvm_lapic *apic, u32 ppr) { int highest_irr; - if (apic->vcpu->arch.apicv_active) - kvm_x86_ops->sync_pir_to_irr(apic->vcpu); - highest_irr = apic_find_highest_irr(apic); + if (kvm_x86_ops->sync_pir_to_irr && apic->vcpu->arch.apicv_active) + highest_irr = kvm_x86_ops->sync_pir_to_irr(apic->vcpu); + else + highest_irr = apic_find_highest_irr(apic); if (highest_irr == -1 || (highest_irr & 0xF0) <= ppr) return -1; return highest_irr; diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d0414f054bdf..13cd06220b19 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -4359,11 +4359,6 @@ static void svm_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) return; } -static void svm_sync_pir_to_irr(struct kvm_vcpu *vcpu) -{ - return; -} - static void svm_deliver_avic_intr(struct kvm_vcpu *vcpu, int vec) { kvm_lapic_set_irr(vec, vcpu->arch.apic); @@ -5373,7 +5368,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_enable_apicv = svm_get_enable_apicv, .refresh_apicv_exec_ctrl = svm_refresh_apicv_exec_ctrl, .load_eoi_exitmap = svm_load_eoi_exitmap, - .sync_pir_to_irr = svm_sync_pir_to_irr, .hwapic_irr_update = svm_hwapic_irr_update, .hwapic_isr_update = svm_hwapic_isr_update, .apicv_post_state_restore = avic_post_state_restore, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4ac9b484e244..d03cb62b70d2 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6649,8 +6649,10 @@ static __init int hardware_setup(void) if (!cpu_has_vmx_ple()) ple_gap = 0; - if (!cpu_has_vmx_apicv()) + if (!cpu_has_vmx_apicv()) { enable_apicv = 0; + kvm_x86_ops->sync_pir_to_irr = NULL; + } if (cpu_has_vmx_tsc_scaling()) { kvm_has_tsc_control = true; @@ -8722,20 +8724,25 @@ static void vmx_hwapic_irr_update(struct kvm_vcpu *vcpu, int max_irr) } } -static void vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) +static int vmx_sync_pir_to_irr(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + int max_irr; - if (!pi_test_on(&vmx->pi_desc)) - return; - - pi_clear_on(&vmx->pi_desc); - /* - * IOMMU can write to PIR.ON, so the barrier matters even on UP. - * But on x86 this is just a compiler barrier anyway. - */ - smp_mb__after_atomic(); - kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); + WARN_ON(!vcpu->arch.apicv_active); + if (pi_test_on(&vmx->pi_desc)) { + pi_clear_on(&vmx->pi_desc); + /* + * IOMMU can write to PIR.ON, so the barrier matters even on UP. + * But on x86 this is just a compiler barrier anyway. + */ + smp_mb__after_atomic(); + max_irr = kvm_apic_update_irr(vcpu, vmx->pi_desc.pir); + } else { + max_irr = kvm_lapic_find_highest_irr(vcpu); + } + vmx_hwapic_irr_update(vcpu, max_irr); + return max_irr; } static void vmx_load_eoi_exitmap(struct kvm_vcpu *vcpu, u64 *eoi_exit_bitmap) diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8f80da161e80..75b0f30d75ee 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2909,7 +2909,7 @@ void kvm_arch_vcpu_put(struct kvm_vcpu *vcpu) static int kvm_vcpu_ioctl_get_lapic(struct kvm_vcpu *vcpu, struct kvm_lapic_state *s) { - if (vcpu->arch.apicv_active) + if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); return kvm_apic_get_state(vcpu, s); @@ -6659,7 +6659,7 @@ static void vcpu_scan_ioapic(struct kvm_vcpu *vcpu) if (irqchip_split(vcpu->kvm)) kvm_scan_ioapic_routes(vcpu, vcpu->arch.ioapic_handled_vectors); else { - if (vcpu->arch.apicv_active) + if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); kvm_ioapic_scan_entry(vcpu, vcpu->arch.ioapic_handled_vectors); } @@ -6822,11 +6822,8 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) * Update architecture specific hints for APIC * virtual interrupt delivery. */ - if (vcpu->arch.apicv_active) { + if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) kvm_x86_ops->sync_pir_to_irr(vcpu); - kvm_x86_ops->hwapic_irr_update(vcpu, - kvm_lapic_find_highest_irr(vcpu)); - } } if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { -- cgit v1.2.3 From b95234c840045b7c72380fd14c59416af28fcb02 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 13:57:33 +0100 Subject: kvm: x86: do not use KVM_REQ_EVENT for APICv interrupt injection Since bf9f6ac8d749 ("KVM: Update Posted-Interrupts Descriptor when vCPU is blocked", 2015-09-18) the posted interrupt descriptor is checked unconditionally for PIR.ON. Therefore we don't need KVM_REQ_EVENT to trigger the scan and, if NMIs or SMIs are not involved, we can avoid the complicated event injection path. Calling kvm_vcpu_kick if PIR.ON=1 is also useless, though it has been there since APICv was introduced. However, without the KVM_REQ_EVENT safety net KVM needs to be much more careful about races between vmx_deliver_posted_interrupt and vcpu_enter_guest. First, the IPI for posted interrupts may be issued between setting vcpu->mode = IN_GUEST_MODE and disabling interrupts. If that happens, kvm_trigger_posted_interrupt returns true, but smp_kvm_posted_intr_ipi doesn't do anything about it. The guest is entered with PIR.ON, but the posted interrupt IPI has not been sent and the interrupt is only delivered to the guest on the next vmentry (if any). To fix this, disable interrupts before setting vcpu->mode. This ensures that the IPI is delayed until the guest enters non-root mode; it is then trapped by the processor causing the interrupt to be injected. Second, the IPI may be issued between kvm_x86_ops->sync_pir_to_irr(vcpu) and vcpu->mode = IN_GUEST_MODE. In this case, kvm_vcpu_kick is called but it (correctly) doesn't do anything because it sees vcpu->mode == OUTSIDE_GUEST_MODE. Again, the guest is entered with PIR.ON but no posted interrupt IPI is pending; this time, the fix for this is to move the RVI update after IN_GUEST_MODE. Both issues were mostly masked by the liberal usage of KVM_REQ_EVENT, though the second could actually happen with VT-d posted interrupts. In both race scenarios KVM_REQ_EVENT would cancel guest entry, resulting in another vmentry which would inject the interrupt. This saves about 300 cycles on the self_ipi_* tests of vmexit.flat. Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 11 ++++------- arch/x86/kvm/vmx.c | 8 +++++--- arch/x86/kvm/x86.c | 44 +++++++++++++++++++++++++------------------- 3 files changed, 34 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 7ed2400b2777..9fa5b8164961 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -385,12 +385,8 @@ EXPORT_SYMBOL_GPL(__kvm_apic_update_irr); int kvm_apic_update_irr(struct kvm_vcpu *vcpu, u32 *pir) { struct kvm_lapic *apic = vcpu->arch.apic; - int max_irr; - max_irr = __kvm_apic_update_irr(pir, apic->regs); - - kvm_make_request(KVM_REQ_EVENT, vcpu); - return max_irr; + return __kvm_apic_update_irr(pir, apic->regs); } EXPORT_SYMBOL_GPL(kvm_apic_update_irr); @@ -423,9 +419,10 @@ static inline void apic_clear_irr(int vec, struct kvm_lapic *apic) vcpu = apic->vcpu; if (unlikely(vcpu->arch.apicv_active)) { - /* try to update RVI */ + /* need to update RVI */ apic_clear_vector(vec, apic->regs + APIC_IRR); - kvm_make_request(KVM_REQ_EVENT, vcpu); + kvm_x86_ops->hwapic_irr_update(vcpu, + apic_find_highest_irr(apic)); } else { apic->irr_pending = false; apic_clear_vector(vec, apic->regs + APIC_IRR); diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d03cb62b70d2..fd8cd50e9dc6 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5051,9 +5051,11 @@ static void vmx_deliver_posted_interrupt(struct kvm_vcpu *vcpu, int vector) if (pi_test_and_set_pir(vector, &vmx->pi_desc)) return; - r = pi_test_and_set_on(&vmx->pi_desc); - kvm_make_request(KVM_REQ_EVENT, vcpu); - if (r || !kvm_vcpu_trigger_posted_interrupt(vcpu)) + /* If a previous notification has sent the IPI, nothing to do. */ + if (pi_test_and_set_on(&vmx->pi_desc)) + return; + + if (!kvm_vcpu_trigger_posted_interrupt(vcpu)) kvm_vcpu_kick(vcpu); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 75b0f30d75ee..63a89a51dcc9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6813,19 +6813,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_hv_process_stimers(vcpu); } - /* - * KVM_REQ_EVENT is not set when posted interrupts are set by - * VT-d hardware, so we have to update RVI unconditionally. - */ - if (kvm_lapic_enabled(vcpu)) { - /* - * Update architecture specific hints for APIC - * virtual interrupt delivery. - */ - if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) - kvm_x86_ops->sync_pir_to_irr(vcpu); - } - if (kvm_check_request(KVM_REQ_EVENT, vcpu) || req_int_win) { ++vcpu->stat.req_event; kvm_apic_accept_events(vcpu); @@ -6870,20 +6857,39 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) kvm_x86_ops->prepare_guest_switch(vcpu); if (vcpu->fpu_active) kvm_load_guest_fpu(vcpu); + + /* + * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt + * IPI are then delayed after guest entry, which ensures that they + * result in virtual interrupt delivery. + */ + local_irq_disable(); vcpu->mode = IN_GUEST_MODE; srcu_read_unlock(&vcpu->kvm->srcu, vcpu->srcu_idx); /* - * We should set ->mode before check ->requests, - * Please see the comment in kvm_make_all_cpus_request. - * This also orders the write to mode from any reads - * to the page tables done while the VCPU is running. - * Please see the comment in kvm_flush_remote_tlbs. + * 1) We should set ->mode before checking ->requests. Please see + * the comment in kvm_make_all_cpus_request. + * + * 2) For APICv, we should set ->mode before checking PIR.ON. This + * pairs with the memory barrier implicit in pi_test_and_set_on + * (see vmx_deliver_posted_interrupt). + * + * 3) This also orders the write to mode from any reads to the page + * tables done while the VCPU is running. Please see the comment + * in kvm_flush_remote_tlbs. */ smp_mb__after_srcu_read_unlock(); - local_irq_disable(); + /* + * This handles the case where a posted interrupt was + * notified with kvm_vcpu_kick. + */ + if (kvm_lapic_enabled(vcpu)) { + if (kvm_x86_ops->sync_pir_to_irr && vcpu->arch.apicv_active) + kvm_x86_ops->sync_pir_to_irr(vcpu); + } if (vcpu->mode == EXITING_GUEST_MODE || vcpu->requests || need_resched() || signal_pending(current)) { -- cgit v1.2.3 From cf8b84f48a5936f558a7a415f1d2f42161bf73eb Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 Nov 2016 12:03:42 -0800 Subject: kvm: nVMX: Prepare for checkpointing L2 state Split prepare_vmcs12 into two parts: the part that stores the current L2 guest state and the part that sets up the exit information fields. The former will be used when checkpointing the vCPU's VMX state. Modify prepare_vmcs02 so that it can construct a vmcs02 midway through L2 execution, using the checkpointed L2 guest state saved into the cached vmcs12 above. Signed-off-by: Jim Mattson [Rebasing: add from_vmentry argument to prepare_vmcs02 instead of using vmx->nested.nested_run_pending, because it is no longer 1 at the point prepare_vmcs02 is called. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 77 ++++++++++++++++++++++++++++++++++-------------------- 1 file changed, 48 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index fd8cd50e9dc6..4b4b59b34d44 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10040,7 +10040,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * is assigned to entry_failure_code on failure. */ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - unsigned long *entry_failure_code) + bool from_vmentry, unsigned long *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; @@ -10083,21 +10083,26 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base); vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base); - if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) { + if (from_vmentry && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) { kvm_set_dr(vcpu, 7, vmcs12->guest_dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl); } else { kvm_set_dr(vcpu, 7, vcpu->arch.dr7); vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl); } - vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, - vmcs12->vm_entry_intr_info_field); - vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, - vmcs12->vm_entry_exception_error_code); - vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, - vmcs12->vm_entry_instruction_len); - vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, - vmcs12->guest_interruptibility_info); + if (from_vmentry) { + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, + vmcs12->vm_entry_intr_info_field); + vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE, + vmcs12->vm_entry_exception_error_code); + vmcs_write32(VM_ENTRY_INSTRUCTION_LEN, + vmcs12->vm_entry_instruction_len); + vmcs_write32(GUEST_INTERRUPTIBILITY_INFO, + vmcs12->guest_interruptibility_info); + } else { + vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0); + } vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs); vmx_set_rflags(vcpu, vmcs12->guest_rflags); vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS, @@ -10290,16 +10295,18 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, ~VM_ENTRY_IA32E_MODE) | (vmcs_config.vmentry_ctrl & ~VM_ENTRY_IA32E_MODE)); - if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) { + if (from_vmentry && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) { vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat); vcpu->arch.pat = vmcs12->guest_ia32_pat; - } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) + } else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) { vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat); - + } set_cr4_guest_host_mask(vmx); - if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) + if (from_vmentry && + vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs); if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETING) @@ -10351,7 +10358,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_cr4(vcpu, vmcs12->guest_cr4); vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12)); - if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) + if (from_vmentry && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) vcpu->arch.efer = vmcs12->guest_ia32_efer; else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) vcpu->arch.efer |= (EFER_LMA | EFER_LME); @@ -10561,7 +10569,7 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx_segment_cache_clear(vmx); - if (prepare_vmcs02(vcpu, vmcs12, &exit_qualification)) { + if (prepare_vmcs02(vcpu, vmcs12, true, &exit_qualification)) { leave_guest_mode(vcpu); vmx_load_vmcs01(vcpu); nested_vmx_entry_failure(vcpu, vmcs12, @@ -10733,21 +10741,13 @@ static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu) } /* - * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits - * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), - * and this function updates it to reflect the changes to the guest state while - * L2 was running (and perhaps made some exits which were handled directly by L0 - * without going back to L1), and to reflect the exit reason. - * Note that we do not have to copy here all VMCS fields, just those that - * could have changed by the L2 guest or the exit - i.e., the guest-state and - * exit-information fields only. Other fields are modified by L1 with VMWRITE, - * which already writes to vmcs12 directly. + * Update the guest state fields of vmcs12 to reflect changes that + * occurred while L2 was running. (The "IA-32e mode guest" bit of the + * VM-entry controls is also updated, since this is really a guest + * state bit.) */ -static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - u32 exit_reason, u32 exit_intr_info, - unsigned long exit_qualification) +static void sync_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - /* update guest state fields: */ vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12); vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12); @@ -10853,6 +10853,25 @@ static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS); if (nested_cpu_has_xsaves(vmcs12)) vmcs12->xss_exit_bitmap = vmcs_read64(XSS_EXIT_BITMAP); +} + +/* + * prepare_vmcs12 is part of what we need to do when the nested L2 guest exits + * and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12), + * and this function updates it to reflect the changes to the guest state while + * L2 was running (and perhaps made some exits which were handled directly by L0 + * without going back to L1), and to reflect the exit reason. + * Note that we do not have to copy here all VMCS fields, just those that + * could have changed by the L2 guest or the exit - i.e., the guest-state and + * exit-information fields only. Other fields are modified by L1 with VMWRITE, + * which already writes to vmcs12 directly. + */ +static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + u32 exit_reason, u32 exit_intr_info, + unsigned long exit_qualification) +{ + /* update guest state fields: */ + sync_vmcs12(vcpu, vmcs12); /* update exit information fields: */ -- cgit v1.2.3 From e29acc55bfc3afcfdf9ea3468f1217d08f0b2d2b Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 Nov 2016 12:03:43 -0800 Subject: kvm: nVMX: Refactor handle_vmon() Handle_vmon is split into two parts: the part that handles the VMXON instruction, and the part that modifies the vcpu state to transition from legacy mode to VMX operation. The latter will be used when restoring the checkpointed state of a vCPU that was in VMX operation when a snapshot was taken. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 93 ++++++++++++++++++++++++++++++------------------------ 1 file changed, 52 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 4b4b59b34d44..33cb8d2b9653 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7124,6 +7124,53 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, return 0; } +static int enter_vmx_operation(struct kvm_vcpu *vcpu) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs *shadow_vmcs; + + if (cpu_has_vmx_msr_bitmap()) { + vmx->nested.msr_bitmap = + (unsigned long *)__get_free_page(GFP_KERNEL); + if (!vmx->nested.msr_bitmap) + goto out_msr_bitmap; + } + + vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); + if (!vmx->nested.cached_vmcs12) + goto out_cached_vmcs12; + + if (enable_shadow_vmcs) { + shadow_vmcs = alloc_vmcs(); + if (!shadow_vmcs) + goto out_shadow_vmcs; + /* mark vmcs as shadow */ + shadow_vmcs->revision_id |= (1u << 31); + /* init shadow vmcs */ + vmcs_clear(shadow_vmcs); + vmx->vmcs01.shadow_vmcs = shadow_vmcs; + } + + INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); + vmx->nested.vmcs02_num = 0; + + hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, + HRTIMER_MODE_REL_PINNED); + vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; + + vmx->nested.vmxon = true; + return 0; + +out_shadow_vmcs: + kfree(vmx->nested.cached_vmcs12); + +out_cached_vmcs12: + free_page((unsigned long)vmx->nested.msr_bitmap); + +out_msr_bitmap: + return -ENOMEM; +} + /* * Emulate the VMXON instruction. * Currently, we just remember that VMX is active, and do not save or even @@ -7134,9 +7181,9 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, */ static int handle_vmon(struct kvm_vcpu *vcpu) { + int ret; struct kvm_segment cs; struct vcpu_vmx *vmx = to_vmx(vcpu); - struct vmcs *shadow_vmcs; const u64 VMXON_NEEDED_FEATURES = FEATURE_CONTROL_LOCKED | FEATURE_CONTROL_VMXON_ENABLED_OUTSIDE_SMX; @@ -7176,49 +7223,13 @@ static int handle_vmon(struct kvm_vcpu *vcpu) if (nested_vmx_check_vmptr(vcpu, EXIT_REASON_VMON, NULL)) return 1; - - if (cpu_has_vmx_msr_bitmap()) { - vmx->nested.msr_bitmap = - (unsigned long *)__get_free_page(GFP_KERNEL); - if (!vmx->nested.msr_bitmap) - goto out_msr_bitmap; - } - - vmx->nested.cached_vmcs12 = kmalloc(VMCS12_SIZE, GFP_KERNEL); - if (!vmx->nested.cached_vmcs12) - goto out_cached_vmcs12; - - if (enable_shadow_vmcs) { - shadow_vmcs = alloc_vmcs(); - if (!shadow_vmcs) - goto out_shadow_vmcs; - /* mark vmcs as shadow */ - shadow_vmcs->revision_id |= (1u << 31); - /* init shadow vmcs */ - vmcs_clear(shadow_vmcs); - vmx->vmcs01.shadow_vmcs = shadow_vmcs; - } - - INIT_LIST_HEAD(&(vmx->nested.vmcs02_pool)); - vmx->nested.vmcs02_num = 0; - - hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC, - HRTIMER_MODE_REL_PINNED); - vmx->nested.preemption_timer.function = vmx_preemption_timer_fn; - - vmx->nested.vmxon = true; + + ret = enter_vmx_operation(vcpu); + if (ret) + return ret; nested_vmx_succeed(vcpu); return kvm_skip_emulated_instruction(vcpu); - -out_shadow_vmcs: - kfree(vmx->nested.cached_vmcs12); - -out_cached_vmcs12: - free_page((unsigned long)vmx->nested.msr_bitmap); - -out_msr_bitmap: - return -ENOMEM; } /* -- cgit v1.2.3 From a8bc284eb70f58a4fd0f4a70d816cca28ca01973 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 Nov 2016 12:03:44 -0800 Subject: kvm: nVMX: Refactor handle_vmptrld() Handle_vmptrld is split into two parts: the part that handles the VMPTRLD instruction, and the part that establishes the current VMCS pointer. The latter will be used when restoring the checkpointed state of a vCPU that had a valid VMCS pointer when a snapshot was taken. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 33cb8d2b9653..ef9affcabdeb 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7678,6 +7678,18 @@ static int handle_vmwrite(struct kvm_vcpu *vcpu) return kvm_skip_emulated_instruction(vcpu); } +static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr) +{ + vmx->nested.current_vmptr = vmptr; + if (enable_shadow_vmcs) { + vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_SHADOW_VMCS); + vmcs_write64(VMCS_LINK_POINTER, + __pa(vmx->vmcs01.shadow_vmcs)); + vmx->nested.sync_shadow_vmcs = true; + } +} + /* Emulate the VMPTRLD instruction */ static int handle_vmptrld(struct kvm_vcpu *vcpu) { @@ -7708,7 +7720,6 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) } nested_release_vmcs12(vmx); - vmx->nested.current_vmptr = vmptr; vmx->nested.current_vmcs12 = new_vmcs12; vmx->nested.current_vmcs12_page = page; /* @@ -7717,14 +7728,7 @@ static int handle_vmptrld(struct kvm_vcpu *vcpu) */ memcpy(vmx->nested.cached_vmcs12, vmx->nested.current_vmcs12, VMCS12_SIZE); - - if (enable_shadow_vmcs) { - vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, - SECONDARY_EXEC_SHADOW_VMCS); - vmcs_write64(VMCS_LINK_POINTER, - __pa(vmx->vmcs01.shadow_vmcs)); - vmx->nested.sync_shadow_vmcs = true; - } + set_current_vmptr(vmx, vmptr); } nested_vmx_succeed(vcpu); -- cgit v1.2.3 From 6beb7bd52e482c213dfa6c1c88e27a579df5bf4d Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 Nov 2016 12:03:45 -0800 Subject: kvm: nVMX: Refactor nested_get_vmcs12_pages() Perform the checks on vmcs12 state early, but defer the gpa->hpa lookups until after prepare_vmcs02. Later, when we restore the checkpointed state of a vCPU in guest mode, we will not be able to do the gpa->hpa lookups when the restore is done. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 138 ++++++++++++++++++++++++++--------------------------- 1 file changed, 69 insertions(+), 69 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ef9affcabdeb..650f34336fad 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9626,17 +9626,16 @@ static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu, kvm_inject_page_fault(vcpu, fault); } -static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, +static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, + struct vmcs12 *vmcs12); + +static void nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct vcpu_vmx *vmx = to_vmx(vcpu); - int maxphyaddr = cpuid_maxphyaddr(vcpu); + u64 hpa; if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { - if (!PAGE_ALIGNED(vmcs12->apic_access_addr) || - vmcs12->apic_access_addr >> maxphyaddr) - return false; - /* * Translate L1 physical address to host physical * address for vmcs02. Keep the page pinned, so this @@ -9647,59 +9646,80 @@ static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu, nested_release_page(vmx->nested.apic_access_page); vmx->nested.apic_access_page = nested_get_page(vcpu, vmcs12->apic_access_addr); + /* + * If translation failed, no matter: This feature asks + * to exit when accessing the given address, and if it + * can never be accessed, this feature won't do + * anything anyway. + */ + if (vmx->nested.apic_access_page) { + hpa = page_to_phys(vmx->nested.apic_access_page); + vmcs_write64(APIC_ACCESS_ADDR, hpa); + } else { + vmcs_clear_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + } + } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && + cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { + vmcs_set_bits(SECONDARY_VM_EXEC_CONTROL, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES); + kvm_vcpu_reload_apic_access_page(vcpu); } if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) { - if (!PAGE_ALIGNED(vmcs12->virtual_apic_page_addr) || - vmcs12->virtual_apic_page_addr >> maxphyaddr) - return false; - if (vmx->nested.virtual_apic_page) /* shouldn't happen */ nested_release_page(vmx->nested.virtual_apic_page); vmx->nested.virtual_apic_page = nested_get_page(vcpu, vmcs12->virtual_apic_page_addr); /* - * Failing the vm entry is _not_ what the processor does - * but it's basically the only possibility we have. - * We could still enter the guest if CR8 load exits are - * enabled, CR8 store exits are enabled, and virtualize APIC - * access is disabled; in this case the processor would never - * use the TPR shadow and we could simply clear the bit from - * the execution control. But such a configuration is useless, - * so let's keep the code simple. + * If translation failed, VM entry will fail because + * prepare_vmcs02 set VIRTUAL_APIC_PAGE_ADDR to -1ull. + * Failing the vm entry is _not_ what the processor + * does but it's basically the only possibility we + * have. We could still enter the guest if CR8 load + * exits are enabled, CR8 store exits are enabled, and + * virtualize APIC access is disabled; in this case + * the processor would never use the TPR shadow and we + * could simply clear the bit from the execution + * control. But such a configuration is useless, so + * let's keep the code simple. */ - if (!vmx->nested.virtual_apic_page) - return false; + if (vmx->nested.virtual_apic_page) { + hpa = page_to_phys(vmx->nested.virtual_apic_page); + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, hpa); + } } if (nested_cpu_has_posted_intr(vmcs12)) { - if (!IS_ALIGNED(vmcs12->posted_intr_desc_addr, 64) || - vmcs12->posted_intr_desc_addr >> maxphyaddr) - return false; - if (vmx->nested.pi_desc_page) { /* shouldn't happen */ kunmap(vmx->nested.pi_desc_page); nested_release_page(vmx->nested.pi_desc_page); } vmx->nested.pi_desc_page = nested_get_page(vcpu, vmcs12->posted_intr_desc_addr); - if (!vmx->nested.pi_desc_page) - return false; - vmx->nested.pi_desc = (struct pi_desc *)kmap(vmx->nested.pi_desc_page); if (!vmx->nested.pi_desc) { nested_release_page_clean(vmx->nested.pi_desc_page); - return false; + return; } vmx->nested.pi_desc = (struct pi_desc *)((void *)vmx->nested.pi_desc + (unsigned long)(vmcs12->posted_intr_desc_addr & (PAGE_SIZE - 1))); + vmcs_write64(POSTED_INTR_DESC_ADDR, + page_to_phys(vmx->nested.pi_desc_page) + + (unsigned long)(vmcs12->posted_intr_desc_addr & + (PAGE_SIZE - 1))); } - - return true; + if (cpu_has_vmx_msr_bitmap() && + nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS) && + nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) + ; + else + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_USE_MSR_BITMAPS); } static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu) @@ -10146,12 +10166,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv; vmx->nested.pi_pending = false; vmcs_write16(POSTED_INTR_NV, POSTED_INTR_VECTOR); - vmcs_write64(POSTED_INTR_DESC_ADDR, - page_to_phys(vmx->nested.pi_desc_page) + - (unsigned long)(vmcs12->posted_intr_desc_addr & - (PAGE_SIZE - 1))); - } else + } else { exec_control &= ~PIN_BASED_POSTED_INTR; + } vmcs_write32(PIN_BASED_VM_EXEC_CONTROL, exec_control); @@ -10196,26 +10213,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)) exec_control |= vmcs12->secondary_vm_exec_control; - if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) { - /* - * If translation failed, no matter: This feature asks - * to exit when accessing the given address, and if it - * can never be accessed, this feature won't do - * anything anyway. - */ - if (!vmx->nested.apic_access_page) - exec_control &= - ~SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - else - vmcs_write64(APIC_ACCESS_ADDR, - page_to_phys(vmx->nested.apic_access_page)); - } else if (!(nested_cpu_has_virt_x2apic_mode(vmcs12)) && - cpu_need_virtualize_apic_accesses(&vmx->vcpu)) { - exec_control |= - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; - kvm_vcpu_reload_apic_access_page(vcpu); - } - if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY) { vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0); @@ -10230,6 +10227,15 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; + + /* + * Write an illegal value to APIC_ACCESS_ADDR. Later, + * nested_get_vmcs12_pages will either fix it up or + * remove the VM execution control. + */ + if (exec_control & SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) + vmcs_write64(APIC_ACCESS_ADDR, -1ull); + vmcs_write32(SECONDARY_VM_EXEC_CONTROL, exec_control); } @@ -10266,19 +10272,16 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, exec_control &= ~CPU_BASED_TPR_SHADOW; exec_control |= vmcs12->cpu_based_vm_exec_control; + /* + * Write an illegal value to VIRTUAL_APIC_PAGE_ADDR. Later, if + * nested_get_vmcs12_pages can't fix it up, the illegal value + * will result in a VM entry failure. + */ if (exec_control & CPU_BASED_TPR_SHADOW) { - vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, - page_to_phys(vmx->nested.virtual_apic_page)); + vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull); vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold); } - if (cpu_has_vmx_msr_bitmap() && - exec_control & CPU_BASED_USE_MSR_BITMAPS && - nested_vmx_merge_msr_bitmap(vcpu, vmcs12)) - ; /* MSR_BITMAP will be set by following vmx_set_efer. */ - else - exec_control &= ~CPU_BASED_USE_MSR_BITMAPS; - /* * Merging of IO bitmap not currently supported. * Rather, exit every time. @@ -10456,11 +10459,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) goto out; } - if (!nested_get_vmcs12_pages(vcpu, vmcs12)) { - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - goto out; - } - if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) { nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); goto out; @@ -10592,6 +10590,8 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) return 1; } + nested_get_vmcs12_pages(vcpu, vmcs12); + msr_entry_idx = nested_vmx_load_msr(vcpu, vmcs12->vm_entry_msr_load_addr, vmcs12->vm_entry_msr_load_count); -- cgit v1.2.3 From ca0bde28f2ed66c2229ecfb7f4bfa0defa3da4b5 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 Nov 2016 12:03:46 -0800 Subject: kvm: nVMX: Split VMCS checks from nested_vmx_run() The checks performed on the contents of the vmcs12 are extracted from nested_vmx_run so that they can be used to validate a vmcs12 that has been restored from a checkpoint. Signed-off-by: Jim Mattson [Change prepare_vmcs02 and nested_vmx_load_cr3's last argument to u32, to match check_vmentry_postreqs. Update comments for singlestep handling. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 194 ++++++++++++++++++++++++++++------------------------- 1 file changed, 103 insertions(+), 91 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 650f34336fad..71df7411959f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10035,7 +10035,7 @@ static bool nested_cr3_valid(struct kvm_vcpu *vcpu, unsigned long val) * is assigned to entry_failure_code on failure. */ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool nested_ept, - unsigned long *entry_failure_code) + u32 *entry_failure_code) { if (cr3 != kvm_read_cr3(vcpu) || (!nested_ept && pdptrs_changed(vcpu))) { if (!nested_cr3_valid(vcpu, cr3)) { @@ -10075,7 +10075,7 @@ static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3, bool ne * is assigned to entry_failure_code on failure. */ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, - bool from_vmentry, unsigned long *entry_failure_code) + bool from_vmentry, u32 *entry_failure_code) { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; @@ -10411,68 +10411,22 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } -/* - * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 - * for running an L2 nested guest. - */ -static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) +static int check_vmentry_prereqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { - struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - int cpu; - struct loaded_vmcs *vmcs02; - bool ia32e; - u32 msr_entry_idx; - unsigned long exit_qualification; - - if (!nested_vmx_check_permission(vcpu)) - return 1; - - if (!nested_vmx_check_vmcs12(vcpu)) - goto out; - - vmcs12 = get_vmcs12(vcpu); - - if (enable_shadow_vmcs) - copy_shadow_to_vmcs12(vmx); - - /* - * The nested entry process starts with enforcing various prerequisites - * on vmcs12 as required by the Intel SDM, and act appropriately when - * they fail: As the SDM explains, some conditions should cause the - * instruction to fail, while others will cause the instruction to seem - * to succeed, but return an EXIT_REASON_INVALID_STATE. - * To speed up the normal (success) code path, we should avoid checking - * for misconfigurations which will anyway be caught by the processor - * when using the merged vmcs02. - */ - if (vmcs12->launch_state == launch) { - nested_vmx_failValid(vcpu, - launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS - : VMXERR_VMRESUME_NONLAUNCHED_VMCS); - goto out; - } if (vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE && - vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) { - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - goto out; - } + vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) { - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - goto out; - } + if (nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) { - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - goto out; - } + if (nested_vmx_check_apicv_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; - if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) { - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - goto out; - } + if (nested_vmx_check_msr_switch_controls(vcpu, vmcs12)) + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (!vmx_control_verify(vmcs12->cpu_based_vm_exec_control, vmx->nested.nested_vmx_procbased_ctls_low, @@ -10489,28 +10443,30 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) !vmx_control_verify(vmcs12->vm_entry_controls, vmx->nested.nested_vmx_entry_ctls_low, vmx->nested.nested_vmx_entry_ctls_high)) - { - nested_vmx_failValid(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD); - goto out; - } + return VMXERR_ENTRY_INVALID_CONTROL_FIELD; if (!nested_host_cr0_valid(vcpu, vmcs12->host_cr0) || !nested_host_cr4_valid(vcpu, vmcs12->host_cr4) || - !nested_cr3_valid(vcpu, vmcs12->host_cr3)) { - nested_vmx_failValid(vcpu, - VMXERR_ENTRY_INVALID_HOST_STATE_FIELD); - goto out; - } + !nested_cr3_valid(vcpu, vmcs12->host_cr3)) + return VMXERR_ENTRY_INVALID_HOST_STATE_FIELD; + + return 0; +} + +static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, + u32 *exit_qual) +{ + bool ia32e; + + *exit_qual = ENTRY_FAIL_DEFAULT; if (!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0) || - !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) { - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + !nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)) return 1; - } - if (vmcs12->vmcs_link_pointer != -1ull) { - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, ENTRY_FAIL_VMCS_LINK_PTR); + + if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_SHADOW_VMCS) && + vmcs12->vmcs_link_pointer != -1ull) { + *exit_qual = ENTRY_FAIL_VMCS_LINK_PTR; return 1; } @@ -10523,16 +10479,14 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * to bit 8 (LME) if bit 31 in the CR0 field (corresponding to * CR0.PG) is 1. */ - if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER) { + if (to_vmx(vcpu)->nested.nested_run_pending && + (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) { ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0; if (!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer) || ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA) || ((vmcs12->guest_cr0 & X86_CR0_PG) && - ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) { - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))) return 1; - } } /* @@ -10546,11 +10500,75 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) VM_EXIT_HOST_ADDR_SPACE_SIZE) != 0; if (!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer) || ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA) || - ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) { - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, ENTRY_FAIL_DEFAULT); + ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)) return 1; - } + } + + return 0; +} + +/* + * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 + * for running an L2 nested guest. + */ +static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) +{ + struct vmcs12 *vmcs12; + struct vcpu_vmx *vmx = to_vmx(vcpu); + int cpu; + struct loaded_vmcs *vmcs02; + u32 msr_entry_idx; + u32 exit_qual; + int ret; + + if (!nested_vmx_check_permission(vcpu)) + return 1; + + if (!nested_vmx_check_vmcs12(vcpu)) + goto out; + + vmcs12 = get_vmcs12(vcpu); + + if (enable_shadow_vmcs) + copy_shadow_to_vmcs12(vmx); + + /* + * The nested entry process starts with enforcing various prerequisites + * on vmcs12 as required by the Intel SDM, and act appropriately when + * they fail: As the SDM explains, some conditions should cause the + * instruction to fail, while others will cause the instruction to seem + * to succeed, but return an EXIT_REASON_INVALID_STATE. + * To speed up the normal (success) code path, we should avoid checking + * for misconfigurations which will anyway be caught by the processor + * when using the merged vmcs02. + */ + if (vmcs12->launch_state == launch) { + nested_vmx_failValid(vcpu, + launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS + : VMXERR_VMRESUME_NONLAUNCHED_VMCS); + goto out; + } + + ret = check_vmentry_prereqs(vcpu, vmcs12); + if (ret) { + nested_vmx_failValid(vcpu, ret); + goto out; + } + + /* + * After this point, the trap flag no longer triggers a singlestep trap + * on the vm entry instructions; don't call kvm_skip_emulated_instruction. + * This is not 100% correct; for performance reasons, we delegate most + * of the checks on host state to the processor. If those fail, + * the singlestep trap is missed. + */ + skip_emulated_instruction(vcpu); + + ret = check_vmentry_postreqs(vcpu, vmcs12, &exit_qual); + if (ret) { + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, exit_qual); + return 1; } /* @@ -10562,12 +10580,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) if (!vmcs02) return -ENOMEM; - /* - * After this point, the trap flag no longer triggers a singlestep trap - * on the vm entry instructions. Don't call - * kvm_skip_emulated_instruction. - */ - skip_emulated_instruction(vcpu); enter_guest_mode(vcpu); if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) @@ -10582,11 +10594,11 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) vmx_segment_cache_clear(vmx); - if (prepare_vmcs02(vcpu, vmcs12, true, &exit_qualification)) { + if (prepare_vmcs02(vcpu, vmcs12, true, &exit_qual)) { leave_guest_mode(vcpu); vmx_load_vmcs01(vcpu); nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, exit_qualification); + EXIT_REASON_INVALID_STATE, exit_qual); return 1; } @@ -10937,7 +10949,7 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12) { struct kvm_segment seg; - unsigned long entry_failure_code; + u32 entry_failure_code; if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) vcpu->arch.efer = vmcs12->host_ia32_efer; -- cgit v1.2.3 From 858e25c06fb0bc4b39b2f01c14c990348e3a9b67 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Wed, 30 Nov 2016 12:03:47 -0800 Subject: kvm: nVMX: Refactor nested_vmx_run() Nested_vmx_run is split into two parts: the part that handles the VMLAUNCH/VMRESUME instruction, and the part that modifies the vcpu state to transition from VMX root mode to VMX non-root mode. The latter will be used when restoring the checkpointed state of a vCPU that was in VMX operation when a snapshot was taken. Signed-off-by: Jim Mattson Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 111 ++++++++++++++++++++++++++++++----------------------- 1 file changed, 62 insertions(+), 49 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 71df7411959f..bca60665d55d 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10507,6 +10507,65 @@ static int check_vmentry_postreqs(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, return 0; } +static int enter_vmx_non_root_mode(struct kvm_vcpu *vcpu, bool from_vmentry) +{ + struct vcpu_vmx *vmx = to_vmx(vcpu); + struct vmcs12 *vmcs12 = get_vmcs12(vcpu); + struct loaded_vmcs *vmcs02; + int cpu; + u32 msr_entry_idx; + u32 exit_qual; + + vmcs02 = nested_get_current_vmcs02(vmx); + if (!vmcs02) + return -ENOMEM; + + enter_guest_mode(vcpu); + + if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) + vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); + + cpu = get_cpu(); + vmx->loaded_vmcs = vmcs02; + vmx_vcpu_put(vcpu); + vmx_vcpu_load(vcpu, cpu); + vcpu->cpu = cpu; + put_cpu(); + + vmx_segment_cache_clear(vmx); + + if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &exit_qual)) { + leave_guest_mode(vcpu); + vmx_load_vmcs01(vcpu); + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_INVALID_STATE, exit_qual); + return 1; + } + + nested_get_vmcs12_pages(vcpu, vmcs12); + + msr_entry_idx = nested_vmx_load_msr(vcpu, + vmcs12->vm_entry_msr_load_addr, + vmcs12->vm_entry_msr_load_count); + if (msr_entry_idx) { + leave_guest_mode(vcpu); + vmx_load_vmcs01(vcpu); + nested_vmx_entry_failure(vcpu, vmcs12, + EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); + return 1; + } + + vmcs12->launch_state = 1; + + /* + * Note no nested_vmx_succeed or nested_vmx_fail here. At this point + * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet + * returned as far as L1 is concerned. It will only return (and set + * the success flag) when L2 exits (see nested_vmx_vmexit()). + */ + return 0; +} + /* * nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1 * for running an L2 nested guest. @@ -10515,9 +10574,6 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) { struct vmcs12 *vmcs12; struct vcpu_vmx *vmx = to_vmx(vcpu); - int cpu; - struct loaded_vmcs *vmcs02; - u32 msr_entry_idx; u32 exit_qual; int ret; @@ -10576,58 +10632,15 @@ static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch) * the nested entry. */ - vmcs02 = nested_get_current_vmcs02(vmx); - if (!vmcs02) - return -ENOMEM; - - enter_guest_mode(vcpu); - - if (!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) - vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL); - - cpu = get_cpu(); - vmx->loaded_vmcs = vmcs02; - vmx_vcpu_put(vcpu); - vmx_vcpu_load(vcpu, cpu); - vcpu->cpu = cpu; - put_cpu(); - - vmx_segment_cache_clear(vmx); - - if (prepare_vmcs02(vcpu, vmcs12, true, &exit_qual)) { - leave_guest_mode(vcpu); - vmx_load_vmcs01(vcpu); - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_INVALID_STATE, exit_qual); - return 1; - } - - nested_get_vmcs12_pages(vcpu, vmcs12); - - msr_entry_idx = nested_vmx_load_msr(vcpu, - vmcs12->vm_entry_msr_load_addr, - vmcs12->vm_entry_msr_load_count); - if (msr_entry_idx) { - leave_guest_mode(vcpu); - vmx_load_vmcs01(vcpu); - nested_vmx_entry_failure(vcpu, vmcs12, - EXIT_REASON_MSR_LOAD_FAIL, msr_entry_idx); - return 1; - } - - vmcs12->launch_state = 1; + ret = enter_vmx_non_root_mode(vcpu, true); + if (ret) + return ret; if (vmcs12->guest_activity_state == GUEST_ACTIVITY_HLT) return kvm_vcpu_halt(vcpu); vmx->nested.nested_run_pending = 1; - /* - * Note no nested_vmx_succeed or nested_vmx_fail here. At this point - * we are no longer running L1, and VMLAUNCH/VMRESUME has not yet - * returned as far as L1 is concerned. It will only return (and set - * the success flag) when L2 exits (see nested_vmx_vmexit()). - */ return 1; out: -- cgit v1.2.3 From 681bcea802f2bf0a28cdf0fd0f813de7bb5cd3c7 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Tue, 24 Jan 2017 22:21:16 +0100 Subject: KVM: svm: inititalize hash table structures directly MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The hashtable and guarding spinlock are global data structures, we can inititalize them statically. Signed-off-by: David Hildenbrand Message-Id: <20170124212116.4568-1-david@redhat.com> Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/svm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 13cd06220b19..4e5905a1ce70 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -971,8 +971,8 @@ static void svm_disable_lbrv(struct vcpu_svm *svm) * a particular vCPU. */ #define SVM_VM_DATA_HASH_BITS 8 -DECLARE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); -static spinlock_t svm_vm_data_hash_lock; +static DEFINE_HASHTABLE(svm_vm_data_hash, SVM_VM_DATA_HASH_BITS); +static DEFINE_SPINLOCK(svm_vm_data_hash_lock); /* Note: * This function is called from IOMMU driver to notify @@ -1077,8 +1077,6 @@ static __init int svm_hardware_setup(void) } else { pr_info("AVIC enabled\n"); - hash_init(svm_vm_data_hash); - spin_lock_init(&svm_vm_data_hash_lock); amd_iommu_register_ga_log_notifier(&avic_ga_log_notifier); } } -- cgit v1.2.3 From 47512cfd0d7a8bd6ab71d01cd89fca19eb2093eb Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 15 Feb 2017 11:11:50 +0100 Subject: x86/platform/goldfish: Prevent unconditional loading The goldfish platform code registers the platform device unconditionally which causes havoc in several ways if the goldfish_pdev_bus driver is enabled: - Access to the hardcoded physical memory region, which is either not available or contains stuff which is completely unrelated. - Prevents that the interrupt of the serial port can be requested - In case of a spurious interrupt it goes into a infinite loop in the interrupt handler of the pdev_bus driver (which needs to be fixed seperately). Add a 'goldfish' command line option to make the registration opt-in when the platform is compiled in. I'm seriously grumpy about this engineering trainwreck, which has seven SOBs from Intel developers for 50 lines of code. And none of them figured out that this is broken. Impressive fail! Fixes: ddd70cf93d78 ("goldfish: platform device for x86") Reported-by: Gabriel C Signed-off-by: Thomas Gleixner Cc: stable@vger.kernel.org Acked-by: Linus Torvalds Signed-off-by: Greg Kroah-Hartman --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ arch/x86/platform/goldfish/goldfish.c | 14 +++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index be7c0d9506b1..18eefa860f76 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1201,6 +1201,10 @@ When zero, profiling data is discarded and associated debugfs files are removed at module unload time. + goldfish [X86] Enable the goldfish android emulator platform. + Don't use this when you are not running on the + android emulator + gpt [EFI] Forces disk with valid GPT signature but invalid Protective MBR to be treated as GPT. If the primary GPT is corrupted, it enables the backup/alternate diff --git a/arch/x86/platform/goldfish/goldfish.c b/arch/x86/platform/goldfish/goldfish.c index 1693107a518e..0d17c0aafeb1 100644 --- a/arch/x86/platform/goldfish/goldfish.c +++ b/arch/x86/platform/goldfish/goldfish.c @@ -42,10 +42,22 @@ static struct resource goldfish_pdev_bus_resources[] = { } }; +static bool goldfish_enable __initdata; + +static int __init goldfish_setup(char *str) +{ + goldfish_enable = true; + return 0; +} +__setup("goldfish", goldfish_setup); + static int __init goldfish_init(void) { + if (!goldfish_enable) + return -ENODEV; + platform_device_register_simple("goldfish_pdev_bus", -1, - goldfish_pdev_bus_resources, 2); + goldfish_pdev_bus_resources, 2); return 0; } device_initcall(goldfish_init); -- cgit v1.2.3 From d48085f0716f195ee7432de2dd110e2093c40fd5 Mon Sep 17 00:00:00 2001 From: "travis@sgi.com" Date: Tue, 14 Feb 2017 18:11:29 -0600 Subject: x86/platform/UV/NMI: Fix uneccessary kABI breakage The addition of support for UV Hubless systems unneccessarily broke the kABI for a symbol that is not used by external kernel modules. Remove the symbol from the EXPORT list. Signed-off-by: Mike Travis Reviewed-by: Russ Anderson Link: http://lkml.kernel.org/r/20170215001129.068078379@asylum.americas.sgi.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/uv/uv_nmi.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 0ecd7bf7d2d3..9743d0ccfec6 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -65,7 +65,6 @@ static struct uv_hub_nmi_s **uv_hub_nmi_list; DEFINE_PER_CPU(struct uv_cpu_nmi_s, uv_cpu_nmi); -EXPORT_PER_CPU_SYMBOL_GPL(uv_cpu_nmi); /* UV hubless values */ #define NMI_CONTROL_PORT 0x70 -- cgit v1.2.3 From 3bba73b1b7a88d88c3ea16b7914c13d475e4a87b Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 16 Feb 2017 15:12:04 +0100 Subject: x86/cpufeature: Move RING3MWAIT feature to avoid conflicts The original feature bit is used in a different branch already. Move it to scattered bits. Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/cpufeatures.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 56e5184514c6..43c4ea9cd907 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -100,7 +100,7 @@ #define X86_FEATURE_XTOPOLOGY ( 3*32+22) /* cpu topology enum extensions */ #define X86_FEATURE_TSC_RELIABLE ( 3*32+23) /* TSC is known to be reliable */ #define X86_FEATURE_NONSTOP_TSC ( 3*32+24) /* TSC does not stop in C states */ -#define X86_FEATURE_RING3MWAIT ( 3*32+25) /* ring 3 MONITOR/MWAIT */ +/* free, was #define X86_FEATURE_CLFLUSH_MONITOR ( 3*32+25) * "" clflush reqd with monitor */ #define X86_FEATURE_EXTD_APICID ( 3*32+26) /* has extended APICID (8 bits) */ #define X86_FEATURE_AMD_DCM ( 3*32+27) /* multi-node processor */ #define X86_FEATURE_APERFMPERF ( 3*32+28) /* APERFMPERF */ @@ -186,7 +186,7 @@ * * Reuse free bits when adding new feature flags! */ - +#define X86_FEATURE_RING3MWAIT ( 7*32+ 0) /* Ring 3 MONITOR/MWAIT */ #define X86_FEATURE_CPB ( 7*32+ 2) /* AMD Core Performance Boost */ #define X86_FEATURE_EPB ( 7*32+ 3) /* IA32_ENERGY_PERF_BIAS support */ #define X86_FEATURE_CAT_L3 ( 7*32+ 4) /* Cache Allocation Technology L3 */ -- cgit v1.2.3 From 47c0152e0f8bd325869417e0aaff032e62bcf6f2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 19 Dec 2016 11:44:07 +0100 Subject: KVM: VMX: use vmcs_set/clear_bits for CPU-based execution controls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 30 ++++++++---------------------- 1 file changed, 8 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index bca60665d55d..0e0b5d09597e 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -5459,26 +5459,20 @@ static bool nested_exit_on_nmi(struct kvm_vcpu *vcpu) static void enable_irq_window(struct kvm_vcpu *vcpu) { - u32 cpu_based_vm_exec_control; - - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_VIRTUAL_INTR_PENDING); } static void enable_nmi_window(struct kvm_vcpu *vcpu) { - u32 cpu_based_vm_exec_control; - if (!cpu_has_virtual_nmis() || vmcs_read32(GUEST_INTERRUPTIBILITY_INFO) & GUEST_INTR_STATE_STI) { enable_irq_window(vcpu); return; } - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control |= CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + vmcs_set_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_VIRTUAL_NMI_PENDING); } static void vmx_inject_irq(struct kvm_vcpu *vcpu) @@ -6137,12 +6131,8 @@ static int handle_tpr_below_threshold(struct kvm_vcpu *vcpu) static int handle_interrupt_window(struct kvm_vcpu *vcpu) { - u32 cpu_based_vm_exec_control; - - /* clear pending irq */ - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_INTR_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_VIRTUAL_INTR_PENDING); kvm_make_request(KVM_REQ_EVENT, vcpu); @@ -6408,12 +6398,8 @@ static int handle_ept_misconfig(struct kvm_vcpu *vcpu) static int handle_nmi_window(struct kvm_vcpu *vcpu) { - u32 cpu_based_vm_exec_control; - - /* clear pending NMI */ - cpu_based_vm_exec_control = vmcs_read32(CPU_BASED_VM_EXEC_CONTROL); - cpu_based_vm_exec_control &= ~CPU_BASED_VIRTUAL_NMI_PENDING; - vmcs_write32(CPU_BASED_VM_EXEC_CONTROL, cpu_based_vm_exec_control); + vmcs_clear_bits(CPU_BASED_VM_EXEC_CONTROL, + CPU_BASED_VIRTUAL_NMI_PENDING); ++vcpu->stat.nmi_window_exits; kvm_make_request(KVM_REQ_EVENT, vcpu); -- cgit v1.2.3 From bbd6411513aa8ef3ea02abab61318daf87c1af1e Mon Sep 17 00:00:00 2001 From: "Cao, Lei" Date: Fri, 3 Feb 2017 20:04:35 +0000 Subject: KVM: Support vCPU-based gfn->hva cache MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Provide versions of struct gfn_to_hva_cache functions that take vcpu as a parameter instead of struct kvm. The existing functions are not needed anymore, so delete them. This allows dirty pages to be logged in the vcpu dirty ring, instead of the global dirty ring, for ring-based dirty memory tracking. Signed-off-by: Lei Cao Message-Id: Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- arch/x86/kvm/lapic.c | 22 ++++++++++------------ arch/x86/kvm/x86.c | 41 ++++++++++++++++++++--------------------- include/linux/kvm_host.h | 16 ++++++++-------- virt/kvm/kvm_main.c | 34 +++++++++++++++++----------------- 4 files changed, 55 insertions(+), 58 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/lapic.c b/arch/x86/kvm/lapic.c index 9fa5b8164961..bad6a25067bc 100644 --- a/arch/x86/kvm/lapic.c +++ b/arch/x86/kvm/lapic.c @@ -529,16 +529,14 @@ int kvm_apic_set_irq(struct kvm_vcpu *vcpu, struct kvm_lapic_irq *irq, static int pv_eoi_put_user(struct kvm_vcpu *vcpu, u8 val) { - - return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, &val, - sizeof(val)); + return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, &val, + sizeof(val)); } static int pv_eoi_get_user(struct kvm_vcpu *vcpu, u8 *val) { - - return kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.pv_eoi.data, val, - sizeof(*val)); + return kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.pv_eoi.data, val, + sizeof(*val)); } static inline bool pv_eoi_enabled(struct kvm_vcpu *vcpu) @@ -2287,8 +2285,8 @@ void kvm_lapic_sync_from_vapic(struct kvm_vcpu *vcpu) if (!test_bit(KVM_APIC_CHECK_VAPIC, &vcpu->arch.apic_attention)) return; - if (kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32))) + if (kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32))) return; apic_set_tpr(vcpu->arch.apic, data & 0xff); @@ -2340,14 +2338,14 @@ void kvm_lapic_sync_to_vapic(struct kvm_vcpu *vcpu) max_isr = 0; data = (tpr & 0xff) | ((max_isr & 0xf0) << 8) | (max_irr << 24); - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apic->vapic_cache, &data, - sizeof(u32)); + kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apic->vapic_cache, &data, + sizeof(u32)); } int kvm_lapic_set_vapic_addr(struct kvm_vcpu *vcpu, gpa_t vapic_addr) { if (vapic_addr) { - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apic->vapic_cache, vapic_addr, sizeof(u32))) return -EINVAL; @@ -2441,7 +2439,7 @@ int kvm_lapic_enable_pv_eoi(struct kvm_vcpu *vcpu, u64 data) vcpu->arch.pv_eoi.msr_val = data; if (!pv_eoi_enabled(vcpu)) return 0; - return kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.pv_eoi.data, + return kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_eoi.data, addr, sizeof(u8)); } diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 63a89a51dcc9..0aa8db229e0a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -1811,7 +1811,7 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) struct kvm_vcpu_arch *vcpu = &v->arch; struct pvclock_vcpu_time_info guest_hv_clock; - if (unlikely(kvm_read_guest_cached(v->kvm, &vcpu->pv_time, + if (unlikely(kvm_vcpu_read_guest_cached(v, &vcpu->pv_time, &guest_hv_clock, sizeof(guest_hv_clock)))) return; @@ -1832,9 +1832,9 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) BUILD_BUG_ON(offsetof(struct pvclock_vcpu_time_info, version) != 0); vcpu->hv_clock.version = guest_hv_clock.version + 1; - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); smp_wmb(); @@ -1848,16 +1848,16 @@ static void kvm_setup_pvclock_page(struct kvm_vcpu *v) trace_kvm_pvclock_update(v->vcpu_id, &vcpu->hv_clock); - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock)); + kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock)); smp_wmb(); vcpu->hv_clock.version++; - kvm_write_guest_cached(v->kvm, &vcpu->pv_time, - &vcpu->hv_clock, - sizeof(vcpu->hv_clock.version)); + kvm_vcpu_write_guest_cached(v, &vcpu->pv_time, + &vcpu->hv_clock, + sizeof(vcpu->hv_clock.version)); } static int kvm_guest_time_update(struct kvm_vcpu *v) @@ -2090,7 +2090,7 @@ static int kvm_pv_enable_async_pf(struct kvm_vcpu *vcpu, u64 data) return 0; } - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.apf.data, gpa, + if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.apf.data, gpa, sizeof(u32))) return 1; @@ -2109,7 +2109,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) if (!(vcpu->arch.st.msr_val & KVM_MSR_ENABLED)) return; - if (unlikely(kvm_read_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, + if (unlikely(kvm_vcpu_read_guest_cached(vcpu, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)))) return; @@ -2120,7 +2120,7 @@ static void record_steal_time(struct kvm_vcpu *vcpu) vcpu->arch.st.steal.version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, + kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); smp_wmb(); @@ -2129,14 +2129,14 @@ static void record_steal_time(struct kvm_vcpu *vcpu) vcpu->arch.st.last_steal; vcpu->arch.st.last_steal = current->sched_info.run_delay; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, + kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); smp_wmb(); vcpu->arch.st.steal.version += 1; - kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.st.stime, + kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.st.stime, &vcpu->arch.st.steal, sizeof(struct kvm_steal_time)); } @@ -2241,7 +2241,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (!(data & 1)) break; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, + if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.pv_time, data & ~1ULL, sizeof(struct pvclock_vcpu_time_info))) vcpu->arch.pv_time_enabled = false; @@ -2262,7 +2262,7 @@ int kvm_set_msr_common(struct kvm_vcpu *vcpu, struct msr_data *msr_info) if (data & KVM_STEAL_RESERVED_MASK) return 1; - if (kvm_gfn_to_hva_cache_init(vcpu->kvm, &vcpu->arch.st.stime, + if (kvm_vcpu_gfn_to_hva_cache_init(vcpu, &vcpu->arch.st.stime, data & KVM_STEAL_VALID_BITS, sizeof(struct kvm_steal_time))) return 1; @@ -2875,7 +2875,7 @@ static void kvm_steal_time_set_preempted(struct kvm_vcpu *vcpu) vcpu->arch.st.steal.preempted = 1; - kvm_write_guest_offset_cached(vcpu->kvm, &vcpu->arch.st.stime, + kvm_vcpu_write_guest_offset_cached(vcpu, &vcpu->arch.st.stime, &vcpu->arch.st.steal.preempted, offsetof(struct kvm_steal_time, preempted), sizeof(vcpu->arch.st.steal.preempted)); @@ -8533,9 +8533,8 @@ static void kvm_del_async_pf_gfn(struct kvm_vcpu *vcpu, gfn_t gfn) static int apf_put_user(struct kvm_vcpu *vcpu, u32 val) { - - return kvm_write_guest_cached(vcpu->kvm, &vcpu->arch.apf.data, &val, - sizeof(val)); + return kvm_vcpu_write_guest_cached(vcpu, &vcpu->arch.apf.data, &val, + sizeof(val)); } void kvm_arch_async_page_not_present(struct kvm_vcpu *vcpu, diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cda457bcedc1..2db458ee94b0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -641,18 +641,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); +int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); -int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len); -int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len); +int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); +int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); +int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index a83c186cefc1..263a80513ad9 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -1981,18 +1981,18 @@ static int __kvm_gfn_to_hva_cache_init(struct kvm_memslots *slots, return 0; } -int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, +int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, gpa_t gpa, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); return __kvm_gfn_to_hva_cache_init(slots, ghc, gpa, len); } -EXPORT_SYMBOL_GPL(kvm_gfn_to_hva_cache_init); +EXPORT_SYMBOL_GPL(kvm_vcpu_gfn_to_hva_cache_init); -int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len) +int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); int r; gpa_t gpa = ghc->gpa + offset; @@ -2002,7 +2002,7 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_write_guest(kvm, gpa, data, len); + return kvm_vcpu_write_guest(vcpu, gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2014,19 +2014,19 @@ int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_write_guest_offset_cached); +EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_offset_cached); -int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_vcpu_write_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - return kvm_write_guest_offset_cached(kvm, ghc, data, 0, len); + return kvm_vcpu_write_guest_offset_cached(vcpu, ghc, data, 0, len); } -EXPORT_SYMBOL_GPL(kvm_write_guest_cached); +EXPORT_SYMBOL_GPL(kvm_vcpu_write_guest_cached); -int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len) +int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len) { - struct kvm_memslots *slots = kvm_memslots(kvm); + struct kvm_memslots *slots = kvm_vcpu_memslots(vcpu); int r; BUG_ON(len > ghc->len); @@ -2035,7 +2035,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, __kvm_gfn_to_hva_cache_init(slots, ghc, ghc->gpa, ghc->len); if (unlikely(!ghc->memslot)) - return kvm_read_guest(kvm, ghc->gpa, data, len); + return kvm_vcpu_read_guest(vcpu, ghc->gpa, data, len); if (kvm_is_error_hva(ghc->hva)) return -EFAULT; @@ -2046,7 +2046,7 @@ int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, return 0; } -EXPORT_SYMBOL_GPL(kvm_read_guest_cached); +EXPORT_SYMBOL_GPL(kvm_vcpu_read_guest_cached); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len) { -- cgit v1.2.3 From 243b72aae28ca1032284028323bb81c9235b15c9 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 14 Feb 2017 13:08:38 +0300 Subject: x86/mm/ptdump: Optimize check for W+X mappings for CONFIG_KASAN=y Enabling both DEBUG_WX=y and KASAN=y options significantly increases boot time (dozens of seconds at least). KASAN fills kernel page tables with repeated values to map several TBs of the virtual memory to the single kasan_zero_page: kasan_zero_pud -> kasan_zero_pmd-> kasan_zero_pte-> kasan_zero_page So, the page table walker used to find W+X mapping check the same kasan_zero_p?d page table entries a lot more than once. With patch pud walker will skip the pud if it has the same value as the previous one . Skipping done iff we search for W+X mappings, so this optimization won't affect the page table dump via debugfs. This dropped time spend in W+X check from ~30 sec to reasonable 0.1 sec: Before: [ 4.579991] Freeing unused kernel memory: 1000K [ 35.257523] x86/mm: Checked W+X mappings: passed, no W+X pages found. After: [ 5.138756] Freeing unused kernel memory: 1000K [ 5.266496] x86/mm: Checked W+X mappings: passed, no W+X pages found. Signed-off-by: Andrey Ryabinin Reviewed-by: Alexander Potapenko Cc: Mark Rutland Cc: kasan-dev@googlegroups.com Cc: Tobias Regnery Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170214100839.17186-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/dump_pagetables.c | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 8aa6bea1cd6c..08135341798c 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -327,18 +327,31 @@ static void walk_pmd_level(struct seq_file *m, struct pg_state *st, pud_t addr, #if PTRS_PER_PUD > 1 +/* + * This is an optimization for CONFIG_DEBUG_WX=y + CONFIG_KASAN=y + * KASAN fills page tables with the same values. Since there is no + * point in checking page table more than once we just skip repeated + * entries. This saves us dozens of seconds during boot. + */ +static bool pud_already_checked(pud_t *prev_pud, pud_t *pud, bool checkwx) +{ + return checkwx && prev_pud && (pud_val(*prev_pud) == pud_val(*pud)); +} + static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, unsigned long P) { int i; pud_t *start; pgprotval_t prot; + pud_t *prev_pud = NULL; start = (pud_t *) pgd_page_vaddr(addr); for (i = 0; i < PTRS_PER_PUD; i++) { st->current_address = normalize_addr(P + i * PUD_LEVEL_MULT); - if (!pud_none(*start)) { + if (!pud_none(*start) && + !pud_already_checked(prev_pud, start, st->check_wx)) { if (pud_large(*start) || !pud_present(*start)) { prot = pud_flags(*start); note_page(m, st, __pgprot(prot), 2); @@ -349,6 +362,7 @@ static void walk_pud_level(struct seq_file *m, struct pg_state *st, pgd_t addr, } else note_page(m, st, __pgprot(0), 2); + prev_pud = start; start++; } } -- cgit v1.2.3 From 025205f8f30c6ab52b69bf34fb359ac80360fefd Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Tue, 14 Feb 2017 13:08:39 +0300 Subject: x86/mm/ptdump: Add address marker for KASAN shadow region Annotate the KASAN shadow with address markers in page table dump output: $ cat /sys/kernel/debug/kernel_page_tables ... ---[ Vmemmap ]--- 0xffffea0000000000-0xffffea0003000000 48M RW PSE GLB NX pmd 0xffffea0003000000-0xffffea0004000000 16M pmd 0xffffea0004000000-0xffffea0005000000 16M RW PSE GLB NX pmd 0xffffea0005000000-0xffffea0040000000 944M pmd 0xffffea0040000000-0xffffea8000000000 511G pud 0xffffea8000000000-0xffffec0000000000 1536G pgd ---[ KASAN shadow ]--- 0xffffec0000000000-0xffffed0000000000 1T ro GLB NX pte 0xffffed0000000000-0xffffed0018000000 384M RW PSE GLB NX pmd 0xffffed0018000000-0xffffed0020000000 128M pmd 0xffffed0020000000-0xffffed0028200000 130M RW PSE GLB NX pmd 0xffffed0028200000-0xffffed0040000000 382M pmd 0xffffed0040000000-0xffffed8000000000 511G pud 0xffffed8000000000-0xfffff50000000000 7680G pgd 0xfffff50000000000-0xfffffbfff0000000 7339776M ro GLB NX pte 0xfffffbfff0000000-0xfffffbfff0200000 2M pmd 0xfffffbfff0200000-0xfffffbfff0a00000 8M RW PSE GLB NX pmd 0xfffffbfff0a00000-0xfffffbffffe00000 244M pmd 0xfffffbffffe00000-0xfffffc0000000000 2M ro GLB NX pte ---[ KASAN shadow end ]--- 0xfffffc0000000000-0xffffff0000000000 3T pgd ---[ ESPfix Area ]--- ... Signed-off-by: Andrey Ryabinin Reviewed-by: Alexander Potapenko Cc: Mark Rutland Cc: kasan-dev@googlegroups.com Cc: Tobias Regnery Cc: Andrey Ryabinin Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170214100839.17186-2-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/mm/dump_pagetables.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/mm/dump_pagetables.c b/arch/x86/mm/dump_pagetables.c index 08135341798c..58b5bee7ea27 100644 --- a/arch/x86/mm/dump_pagetables.c +++ b/arch/x86/mm/dump_pagetables.c @@ -18,6 +18,7 @@ #include #include +#include #include /* @@ -51,6 +52,10 @@ enum address_markers_idx { LOW_KERNEL_NR, VMALLOC_START_NR, VMEMMAP_START_NR, +#ifdef CONFIG_KASAN + KASAN_SHADOW_START_NR, + KASAN_SHADOW_END_NR, +#endif # ifdef CONFIG_X86_ESPFIX64 ESPFIX_START_NR, # endif @@ -76,6 +81,10 @@ static struct addr_marker address_markers[] = { { 0/* PAGE_OFFSET */, "Low Kernel Mapping" }, { 0/* VMALLOC_START */, "vmalloc() Area" }, { 0/* VMEMMAP_START */, "Vmemmap" }, +#ifdef CONFIG_KASAN + { KASAN_SHADOW_START, "KASAN shadow" }, + { KASAN_SHADOW_END, "KASAN shadow end" }, +#endif # ifdef CONFIG_X86_ESPFIX64 { ESPFIX_BASE_ADDR, "ESPfix Area", 16 }, # endif -- cgit v1.2.3 From 460df4c1fc7c00829050c08d6368dc6e6beef307 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 8 Feb 2017 11:50:15 +0100 Subject: KVM: race-free exit from KVM_RUN without POSIX signals MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The purpose of the KVM_SET_SIGNAL_MASK API is to let userspace "kick" a VCPU out of KVM_RUN through a POSIX signal. A signal is attached to a dummy signal handler; by blocking the signal outside KVM_RUN and unblocking it inside, this possible race is closed: VCPU thread service thread -------------------------------------------------------------- check flag set flag raise signal (signal handler does nothing) KVM_RUN However, one issue with KVM_SET_SIGNAL_MASK is that it has to take tsk->sighand->siglock on every KVM_RUN. This lock is often on a remote NUMA node, because it is on the node of a thread's creator. Taking this lock can be very expensive if there are many userspace exits (as is the case for SMP Windows VMs without Hyper-V reference time counter). As an alternative, we can put the flag directly in kvm_run so that KVM can see it: VCPU thread service thread -------------------------------------------------------------- raise signal signal handler set run->immediate_exit KVM_RUN check run->immediate_exit Reviewed-by: Radim Krčmář Reviewed-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- Documentation/virtual/kvm/api.txt | 13 ++++++++++++- arch/arm/kvm/arm.c | 4 ++++ arch/mips/kvm/mips.c | 7 ++++++- arch/powerpc/kvm/powerpc.c | 6 +++++- arch/s390/kvm/kvm-s390.c | 4 ++++ arch/x86/kvm/x86.c | 6 +++++- include/uapi/linux/kvm.h | 4 +++- 7 files changed, 39 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/virtual/kvm/api.txt b/Documentation/virtual/kvm/api.txt index e4f2cdcf78eb..069450938b79 100644 --- a/Documentation/virtual/kvm/api.txt +++ b/Documentation/virtual/kvm/api.txt @@ -3389,7 +3389,18 @@ struct kvm_run { Request that KVM_RUN return when it becomes possible to inject external interrupts into the guest. Useful in conjunction with KVM_INTERRUPT. - __u8 padding1[7]; + __u8 immediate_exit; + +This field is polled once when KVM_RUN starts; if non-zero, KVM_RUN +exits immediately, returning -EINTR. In the common scenario where a +signal is used to "kick" a VCPU out of KVM_RUN, this field can be used +to avoid usage of KVM_SET_SIGNAL_MASK, which has worse scalability. +Rather than blocking the signal outside KVM_RUN, userspace can set up +a signal handler that sets run->immediate_exit to a non-zero value. + +This field is ignored if KVM_CAP_IMMEDIATE_EXIT is not available. + + __u8 padding1[6]; /* out */ __u32 exit_reason; diff --git a/arch/arm/kvm/arm.c b/arch/arm/kvm/arm.c index 21c493a9e5c9..c9a2103faeb9 100644 --- a/arch/arm/kvm/arm.c +++ b/arch/arm/kvm/arm.c @@ -206,6 +206,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ARM_PSCI_0_2: case KVM_CAP_READONLY_MEM: case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_COALESCED_MMIO: @@ -604,6 +605,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) return ret; } + if (run->immediate_exit) + return -EINTR; + if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &vcpu->sigset, &sigsaved); diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index 31ee5ee0010b..ed81e5ac1426 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -397,7 +397,7 @@ int kvm_arch_vcpu_ioctl_set_guest_debug(struct kvm_vcpu *vcpu, int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) { - int r = 0; + int r = -EINTR; sigset_t sigsaved; if (vcpu->sigset_active) @@ -409,6 +409,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) vcpu->mmio_needed = 0; } + if (run->immediate_exit) + goto out; + lose_fpu(1); local_irq_disable(); @@ -429,6 +432,7 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) guest_exit_irqoff(); local_irq_enable(); +out: if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); @@ -1021,6 +1025,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ENABLE_CAP: case KVM_CAP_READONLY_MEM: case KVM_CAP_SYNC_MMU: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_COALESCED_MMIO: diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2b3e4e620078..1fe1391ba2c2 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -511,6 +511,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_ONE_REG: case KVM_CAP_IOEVENTFD: case KVM_CAP_DEVICE_CTRL: + case KVM_CAP_IMMEDIATE_EXIT: r = 1; break; case KVM_CAP_PPC_PAIRED_SINGLES: @@ -1117,7 +1118,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *run) #endif } - r = kvmppc_vcpu_run(run, vcpu); + if (run->immediate_exit) + r = -EINTR; + else + r = kvmppc_vcpu_run(run, vcpu); if (vcpu->sigset_active) sigprocmask(SIG_SETMASK, &sigsaved, NULL); diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index 502de74ea984..99e35fe0dea8 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -370,6 +370,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_S390_IRQCHIP: case KVM_CAP_VM_ATTRIBUTES: case KVM_CAP_MP_STATE: + case KVM_CAP_IMMEDIATE_EXIT: case KVM_CAP_S390_INJECT_IRQ: case KVM_CAP_S390_USER_SIGP: case KVM_CAP_S390_USER_STSI: @@ -2798,6 +2799,9 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) int rc; sigset_t sigsaved; + if (kvm_run->immediate_exit) + return -EINTR; + if (guestdbg_exit_pending(vcpu)) { kvm_s390_prepare_debug_exit(vcpu); return 0; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 0aa8db229e0a..8d3047c8cce7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -2672,6 +2672,7 @@ int kvm_vm_ioctl_check_extension(struct kvm *kvm, long ext) case KVM_CAP_DISABLE_QUIRKS: case KVM_CAP_SET_BOOT_CPU_ID: case KVM_CAP_SPLIT_IRQCHIP: + case KVM_CAP_IMMEDIATE_EXIT: #ifdef CONFIG_KVM_DEVICE_ASSIGNMENT case KVM_CAP_ASSIGN_DEV_IRQ: case KVM_CAP_PCI_2_3: @@ -7202,7 +7203,10 @@ int kvm_arch_vcpu_ioctl_run(struct kvm_vcpu *vcpu, struct kvm_run *kvm_run) } else WARN_ON(vcpu->arch.pio.count || vcpu->mmio_needed); - r = vcpu_run(vcpu); + if (kvm_run->immediate_exit) + r = -EINTR; + else + r = vcpu_run(vcpu); out: post_kvm_run_save(vcpu); diff --git a/include/uapi/linux/kvm.h b/include/uapi/linux/kvm.h index 7964b970b9ad..f51d5082a377 100644 --- a/include/uapi/linux/kvm.h +++ b/include/uapi/linux/kvm.h @@ -218,7 +218,8 @@ struct kvm_hyperv_exit { struct kvm_run { /* in */ __u8 request_interrupt_window; - __u8 padding1[7]; + __u8 immediate_exit; + __u8 padding1[6]; /* out */ __u32 exit_reason; @@ -881,6 +882,7 @@ struct kvm_ppc_resize_hpt { #define KVM_CAP_SPAPR_RESIZE_HPT 133 #define KVM_CAP_PPC_MMU_RADIX 134 #define KVM_CAP_PPC_MMU_HASH_V3 135 +#define KVM_CAP_IMMEDIATE_EXIT 136 #ifdef KVM_CAP_IRQ_ROUTING -- cgit v1.2.3 From bd7e5b0899a429445cc6e3037c13f8b5ae3be903 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 3 Feb 2017 21:18:52 -0800 Subject: KVM: x86: remove code for lazy FPU handling The FPU is always active now when running KVM. Reviewed-by: David Matlack Reviewed-by: Bandan Das Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_host.h | 3 -- arch/x86/kvm/cpuid.c | 2 - arch/x86/kvm/svm.c | 43 ++------------- arch/x86/kvm/vmx.c | 112 ++++++---------------------------------- arch/x86/kvm/x86.c | 7 +-- include/linux/kvm_host.h | 1 - 6 files changed, 19 insertions(+), 149 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_host.h b/arch/x86/include/asm/kvm_host.h index e4f13e714bcf..74ef58c8ff53 100644 --- a/arch/x86/include/asm/kvm_host.h +++ b/arch/x86/include/asm/kvm_host.h @@ -55,7 +55,6 @@ #define KVM_REQ_TRIPLE_FAULT 10 #define KVM_REQ_MMU_SYNC 11 #define KVM_REQ_CLOCK_UPDATE 12 -#define KVM_REQ_DEACTIVATE_FPU 13 #define KVM_REQ_EVENT 14 #define KVM_REQ_APF_HALT 15 #define KVM_REQ_STEAL_UPDATE 16 @@ -936,8 +935,6 @@ struct kvm_x86_ops { unsigned long (*get_rflags)(struct kvm_vcpu *vcpu); void (*set_rflags)(struct kvm_vcpu *vcpu, unsigned long rflags); u32 (*get_pkru)(struct kvm_vcpu *vcpu); - void (*fpu_activate)(struct kvm_vcpu *vcpu); - void (*fpu_deactivate)(struct kvm_vcpu *vcpu); void (*tlb_flush)(struct kvm_vcpu *vcpu); diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index c0e2036217ad..1d155cc56629 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -123,8 +123,6 @@ int kvm_update_cpuid(struct kvm_vcpu *vcpu) if (best && (best->eax & (F(XSAVES) | F(XSAVEC)))) best->ebx = xstate_required_size(vcpu->arch.xcr0, true); - kvm_x86_ops->fpu_activate(vcpu); - /* * The existing code assumes virtual address is 48-bit in the canonical * address checks; exit if it is ever changed. diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index 4e5905a1ce70..d1efe2c62b3f 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1157,7 +1157,6 @@ static void init_vmcb(struct vcpu_svm *svm) struct vmcb_control_area *control = &svm->vmcb->control; struct vmcb_save_area *save = &svm->vmcb->save; - svm->vcpu.fpu_active = 1; svm->vcpu.arch.hflags = 0; set_cr_intercept(svm, INTERCEPT_CR0_READ); @@ -1899,15 +1898,12 @@ static void update_cr0_intercept(struct vcpu_svm *svm) ulong gcr0 = svm->vcpu.arch.cr0; u64 *hcr0 = &svm->vmcb->save.cr0; - if (!svm->vcpu.fpu_active) - *hcr0 |= SVM_CR0_SELECTIVE_MASK; - else - *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) - | (gcr0 & SVM_CR0_SELECTIVE_MASK); + *hcr0 = (*hcr0 & ~SVM_CR0_SELECTIVE_MASK) + | (gcr0 & SVM_CR0_SELECTIVE_MASK); mark_dirty(svm->vmcb, VMCB_CR); - if (gcr0 == *hcr0 && svm->vcpu.fpu_active) { + if (gcr0 == *hcr0) { clr_cr_intercept(svm, INTERCEPT_CR0_READ); clr_cr_intercept(svm, INTERCEPT_CR0_WRITE); } else { @@ -1938,8 +1934,6 @@ static void svm_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (!npt_enabled) cr0 |= X86_CR0_PG | X86_CR0_WP; - if (!vcpu->fpu_active) - cr0 |= X86_CR0_TS; /* * re-enable caching here because the QEMU bios * does not do it - this results in some delay at @@ -2158,22 +2152,6 @@ static int ac_interception(struct vcpu_svm *svm) return 1; } -static void svm_fpu_activate(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - clr_exception_intercept(svm, NM_VECTOR); - - svm->vcpu.fpu_active = 1; - update_cr0_intercept(svm); -} - -static int nm_interception(struct vcpu_svm *svm) -{ - svm_fpu_activate(&svm->vcpu); - return 1; -} - static bool is_erratum_383(void) { int err, i; @@ -2571,9 +2549,6 @@ static int nested_svm_exit_special(struct vcpu_svm *svm) if (!npt_enabled && svm->apf_reason == 0) return NESTED_EXIT_HOST; break; - case SVM_EXIT_EXCP_BASE + NM_VECTOR: - nm_interception(svm); - break; default: break; } @@ -4018,7 +3993,6 @@ static int (*const svm_exit_handlers[])(struct vcpu_svm *svm) = { [SVM_EXIT_EXCP_BASE + BP_VECTOR] = bp_interception, [SVM_EXIT_EXCP_BASE + UD_VECTOR] = ud_interception, [SVM_EXIT_EXCP_BASE + PF_VECTOR] = pf_interception, - [SVM_EXIT_EXCP_BASE + NM_VECTOR] = nm_interception, [SVM_EXIT_EXCP_BASE + MC_VECTOR] = mc_interception, [SVM_EXIT_EXCP_BASE + AC_VECTOR] = ac_interception, [SVM_EXIT_INTR] = intr_interception, @@ -5072,14 +5046,6 @@ static bool svm_has_wbinvd_exit(void) return true; } -static void svm_fpu_deactivate(struct kvm_vcpu *vcpu) -{ - struct vcpu_svm *svm = to_svm(vcpu); - - set_exception_intercept(svm, NM_VECTOR); - update_cr0_intercept(svm); -} - #define PRE_EX(exit) { .exit_code = (exit), \ .stage = X86_ICPT_PRE_EXCEPT, } #define POST_EX(exit) { .exit_code = (exit), \ @@ -5340,9 +5306,6 @@ static struct kvm_x86_ops svm_x86_ops __ro_after_init = { .get_pkru = svm_get_pkru, - .fpu_activate = svm_fpu_activate, - .fpu_deactivate = svm_fpu_deactivate, - .tlb_flush = svm_flush_tlb, .run = svm_vcpu_run, diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 0e0b5d09597e..9856b73a21ad 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1856,7 +1856,7 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) u32 eb; eb = (1u << PF_VECTOR) | (1u << UD_VECTOR) | (1u << MC_VECTOR) | - (1u << NM_VECTOR) | (1u << DB_VECTOR) | (1u << AC_VECTOR); + (1u << DB_VECTOR) | (1u << AC_VECTOR); if ((vcpu->guest_debug & (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) == (KVM_GUESTDBG_ENABLE | KVM_GUESTDBG_USE_SW_BP)) @@ -1865,8 +1865,6 @@ static void update_exception_bitmap(struct kvm_vcpu *vcpu) eb = ~0; if (enable_ept) eb &= ~(1u << PF_VECTOR); /* bypass_guest_pf = 0 */ - if (vcpu->fpu_active) - eb &= ~(1u << NM_VECTOR); /* When we are running a nested L2 guest and L1 specified for it a * certain exception bitmap, we must trap the same exceptions and pass @@ -2340,25 +2338,6 @@ static void vmx_vcpu_put(struct kvm_vcpu *vcpu) } } -static void vmx_fpu_activate(struct kvm_vcpu *vcpu) -{ - ulong cr0; - - if (vcpu->fpu_active) - return; - vcpu->fpu_active = 1; - cr0 = vmcs_readl(GUEST_CR0); - cr0 &= ~(X86_CR0_TS | X86_CR0_MP); - cr0 |= kvm_read_cr0_bits(vcpu, X86_CR0_TS | X86_CR0_MP); - vmcs_writel(GUEST_CR0, cr0); - update_exception_bitmap(vcpu); - vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; - if (is_guest_mode(vcpu)) - vcpu->arch.cr0_guest_owned_bits &= - ~get_vmcs12(vcpu)->cr0_guest_host_mask; - vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); -} - static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu); /* @@ -2377,33 +2356,6 @@ static inline unsigned long nested_read_cr4(struct vmcs12 *fields) (fields->cr4_read_shadow & fields->cr4_guest_host_mask); } -static void vmx_fpu_deactivate(struct kvm_vcpu *vcpu) -{ - /* Note that there is no vcpu->fpu_active = 0 here. The caller must - * set this *before* calling this function. - */ - vmx_decache_cr0_guest_bits(vcpu); - vmcs_set_bits(GUEST_CR0, X86_CR0_TS | X86_CR0_MP); - update_exception_bitmap(vcpu); - vcpu->arch.cr0_guest_owned_bits = 0; - vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); - if (is_guest_mode(vcpu)) { - /* - * L1's specified read shadow might not contain the TS bit, - * so now that we turned on shadowing of this bit, we need to - * set this bit of the shadow. Like in nested_vmx_run we need - * nested_read_cr0(vmcs12), but vmcs12->guest_cr0 is not yet - * up-to-date here because we just decached cr0.TS (and we'll - * only update vmcs12->guest_cr0 on nested exit). - */ - struct vmcs12 *vmcs12 = get_vmcs12(vcpu); - vmcs12->guest_cr0 = (vmcs12->guest_cr0 & ~X86_CR0_TS) | - (vcpu->arch.cr0 & X86_CR0_TS); - vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12)); - } else - vmcs_writel(CR0_READ_SHADOW, vcpu->arch.cr0); -} - static unsigned long vmx_get_rflags(struct kvm_vcpu *vcpu) { unsigned long rflags, save_rflags; @@ -4232,9 +4184,6 @@ static void vmx_set_cr0(struct kvm_vcpu *vcpu, unsigned long cr0) if (enable_ept) ept_update_paging_mode_cr0(&hw_cr0, cr0, vcpu); - if (!vcpu->fpu_active) - hw_cr0 |= X86_CR0_TS | X86_CR0_MP; - vmcs_writel(CR0_READ_SHADOW, cr0); vmcs_writel(GUEST_CR0, hw_cr0); vcpu->arch.cr0 = cr0; @@ -5321,7 +5270,9 @@ static int vmx_vcpu_setup(struct vcpu_vmx *vmx) /* 22.2.1, 20.8.1 */ vm_entry_controls_init(vmx, vmcs_config.vmentry_ctrl); - vmcs_writel(CR0_GUEST_HOST_MASK, ~0UL); + vmx->vcpu.arch.cr0_guest_owned_bits = X86_CR0_TS; + vmcs_writel(CR0_GUEST_HOST_MASK, ~X86_CR0_TS); + set_cr4_guest_host_mask(vmx); if (vmx_xsaves_supported()) @@ -5425,7 +5376,7 @@ static void vmx_vcpu_reset(struct kvm_vcpu *vcpu, bool init_event) vmx_set_cr0(vcpu, cr0); /* enter rmode */ vmx_set_cr4(vcpu, 0); vmx_set_efer(vcpu, 0); - vmx_fpu_activate(vcpu); + update_exception_bitmap(vcpu); vpid_sync_context(vmx->vpid); @@ -5698,11 +5649,6 @@ static int handle_exception(struct kvm_vcpu *vcpu) if (is_nmi(intr_info)) return 1; /* already handled by vmx_vcpu_run() */ - if (is_no_device(intr_info)) { - vmx_fpu_activate(vcpu); - return 1; - } - if (is_invalid_opcode(intr_info)) { if (is_guest_mode(vcpu)) { kvm_queue_exception(vcpu, UD_VECTOR); @@ -5892,22 +5838,6 @@ static int handle_set_cr4(struct kvm_vcpu *vcpu, unsigned long val) return kvm_set_cr4(vcpu, val); } -/* called to set cr0 as appropriate for clts instruction exit. */ -static void handle_clts(struct kvm_vcpu *vcpu) -{ - if (is_guest_mode(vcpu)) { - /* - * We get here when L2 did CLTS, and L1 didn't shadow CR0.TS - * but we did (!fpu_active). We need to keep GUEST_CR0.TS on, - * just pretend it's off (also in arch.cr0 for fpu_activate). - */ - vmcs_writel(CR0_READ_SHADOW, - vmcs_readl(CR0_READ_SHADOW) & ~X86_CR0_TS); - vcpu->arch.cr0 &= ~X86_CR0_TS; - } else - vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); -} - static int handle_cr(struct kvm_vcpu *vcpu) { unsigned long exit_qualification, val; @@ -5953,9 +5883,9 @@ static int handle_cr(struct kvm_vcpu *vcpu) } break; case 2: /* clts */ - handle_clts(vcpu); + WARN_ONCE(1, "Guest should always own CR0.TS"); + vmx_set_cr0(vcpu, kvm_read_cr0_bits(vcpu, ~X86_CR0_TS)); trace_kvm_cr_write(0, kvm_read_cr0(vcpu)); - vmx_fpu_activate(vcpu); return kvm_skip_emulated_instruction(vcpu); case 1: /*mov from cr*/ switch (cr) { @@ -10349,8 +10279,8 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, } /* - * This sets GUEST_CR0 to vmcs12->guest_cr0, with possibly a modified - * TS bit (for lazy fpu) and bits which we consider mandatory enabled. + * This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those + * bits which we consider mandatory enabled. * The CR0_READ_SHADOW is what L2 should have expected to read given * the specifications by L1; It's not enough to take * vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we @@ -10963,24 +10893,15 @@ static void load_vmcs12_host_state(struct kvm_vcpu *vcpu, vmx_set_rflags(vcpu, X86_EFLAGS_FIXED); /* * Note that calling vmx_set_cr0 is important, even if cr0 hasn't - * actually changed, because it depends on the current state of - * fpu_active (which may have changed). - * Note that vmx_set_cr0 refers to efer set above. + * actually changed, because vmx_set_cr0 refers to efer set above. + * + * CR0_GUEST_HOST_MASK is already set in the original vmcs01 + * (KVM doesn't change it); */ + vcpu->arch.cr0_guest_owned_bits = X86_CR0_TS; vmx_set_cr0(vcpu, vmcs12->host_cr0); - /* - * If we did fpu_activate()/fpu_deactivate() during L2's run, we need - * to apply the same changes to L1's vmcs. We just set cr0 correctly, - * but we also need to update cr0_guest_host_mask and exception_bitmap. - */ - update_exception_bitmap(vcpu); - vcpu->arch.cr0_guest_owned_bits = (vcpu->fpu_active ? X86_CR0_TS : 0); - vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits); - /* - * Note that CR4_GUEST_HOST_MASK is already set in the original vmcs01 - * (KVM doesn't change it)- no reason to call set_cr4_guest_host_mask(); - */ + /* Same as above - no reason to call set_cr4_guest_host_mask(). */ vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK); kvm_set_cr4(vcpu, vmcs12->host_cr4); @@ -11609,9 +11530,6 @@ static struct kvm_x86_ops vmx_x86_ops __ro_after_init = { .get_pkru = vmx_get_pkru, - .fpu_activate = vmx_fpu_activate, - .fpu_deactivate = vmx_fpu_deactivate, - .tlb_flush = vmx_flush_tlb, .run = vmx_vcpu_run, diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 8d3047c8cce7..c48404017e4f 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -6751,10 +6751,6 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) r = 0; goto out; } - if (kvm_check_request(KVM_REQ_DEACTIVATE_FPU, vcpu)) { - vcpu->fpu_active = 0; - kvm_x86_ops->fpu_deactivate(vcpu); - } if (kvm_check_request(KVM_REQ_APF_HALT, vcpu)) { /* Page is swapped out. Do synthetic halt */ vcpu->arch.apf.halted = true; @@ -6856,8 +6852,7 @@ static int vcpu_enter_guest(struct kvm_vcpu *vcpu) preempt_disable(); kvm_x86_ops->prepare_guest_switch(vcpu); - if (vcpu->fpu_active) - kvm_load_guest_fpu(vcpu); + kvm_load_guest_fpu(vcpu); /* * Disable IRQs before setting IN_GUEST_MODE. Posted interrupt diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2db458ee94b0..8d69d5150748 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -221,7 +221,6 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int fpu_active; int guest_fpu_loaded, guest_xcr0_loaded; struct swait_queue_head wq; struct pid *pid; -- cgit v1.2.3 From 9383191da4e40360a5d880fbe6bb03911c61621b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:49 +0100 Subject: bpf: remove stubs for cBPF from arch code Remove the dummy bpf_jit_compile() stubs for eBPF JITs and make that a single __weak function in the core that can be overridden similarly to the eBPF one. Also remove stale pr_err() mentions of bpf_jit_compile. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm64/net/bpf_jit_comp.c | 5 ----- arch/powerpc/net/bpf_jit_comp64.c | 2 -- arch/s390/net/bpf_jit_comp.c | 8 -------- arch/x86/net/bpf_jit_comp.c | 8 ++------ include/linux/filter.h | 6 +----- kernel/bpf/core.c | 12 +++++++++++- 6 files changed, 14 insertions(+), 27 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index b2fc97a2c56c..c444408d5a8c 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -813,11 +813,6 @@ static inline void bpf_flush_icache(void *start, void *end) flush_icache_range((unsigned long)start, (unsigned long)end); } -void bpf_jit_compile(struct bpf_prog *prog) -{ - /* Nothing to do here. We support Internal BPF. */ -} - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_prog *tmp, *orig_prog = prog; diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 73a5cf18fd84..f9ebd02260da 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -961,8 +961,6 @@ common_load: return 0; } -void bpf_jit_compile(struct bpf_prog *fp) { } - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { u32 proglen; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 167b31b186c1..6454efd22e63 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1262,14 +1262,6 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp) return 0; } -/* - * Classic BPF function stub. BPF programs will be converted into - * eBPF and then bpf_int_jit_compile() will be called. - */ -void bpf_jit_compile(struct bpf_prog *fp) -{ -} - /* * Compile eBPF program "fp" */ diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index bb660e53cbd6..26123d0ae13a 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1067,13 +1067,13 @@ common_load: ilen = prog - temp; if (ilen > BPF_MAX_INSN_SIZE) { - pr_err("bpf_jit_compile fatal insn size error\n"); + pr_err("bpf_jit: fatal insn size error\n"); return -EFAULT; } if (image) { if (unlikely(proglen + ilen > oldproglen)) { - pr_err("bpf_jit_compile fatal error\n"); + pr_err("bpf_jit: fatal error\n"); return -EFAULT; } memcpy(image + proglen, temp, ilen); @@ -1085,10 +1085,6 @@ common_load: return proglen; } -void bpf_jit_compile(struct bpf_prog *prog) -{ -} - struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) { struct bpf_binary_header *header = NULL; diff --git a/include/linux/filter.h b/include/linux/filter.h index e4eb2546339a..c7a70e0cc3a0 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -607,6 +607,7 @@ void sk_filter_uncharge(struct sock *sk, struct sk_filter *fp); u64 __bpf_call_base(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog); +void bpf_jit_compile(struct bpf_prog *prog); bool bpf_helper_changes_pkt_data(void *func); struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, @@ -625,7 +626,6 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, bpf_jit_fill_hole_t bpf_fill_ill_insns); void bpf_jit_binary_free(struct bpf_binary_header *hdr); -void bpf_jit_compile(struct bpf_prog *fp); void bpf_jit_free(struct bpf_prog *fp); struct bpf_prog *bpf_jit_blind_constants(struct bpf_prog *fp); @@ -669,10 +669,6 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } #else -static inline void bpf_jit_compile(struct bpf_prog *fp) -{ -} - static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index fddd76b1b627..2831ba1e71c1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -1154,12 +1154,22 @@ const struct bpf_func_proto bpf_tail_call_proto = { .arg3_type = ARG_ANYTHING, }; -/* For classic BPF JITs that don't implement bpf_int_jit_compile(). */ +/* Stub for JITs that only support cBPF. eBPF programs are interpreted. + * It is encouraged to implement bpf_int_jit_compile() instead, so that + * eBPF and implicitly also cBPF can get JITed! + */ struct bpf_prog * __weak bpf_int_jit_compile(struct bpf_prog *prog) { return prog; } +/* Stub for JITs that support eBPF. All cBPF code gets transformed into + * eBPF by the kernel and is later compiled by bpf_int_jit_compile(). + */ +void __weak bpf_jit_compile(struct bpf_prog *prog) +{ +} + bool __weak bpf_helper_changes_pkt_data(void *func) { return false; -- cgit v1.2.3 From 74451e66d516c55e309e8d89a4a1e7596e46aacd Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 16 Feb 2017 22:24:50 +0100 Subject: bpf: make jited programs visible in traces Long standing issue with JITed programs is that stack traces from function tracing check whether a given address is kernel code through {__,}kernel_text_address(), which checks for code in core kernel, modules and dynamically allocated ftrace trampolines. But what is still missing is BPF JITed programs (interpreted programs are not an issue as __bpf_prog_run() will be attributed to them), thus when a stack trace is triggered, the code walking the stack won't see any of the JITed ones. The same for address correlation done from user space via reading /proc/kallsyms. This is read by tools like perf, but the latter is also useful for permanent live tracing with eBPF itself in combination with stack maps when other eBPF types are part of the callchain. See offwaketime example on dumping stack from a map. This work tries to tackle that issue by making the addresses and symbols known to the kernel. The lookup from *kernel_text_address() is implemented through a latched RB tree that can be read under RCU in fast-path that is also shared for symbol/size/offset lookup for a specific given address in kallsyms. The slow-path iteration through all symbols in the seq file done via RCU list, which holds a tiny fraction of all exported ksyms, usually below 0.1 percent. Function symbols are exported as bpf_prog_, in order to aide debugging and attribution. This facility is currently enabled for root-only when bpf_jit_kallsyms is set to 1, and disabled if hardening is active in any mode. The rationale behind this is that still a lot of systems ship with world read permissions on kallsyms thus addresses should not get suddenly exposed for them. If that situation gets much better in future, we always have the option to change the default on this. Likewise, unprivileged programs are not allowed to add entries there either, but that is less of a concern as most such programs types relevant in this context are for root-only anyway. If enabled, call graphs and stack traces will then show a correct attribution; one example is illustrated below, where the trace is now visible in tooling such as perf script --kallsyms=/proc/kallsyms and friends. Before: 7fff8166889d bpf_clone_redirect+0x80007f0020ed (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff006451f1a007 (/usr/lib64/libc-2.18.so) After: 7fff816688b7 bpf_clone_redirect+0x80007f002107 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa0575728 bpf_prog_33c45a467c9e061a+0x8000600020fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fffa07ef1fc cls_bpf_classify+0x8000600020dc (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81678b68 tc_classify+0x80007f002078 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d40b __netif_receive_skb_core+0x80007f0025fb (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164d718 __netif_receive_skb+0x80007f002018 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164e565 process_backlog+0x80007f002095 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8164dc71 net_rx_action+0x80007f002231 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff81767461 __softirqentry_text_start+0x80007f0020d1 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817658ac do_softirq_own_stack+0x80007f00201c (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2c20 do_softirq+0x80007f002050 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff810a2cb5 __local_bh_enable_ip+0x80007f002085 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168d452 ip_finish_output2+0x80007f002152 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168ea3d ip_finish_output+0x80007f00217d (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff8168f2af ip_output+0x80007f00203f (/lib/modules/4.9.0-rc8+/build/vmlinux) [...] 7fff81005854 do_syscall_64+0x80007f002054 (/lib/modules/4.9.0-rc8+/build/vmlinux) 7fff817649eb return_from_SYSCALL_64+0x80007f002000 (/lib/modules/4.9.0-rc8+/build/vmlinux) f5d80 __sendmsg_nocancel+0xffff01c484812007 (/usr/lib64/libc-2.18.so) Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: linux-kernel@vger.kernel.org Signed-off-by: David S. Miller --- Documentation/sysctl/net.txt | 12 ++ arch/arm64/net/bpf_jit_comp.c | 15 --- arch/powerpc/net/bpf_jit_comp64.c | 1 + arch/s390/net/bpf_jit_comp.c | 18 --- arch/x86/net/bpf_jit_comp.c | 15 --- include/linux/bpf.h | 4 + include/linux/filter.h | 112 ++++++++++++++++++- kernel/bpf/core.c | 223 ++++++++++++++++++++++++++++++++++++++ kernel/bpf/syscall.c | 2 + kernel/extable.c | 9 +- kernel/kallsyms.c | 61 +++++++++-- net/Kconfig | 3 +- net/core/sysctl_net_core.c | 7 ++ 13 files changed, 419 insertions(+), 63 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/sysctl/net.txt b/Documentation/sysctl/net.txt index b80fbd4e5575..2ebabc93014a 100644 --- a/Documentation/sysctl/net.txt +++ b/Documentation/sysctl/net.txt @@ -54,6 +54,18 @@ Values : 1 - enable JIT hardening for unprivileged users only 2 - enable JIT hardening for all users +bpf_jit_kallsyms +---------------- + +When Berkeley Packet Filter Just in Time compiler is enabled, then compiled +images are unknown addresses to the kernel, meaning they neither show up in +traces nor in /proc/kallsyms. This enables export of these addresses, which +can be used for debugging/tracing. If bpf_jit_harden is enabled, this feature +is disabled. +Values : + 0 - disable JIT kallsyms export (default value) + 1 - enable JIT kallsyms export for privileged users only + dev_weight -------------- diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index c444408d5a8c..05d12104d270 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -910,18 +910,3 @@ out: tmp : orig_prog); return prog; } - -void bpf_jit_free(struct bpf_prog *prog) -{ - unsigned long addr = (unsigned long)prog->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!prog->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(prog); -} diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index f9ebd02260da..c34166ef76fc 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -1064,6 +1064,7 @@ out: return fp; } +/* Overriding bpf_jit_free() as we don't set images read-only. */ void bpf_jit_free(struct bpf_prog *fp) { unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 6454efd22e63..f1d0e62ec1dd 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1339,21 +1339,3 @@ out: tmp : orig_fp); return fp; } - -/* - * Free eBPF program - */ -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!fp->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(fp); -} diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 26123d0ae13a..18a62e208826 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1180,18 +1180,3 @@ out: tmp : orig_prog); return prog; } - -void bpf_jit_free(struct bpf_prog *fp) -{ - unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; - struct bpf_binary_header *header = (void *)addr; - - if (!fp->jited) - goto free_filter; - - set_memory_rw(addr, header->pages); - bpf_jit_binary_free(header); - -free_filter: - bpf_prog_unlock_free(fp); -} diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 57d60dc5b600..909fc033173a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -8,10 +8,12 @@ #define _LINUX_BPF_H 1 #include + #include #include #include #include +#include struct perf_event; struct bpf_map; @@ -177,6 +179,8 @@ struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; u32 max_ctx_offset; + struct latch_tree_node ksym_tnode; + struct list_head ksym_lnode; const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; diff --git a/include/linux/filter.h b/include/linux/filter.h index c7a70e0cc3a0..0c1cc9143cb2 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -54,6 +54,12 @@ struct bpf_prog_aux; #define BPF_REG_AX MAX_BPF_REG #define MAX_BPF_JIT_REG (MAX_BPF_REG + 1) +/* As per nm, we expose JITed images as text (code) section for + * kallsyms. That way, tools like perf can find it to match + * addresses. + */ +#define BPF_SYM_ELF_TYPE 't' + /* BPF program can access up to 512 bytes of stack space. */ #define MAX_BPF_STACK 512 @@ -555,6 +561,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { set_memory_rw((unsigned long)fp, fp->pages); } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ + set_memory_rw((unsigned long)hdr, hdr->pages); +} #else static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { @@ -563,8 +574,21 @@ static inline void bpf_prog_lock_ro(struct bpf_prog *fp) static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } + +static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) +{ +} #endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +static inline struct bpf_binary_header * +bpf_jit_binary_hdr(const struct bpf_prog *fp) +{ + unsigned long real_start = (unsigned long)fp->bpf_func; + unsigned long addr = real_start & PAGE_MASK; + + return (void *)addr; +} + int sk_filter_trim_cap(struct sock *sk, struct sk_buff *skb, unsigned int cap); static inline int sk_filter(struct sock *sk, struct sk_buff *skb) { @@ -617,6 +641,7 @@ void bpf_warn_invalid_xdp_action(u32 act); #ifdef CONFIG_BPF_JIT extern int bpf_jit_enable; extern int bpf_jit_harden; +extern int bpf_jit_kallsyms; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); @@ -651,6 +676,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return fp->jited && bpf_jit_is_ebpf(); +} + static inline bool bpf_jit_blinding_enabled(void) { /* These are the prerequisites, should someone ever have the @@ -668,11 +698,91 @@ static inline bool bpf_jit_blinding_enabled(void) return true; } -#else + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + /* There are a couple of corner cases where kallsyms should + * not be enabled f.e. on hardening. + */ + if (bpf_jit_harden) + return false; + if (!bpf_jit_kallsyms) + return false; + if (bpf_jit_kallsyms == 1) + return true; + + return false; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym); +bool is_bpf_text_address(unsigned long addr); +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym); + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + const char *ret = __bpf_address_lookup(addr, size, off, sym); + + if (ret && modname) + *modname = NULL; + return ret; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp); +void bpf_prog_kallsyms_del(struct bpf_prog *fp); + +#else /* CONFIG_BPF_JIT */ + +static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) +{ + return false; +} + static inline void bpf_jit_free(struct bpf_prog *fp) { bpf_prog_unlock_free(fp); } + +static inline bool bpf_jit_kallsyms_enabled(void) +{ + return false; +} + +static inline const char * +__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + return NULL; +} + +static inline bool is_bpf_text_address(unsigned long addr) +{ + return false; +} + +static inline int bpf_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *sym) +{ + return -ERANGE; +} + +static inline const char * +bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} + +static inline void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ +} #endif /* CONFIG_BPF_JIT */ #define BPF_ANC BIT(15) diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2831ba1e71c1..f45827e205d3 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -28,6 +28,9 @@ #include #include #include +#include +#include +#include #include @@ -95,6 +98,8 @@ struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) fp->aux = aux; fp->aux->prog = fp; + INIT_LIST_HEAD_RCU(&fp->aux->ksym_lnode); + return fp; } EXPORT_SYMBOL_GPL(bpf_prog_alloc); @@ -290,6 +295,206 @@ struct bpf_prog *bpf_patch_insn_single(struct bpf_prog *prog, u32 off, } #ifdef CONFIG_BPF_JIT +static __always_inline void +bpf_get_prog_addr_region(const struct bpf_prog *prog, + unsigned long *symbol_start, + unsigned long *symbol_end) +{ + const struct bpf_binary_header *hdr = bpf_jit_binary_hdr(prog); + unsigned long addr = (unsigned long)hdr; + + WARN_ON_ONCE(!bpf_prog_ebpf_jited(prog)); + + *symbol_start = addr; + *symbol_end = addr + hdr->pages * PAGE_SIZE; +} + +static void bpf_get_prog_name(const struct bpf_prog *prog, char *sym) +{ + BUILD_BUG_ON(sizeof("bpf_prog_") + + sizeof(prog->tag) * 2 + 1 > KSYM_NAME_LEN); + + sym += snprintf(sym, KSYM_NAME_LEN, "bpf_prog_"); + sym = bin2hex(sym, prog->tag, sizeof(prog->tag)); + *sym = 0; +} + +static __always_inline unsigned long +bpf_get_prog_addr_start(struct latch_tree_node *n) +{ + unsigned long symbol_start, symbol_end; + const struct bpf_prog_aux *aux; + + aux = container_of(n, struct bpf_prog_aux, ksym_tnode); + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + + return symbol_start; +} + +static __always_inline bool bpf_tree_less(struct latch_tree_node *a, + struct latch_tree_node *b) +{ + return bpf_get_prog_addr_start(a) < bpf_get_prog_addr_start(b); +} + +static __always_inline int bpf_tree_comp(void *key, struct latch_tree_node *n) +{ + unsigned long val = (unsigned long)key; + unsigned long symbol_start, symbol_end; + const struct bpf_prog_aux *aux; + + aux = container_of(n, struct bpf_prog_aux, ksym_tnode); + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + + if (val < symbol_start) + return -1; + if (val >= symbol_end) + return 1; + + return 0; +} + +static const struct latch_tree_ops bpf_tree_ops = { + .less = bpf_tree_less, + .comp = bpf_tree_comp, +}; + +static DEFINE_SPINLOCK(bpf_lock); +static LIST_HEAD(bpf_kallsyms); +static struct latch_tree_root bpf_tree __cacheline_aligned; + +int bpf_jit_kallsyms __read_mostly; + +static void bpf_prog_ksym_node_add(struct bpf_prog_aux *aux) +{ + WARN_ON_ONCE(!list_empty(&aux->ksym_lnode)); + list_add_tail_rcu(&aux->ksym_lnode, &bpf_kallsyms); + latch_tree_insert(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); +} + +static void bpf_prog_ksym_node_del(struct bpf_prog_aux *aux) +{ + if (list_empty(&aux->ksym_lnode)) + return; + + latch_tree_erase(&aux->ksym_tnode, &bpf_tree, &bpf_tree_ops); + list_del_rcu(&aux->ksym_lnode); +} + +static bool bpf_prog_kallsyms_candidate(const struct bpf_prog *fp) +{ + return fp->jited && !bpf_prog_was_classic(fp); +} + +static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) +{ + return list_empty(&fp->aux->ksym_lnode) || + fp->aux->ksym_lnode.prev == LIST_POISON2; +} + +void bpf_prog_kallsyms_add(struct bpf_prog *fp) +{ + unsigned long flags; + + if (!bpf_prog_kallsyms_candidate(fp) || + !capable(CAP_SYS_ADMIN)) + return; + + spin_lock_irqsave(&bpf_lock, flags); + bpf_prog_ksym_node_add(fp->aux); + spin_unlock_irqrestore(&bpf_lock, flags); +} + +void bpf_prog_kallsyms_del(struct bpf_prog *fp) +{ + unsigned long flags; + + if (!bpf_prog_kallsyms_candidate(fp)) + return; + + spin_lock_irqsave(&bpf_lock, flags); + bpf_prog_ksym_node_del(fp->aux); + spin_unlock_irqrestore(&bpf_lock, flags); +} + +static struct bpf_prog *bpf_prog_kallsyms_find(unsigned long addr) +{ + struct latch_tree_node *n; + + if (!bpf_jit_kallsyms_enabled()) + return NULL; + + n = latch_tree_find((void *)addr, &bpf_tree, &bpf_tree_ops); + return n ? + container_of(n, struct bpf_prog_aux, ksym_tnode)->prog : + NULL; +} + +const char *__bpf_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char *sym) +{ + unsigned long symbol_start, symbol_end; + struct bpf_prog *prog; + char *ret = NULL; + + rcu_read_lock(); + prog = bpf_prog_kallsyms_find(addr); + if (prog) { + bpf_get_prog_addr_region(prog, &symbol_start, &symbol_end); + bpf_get_prog_name(prog, sym); + + ret = sym; + if (size) + *size = symbol_end - symbol_start; + if (off) + *off = addr - symbol_start; + } + rcu_read_unlock(); + + return ret; +} + +bool is_bpf_text_address(unsigned long addr) +{ + bool ret; + + rcu_read_lock(); + ret = bpf_prog_kallsyms_find(addr) != NULL; + rcu_read_unlock(); + + return ret; +} + +int bpf_get_kallsym(unsigned int symnum, unsigned long *value, char *type, + char *sym) +{ + unsigned long symbol_start, symbol_end; + struct bpf_prog_aux *aux; + unsigned int it = 0; + int ret = -ERANGE; + + if (!bpf_jit_kallsyms_enabled()) + return ret; + + rcu_read_lock(); + list_for_each_entry_rcu(aux, &bpf_kallsyms, ksym_lnode) { + if (it++ != symnum) + continue; + + bpf_get_prog_addr_region(aux->prog, &symbol_start, &symbol_end); + bpf_get_prog_name(aux->prog, sym); + + *value = symbol_start; + *type = BPF_SYM_ELF_TYPE; + + ret = 0; + break; + } + rcu_read_unlock(); + + return ret; +} + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -326,6 +531,24 @@ void bpf_jit_binary_free(struct bpf_binary_header *hdr) module_memfree(hdr); } +/* This symbol is only overridden by archs that have different + * requirements than the usual eBPF JITs, f.e. when they only + * implement cBPF JIT, do not set images read-only, etc. + */ +void __weak bpf_jit_free(struct bpf_prog *fp) +{ + if (fp->jited) { + struct bpf_binary_header *hdr = bpf_jit_binary_hdr(fp); + + bpf_jit_binary_unlock_ro(hdr); + bpf_jit_binary_free(hdr); + + WARN_ON_ONCE(!bpf_prog_kallsyms_verify_off(fp)); + } + + bpf_prog_unlock_free(fp); +} + int bpf_jit_harden __read_mostly; static int bpf_jit_blind_insn(const struct bpf_insn *from, diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f74ca17af64a..461eb1e66a0f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -707,6 +707,7 @@ void bpf_prog_put(struct bpf_prog *prog) { if (atomic_dec_and_test(&prog->aux->refcnt)) { trace_bpf_prog_put_rcu(prog); + bpf_prog_kallsyms_del(prog); call_rcu(&prog->aux->rcu, __bpf_prog_put_rcu); } } @@ -903,6 +904,7 @@ static int bpf_prog_load(union bpf_attr *attr) /* failed to allocate fd */ goto free_used_maps; + bpf_prog_kallsyms_add(prog); trace_bpf_prog_load(prog, err); return err; diff --git a/kernel/extable.c b/kernel/extable.c index e3beec4a2339..bd82117ad424 100644 --- a/kernel/extable.c +++ b/kernel/extable.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include @@ -104,6 +105,8 @@ int __kernel_text_address(unsigned long addr) return 1; if (is_ftrace_trampoline(addr)) return 1; + if (is_bpf_text_address(addr)) + return 1; /* * There might be init symbols in saved stacktraces. * Give those symbols a chance to be printed in @@ -123,7 +126,11 @@ int kernel_text_address(unsigned long addr) return 1; if (is_module_text_address(addr)) return 1; - return is_ftrace_trampoline(addr); + if (is_ftrace_trampoline(addr)) + return 1; + if (is_bpf_text_address(addr)) + return 1; + return 0; } /* diff --git a/kernel/kallsyms.c b/kernel/kallsyms.c index fafd1a3ef0da..6a3b249a2ae1 100644 --- a/kernel/kallsyms.c +++ b/kernel/kallsyms.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -300,10 +301,11 @@ int kallsyms_lookup_size_offset(unsigned long addr, unsigned long *symbolsize, unsigned long *offset) { char namebuf[KSYM_NAME_LEN]; + if (is_ksym_addr(addr)) return !!get_symbol_pos(addr, symbolsize, offset); - - return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf); + return !!module_address_lookup(addr, symbolsize, offset, NULL, namebuf) || + !!__bpf_address_lookup(addr, symbolsize, offset, namebuf); } /* @@ -318,6 +320,8 @@ const char *kallsyms_lookup(unsigned long addr, unsigned long *offset, char **modname, char *namebuf) { + const char *ret; + namebuf[KSYM_NAME_LEN - 1] = 0; namebuf[0] = 0; @@ -333,9 +337,13 @@ const char *kallsyms_lookup(unsigned long addr, return namebuf; } - /* See if it's in a module. */ - return module_address_lookup(addr, symbolsize, offset, modname, - namebuf); + /* See if it's in a module or a BPF JITed image. */ + ret = module_address_lookup(addr, symbolsize, offset, + modname, namebuf); + if (!ret) + ret = bpf_address_lookup(addr, symbolsize, + offset, modname, namebuf); + return ret; } int lookup_symbol_name(unsigned long addr, char *symname) @@ -471,6 +479,7 @@ EXPORT_SYMBOL(__print_symbol); /* To avoid using get_symbol_offset for every symbol, we carry prefix along. */ struct kallsym_iter { loff_t pos; + loff_t pos_mod_end; unsigned long value; unsigned int nameoff; /* If iterating in core kernel symbols. */ char type; @@ -481,13 +490,27 @@ struct kallsym_iter { static int get_ksymbol_mod(struct kallsym_iter *iter) { - if (module_get_kallsym(iter->pos - kallsyms_num_syms, &iter->value, - &iter->type, iter->name, iter->module_name, - &iter->exported) < 0) + int ret = module_get_kallsym(iter->pos - kallsyms_num_syms, + &iter->value, &iter->type, + iter->name, iter->module_name, + &iter->exported); + if (ret < 0) { + iter->pos_mod_end = iter->pos; return 0; + } + return 1; } +static int get_ksymbol_bpf(struct kallsym_iter *iter) +{ + iter->module_name[0] = '\0'; + iter->exported = 0; + return bpf_get_kallsym(iter->pos - iter->pos_mod_end, + &iter->value, &iter->type, + iter->name) < 0 ? 0 : 1; +} + /* Returns space to next name. */ static unsigned long get_ksymbol_core(struct kallsym_iter *iter) { @@ -508,16 +531,30 @@ static void reset_iter(struct kallsym_iter *iter, loff_t new_pos) iter->name[0] = '\0'; iter->nameoff = get_symbol_offset(new_pos); iter->pos = new_pos; + if (new_pos == 0) + iter->pos_mod_end = 0; +} + +static int update_iter_mod(struct kallsym_iter *iter, loff_t pos) +{ + iter->pos = pos; + + if (iter->pos_mod_end > 0 && + iter->pos_mod_end < iter->pos) + return get_ksymbol_bpf(iter); + + if (!get_ksymbol_mod(iter)) + return get_ksymbol_bpf(iter); + + return 1; } /* Returns false if pos at or past end of file. */ static int update_iter(struct kallsym_iter *iter, loff_t pos) { /* Module symbols can be accessed randomly. */ - if (pos >= kallsyms_num_syms) { - iter->pos = pos; - return get_ksymbol_mod(iter); - } + if (pos >= kallsyms_num_syms) + return update_iter_mod(iter, pos); /* If we're not on the desired position, reset to new position. */ if (pos != iter->pos) diff --git a/net/Kconfig b/net/Kconfig index f19c0c3b9589..102f781a0131 100644 --- a/net/Kconfig +++ b/net/Kconfig @@ -297,7 +297,8 @@ config BPF_JIT Note, admin should enable this feature changing: /proc/sys/net/core/bpf_jit_enable - /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_harden (optional) + /proc/sys/net/core/bpf_jit_kallsyms (optional) config NET_FLOW_LIMIT bool diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index eaa72eb0399c..4ead336e14ea 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -334,6 +334,13 @@ static struct ctl_table net_core_table[] = { .mode = 0600, .proc_handler = proc_dointvec, }, + { + .procname = "bpf_jit_kallsyms", + .data = &bpf_jit_kallsyms, + .maxlen = sizeof(int), + .mode = 0600, + .proc_handler = proc_dointvec, + }, # endif #endif { -- cgit v1.2.3 From 06ce521af9558814b8606c0476c54497cf83a653 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 24 Jan 2017 11:56:21 +0100 Subject: kvm: fix page struct leak in handle_vmon handle_vmon gets a reference on VMXON region page, but does not release it. Release the reference. Found by syzkaller; based on a patch by Dmitry. Reported-by: Dmitry Vyukov Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 9856b73a21ad..d13073c841ff 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -6996,13 +6996,18 @@ static int nested_vmx_check_vmptr(struct kvm_vcpu *vcpu, int exit_reason, } page = nested_get_page(vcpu, vmptr); - if (page == NULL || - *(u32 *)kmap(page) != VMCS12_REVISION) { + if (page == NULL) { nested_vmx_failInvalid(vcpu); + return kvm_skip_emulated_instruction(vcpu); + } + if (*(u32 *)kmap(page) != VMCS12_REVISION) { kunmap(page); + nested_release_page_clean(page); + nested_vmx_failInvalid(vcpu); return kvm_skip_emulated_instruction(vcpu); } kunmap(page); + nested_release_page_clean(page); vmx->nested.vmxon_ptr = vmptr; break; case EXIT_REASON_VMCLEAR: -- cgit v1.2.3 From 4f53ab14285802b298261f8b52af322039d1dfd0 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 20 Feb 2017 08:56:09 -0800 Subject: x86/asm: Define the kernel TSS limit in a macro Rather than open-coding the kernel TSS limit in set_tss_desc(), make it a real macro near the TSS layout definition. This is purely a cleanup. Cc: Thomas Garnier Cc: Jim Mattson Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/desc.h | 10 +--------- arch/x86/include/asm/processor.h | 10 ++++++++++ 2 files changed, 11 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 12080d87da3b..2e781bcc5e12 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -177,16 +177,8 @@ static inline void __set_tss_desc(unsigned cpu, unsigned int entry, void *addr) struct desc_struct *d = get_cpu_gdt_table(cpu); tss_desc tss; - /* - * sizeof(unsigned long) coming from an extra "long" at the end - * of the iobitmap. See tss_struct definition in processor.h - * - * -1? seg base+limit should be pointing to the address of the - * last valid byte - */ set_tssldt_descriptor(&tss, (unsigned long)addr, DESC_TSS, - IO_BITMAP_OFFSET + IO_BITMAP_BYTES + - sizeof(unsigned long) - 1); + __KERNEL_TSS_LIMIT); write_gdt_entry(d, entry, &tss, DESC_TSS); } diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 1be64da0384e..f8f1b7537abe 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -341,6 +341,16 @@ struct tss_struct { DECLARE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss); +/* + * sizeof(unsigned long) coming from an extra "long" at the end + * of the iobitmap. + * + * -1? seg base+limit should be pointing to the address of the + * last valid byte + */ +#define __KERNEL_TSS_LIMIT \ + (IO_BITMAP_OFFSET + IO_BITMAP_BYTES + sizeof(unsigned long) - 1) + #ifdef CONFIG_X86_32 DECLARE_PER_CPU(unsigned long, cpu_current_top_of_stack); #endif -- cgit v1.2.3 From e0c230634af99967da79a6ed1faecc720fb623ca Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 20 Feb 2017 08:56:10 -0800 Subject: x86/kvm/vmx: Don't fetch the TSS base from the GDT MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The current CPU's TSS base is a foregone conclusion, so there's no need to parse it out of the segment tables. This should save a couple cycles (as STR is surely microcoded and poorly optimized) but, more importantly, it's a cleanup and it means that segment_base() will never be called on 64-bit kernels. Cc: Thomas Garnier Cc: Jim Mattson Cc: Radim Krčmář Cc: Paolo Bonzini Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 14 ++++---------- 1 file changed, 4 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index d13073c841ff..3dbbf4ec471f 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2086,13 +2086,6 @@ static unsigned long segment_base(u16 selector) return v; } -static inline unsigned long kvm_read_tr_base(void) -{ - u16 tr; - asm("str %0" : "=g"(tr)); - return segment_base(tr); -} - static void vmx_save_host_state(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); @@ -2292,10 +2285,11 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) /* * Linux uses per-cpu TSS and GDT, so set these when switching - * processors. + * processors. See 22.2.4. */ - vmcs_writel(HOST_TR_BASE, kvm_read_tr_base()); /* 22.2.4 */ - vmcs_writel(HOST_GDTR_BASE, gdt->address); /* 22.2.4 */ + vmcs_writel(HOST_TR_BASE, + (unsigned long)this_cpu_ptr(&cpu_tss)); + vmcs_writel(HOST_GDTR_BASE, gdt->address); rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ -- cgit v1.2.3 From e28baeadcf0d657c6b6e849ae1b4faccb4faf326 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 20 Feb 2017 08:56:11 -0800 Subject: x86/kvm/vmx: Get rid of segment_base() on 64-bit kernels MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit It was a bit buggy (it didn't list all segment types that needed 64-bit fixups), but the bug was irrelevant because it wasn't called in any interesting context on 64-bit kernels and was only used for data segents on 32-bit kernels. To avoid confusion, make it explicitly 32-bit only. Cc: Thomas Garnier Cc: Jim Mattson Cc: Radim Krčmář Cc: Paolo Bonzini Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3dbbf4ec471f..3ddd72303fe4 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2057,6 +2057,12 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) } } +#ifdef CONFIG_X86_32 +/* + * On 32-bit kernels, VM exits still load the FS and GS bases from the + * VMCS rather than the segment table. KVM uses this helper to figure + * out the current bases to poke them into the VMCS before entry. + */ static unsigned long segment_base(u16 selector) { struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); @@ -2079,12 +2085,9 @@ static unsigned long segment_base(u16 selector) } d = (struct desc_struct *)(table_base + (selector & ~7)); v = get_desc_base(d); -#ifdef CONFIG_X86_64 - if (d->s == 0 && (d->type == 2 || d->type == 9 || d->type == 11)) - v |= ((unsigned long)((struct ldttss_desc64 *)d)->base3) << 32; -#endif return v; } +#endif static void vmx_save_host_state(struct kvm_vcpu *vcpu) { -- cgit v1.2.3 From 8c2e41f7ae1234c192ef497472ad306227c77c03 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 20 Feb 2017 08:56:12 -0800 Subject: x86/kvm/vmx: Simplify segment_base() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Use actual pointer types for pointers (instead of unsigned long) and replace hardcoded constants with the appropriate self-documenting macros. The function is still a bit messy, but this seems a lot better than before to me. This is mostly borrowed from a patch by Thomas Garnier. Cc: Thomas Garnier Cc: Jim Mattson Cc: Radim Krčmář Cc: Paolo Bonzini Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3ddd72303fe4..2dd94cf597cc 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2067,24 +2067,23 @@ static unsigned long segment_base(u16 selector) { struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); struct desc_struct *d; - unsigned long table_base; + struct desc_struct *table; unsigned long v; - if (!(selector & ~3)) + if (!(selector & ~SEGMENT_RPL_MASK)) return 0; - table_base = gdt->address; + table = (struct desc_struct *)gdt->address; - if (selector & 4) { /* from ldt */ + if ((selector & SEGMENT_TI_MASK) == SEGMENT_LDT) { u16 ldt_selector = kvm_read_ldt(); - if (!(ldt_selector & ~3)) + if (!(ldt_selector & ~SEGMENT_RPL_MASK)) return 0; - table_base = segment_base(ldt_selector); + table = (struct desc_struct *)segment_base(ldt_selector); } - d = (struct desc_struct *)(table_base + (selector & ~7)); - v = get_desc_base(d); + v = get_desc_base(&table[selector >> 3]); return v; } #endif -- cgit v1.2.3 From d3273deac9c0cdae32eb46f928487433eaa37f87 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 20 Feb 2017 08:56:13 -0800 Subject: x86/asm/64: Drop __cacheline_aligned from struct x86_hw_tss Historically, the entire TSS + io bitmap structure was cacheline aligned, but commit ca241c75037b ("x86: unify tss_struct") changed it (presumably inadvertently) so that the fixed-layout hardware part is cacheline-aligned and the io bitmap is after the padding. This wastes 24 bytes (the hardware part should be 104 bytes, but this pads it to 128 bytes) and, serves no purpose, and causes sizeof(struct x86_hw_tss) to have a confusing value. Drop the pointless alignment. Signed-off-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/processor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index f8f1b7537abe..1879cdf2b6ae 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -303,7 +303,7 @@ struct x86_hw_tss { u16 reserved5; u16 io_bitmap_base; -} __attribute__((packed)) ____cacheline_aligned; +} __attribute__((packed)); #endif /* -- cgit v1.2.3 From b7ffc44d5b2ea163899d09289ca7743d5c32e926 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Mon, 20 Feb 2017 08:56:14 -0800 Subject: x86/kvm/vmx: Defer TR reload after VM exit Intel's VMX is daft and resets the hidden TSS limit register to 0x67 on VMX reload, and the 0x67 is not configurable. KVM currently reloads TR using the LTR instruction on every exit, but this is quite slow because LTR is serializing. The 0x67 limit is entirely harmless unless ioperm() is in use, so defer the reload until a task using ioperm() is actually running. Here's some poorly done benchmarking using kvm-unit-tests: Before: cpuid 1313 vmcall 1195 mov_from_cr8 11 mov_to_cr8 17 inl_from_pmtimer 6770 inl_from_qemu 6856 inl_from_kernel 2435 outl_to_kernel 1402 After: cpuid 1291 vmcall 1181 mov_from_cr8 11 mov_to_cr8 16 inl_from_pmtimer 6457 inl_from_qemu 6209 inl_from_kernel 2339 outl_to_kernel 1391 Signed-off-by: Andy Lutomirski [Force-reload TR in invalidate_tss_limit. - Paolo] Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/desc.h | 48 +++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kernel/ioport.c | 5 +++++ arch/x86/kernel/process.c | 10 ++++++++++ arch/x86/kvm/vmx.c | 23 +++++++++------------- 4 files changed, 72 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index 2e781bcc5e12..cb8f9149f6c8 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -205,6 +205,54 @@ static inline void native_load_tr_desc(void) asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } +static inline void force_reload_TR(void) +{ + struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); + tss_desc tss; + + memcpy(&tss, &d[GDT_ENTRY_TSS], sizeof(tss_desc)); + + /* + * LTR requires an available TSS, and the TSS is currently + * busy. Make it be available so that LTR will work. + */ + tss.type = DESC_TSS; + write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); + + load_TR_desc(); +} + +DECLARE_PER_CPU(bool, need_tr_refresh); + +static inline void refresh_TR(void) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); + + if (unlikely(this_cpu_read(need_tr_refresh))) { + force_reload_TR(); + this_cpu_write(need_tr_refresh, false); + } +} + +/* + * If you do something evil that corrupts the cached TSS limit (I'm looking + * at you, VMX exits), call this function. + * + * The optimization here is that the TSS limit only matters for Linux if the + * IO bitmap is in use. If the TSS limit gets forced to its minimum value, + * everything works except that IO bitmap will be ignored and all CPL 3 IO + * instructions will #GP, which is exactly what we want for normal tasks. + */ +static inline void invalidate_tss_limit(void) +{ + DEBUG_LOCKS_WARN_ON(preemptible()); + + if (unlikely(test_thread_flag(TIF_IO_BITMAP))) + force_reload_TR(); + else + this_cpu_write(need_tr_refresh, true); +} + static inline void native_load_gdt(const struct desc_ptr *dtr) { asm volatile("lgdt %0"::"m" (*dtr)); diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index 589b3193f102..b01bc8517450 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -16,6 +16,7 @@ #include #include #include +#include /* * this changes the io permissions bitmap in the current task. @@ -45,6 +46,10 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) memset(bitmap, 0xff, IO_BITMAP_BYTES); t->io_bitmap_ptr = bitmap; set_thread_flag(TIF_IO_BITMAP); + + preempt_disable(); + refresh_TR(); + preempt_enable(); } /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index b615a1113f58..7780efa635b9 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -32,6 +32,7 @@ #include #include #include +#include /* * per-CPU TSS segments. Threads are completely 'soft' on Linux, @@ -64,6 +65,9 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { }; EXPORT_PER_CPU_SYMBOL(cpu_tss); +DEFINE_PER_CPU(bool, need_tr_refresh); +EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); + /* * this gets called so that we can store lazy state into memory and copy the * current task into the new thread. @@ -209,6 +213,12 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, */ memcpy(tss->io_bitmap, next->io_bitmap_ptr, max(prev->io_bitmap_max, next->io_bitmap_max)); + + /* + * Make sure that the TSS limit is correct for the CPU + * to notice the IO bitmap. + */ + refresh_TR(); } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* * Clear any possible leftover bits: diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 2dd94cf597cc..acf6013a0caf 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1990,19 +1990,6 @@ static void add_atomic_switch_msr(struct vcpu_vmx *vmx, unsigned msr, m->host[i].value = host_val; } -static void reload_tss(void) -{ - /* - * VT restores TR but not its size. Useless. - */ - struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); - struct desc_struct *descs; - - descs = (void *)gdt->address; - descs[GDT_ENTRY_TSS].type = 9; /* available TSS */ - load_TR_desc(); -} - static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) { u64 guest_efer = vmx->vcpu.arch.efer; @@ -2172,7 +2159,7 @@ static void __vmx_load_host_state(struct vcpu_vmx *vmx) loadsegment(es, vmx->host_state.es_sel); } #endif - reload_tss(); + invalidate_tss_limit(); #ifdef CONFIG_X86_64 wrmsrl(MSR_KERNEL_GS_BASE, vmx->msr_host_kernel_gs_base); #endif @@ -2293,6 +2280,14 @@ static void vmx_vcpu_load(struct kvm_vcpu *vcpu, int cpu) (unsigned long)this_cpu_ptr(&cpu_tss)); vmcs_writel(HOST_GDTR_BASE, gdt->address); + /* + * VM exits change the host TR limit to 0x67 after a VM + * exit. This is okay, since 0x67 covers everything except + * the IO bitmap and have have code to handle the IO bitmap + * being lost after a VM exit. + */ + BUILD_BUG_ON(IO_BITMAP_OFFSET - 1 != 0x67); + rdmsrl(MSR_IA32_SYSENTER_ESP, sysenter_esp); vmcs_writel(HOST_IA32_SYSENTER_ESP, sysenter_esp); /* 22.2.3 */ -- cgit v1.2.3 From 96794e4ed4d758272c486e1529e431efb7045265 Mon Sep 17 00:00:00 2001 From: Chao Peng Date: Tue, 21 Feb 2017 03:50:01 -0500 Subject: KVM: VMX: use correct vmcs_read/write for guest segment selector/base Guest segment selector is 16 bit field and guest segment base is natural width field. Fix two incorrect invocations accordingly. Without this patch, build fails when aggressive inlining is used with ICC. Cc: stable@vger.kernel.org Signed-off-by: Chao Peng Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index acf6013a0caf..ef4ba71dbb66 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3905,7 +3905,7 @@ static void fix_rmode_seg(int seg, struct kvm_segment *save) } vmcs_write16(sf->selector, var.selector); - vmcs_write32(sf->base, var.base); + vmcs_writel(sf->base, var.base); vmcs_write32(sf->limit, var.limit); vmcs_write32(sf->ar_bytes, vmx_segment_access_rights(&var)); } @@ -8270,7 +8270,7 @@ static void kvm_flush_pml_buffers(struct kvm *kvm) static void vmx_dump_sel(char *name, uint32_t sel) { pr_err("%s sel=0x%04x, attr=0x%05x, limit=0x%08x, base=0x%016lx\n", - name, vmcs_read32(sel), + name, vmcs_read16(sel), vmcs_read32(sel + GUEST_ES_AR_BYTES - GUEST_ES_SELECTOR), vmcs_read32(sel + GUEST_ES_LIMIT - GUEST_ES_SELECTOR), vmcs_readl(sel + GUEST_ES_BASE - GUEST_ES_SELECTOR)); -- cgit v1.2.3 From 6c62985d576c8a816f528c39204207b9f449d923 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 20 Feb 2017 13:36:03 -0500 Subject: x86/paravirt: Change vcp_is_preempted() arg type to long The cpu argument in the function prototype of vcpu_is_preempted() is changed from int to long. That makes it easier to provide a better optimized assembly version of that function. For Xen, vcpu_is_preempted(long) calls xen_vcpu_stolen(int), the downcast from long to int is not a problem as vCPU number won't exceed 32 bits. Signed-off-by: Waiman Long Acked-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/paravirt.h | 2 +- arch/x86/include/asm/qspinlock.h | 2 +- arch/x86/kernel/kvm.c | 2 +- arch/x86/kernel/paravirt-spinlocks.c | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index 1eea6ca40694..f75fbfe550f2 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -673,7 +673,7 @@ static __always_inline void pv_kick(int cpu) PVOP_VCALL1(pv_lock_ops.kick, cpu); } -static __always_inline bool pv_vcpu_is_preempted(int cpu) +static __always_inline bool pv_vcpu_is_preempted(long cpu) { return PVOP_CALLEE1(bool, pv_lock_ops.vcpu_is_preempted, cpu); } diff --git a/arch/x86/include/asm/qspinlock.h b/arch/x86/include/asm/qspinlock.h index c343ab52579f..48a706f641f2 100644 --- a/arch/x86/include/asm/qspinlock.h +++ b/arch/x86/include/asm/qspinlock.h @@ -34,7 +34,7 @@ static inline void queued_spin_unlock(struct qspinlock *lock) } #define vcpu_is_preempted vcpu_is_preempted -static inline bool vcpu_is_preempted(int cpu) +static inline bool vcpu_is_preempted(long cpu) { return pv_vcpu_is_preempted(cpu); } diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 36bc66416021..334173d2665a 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -589,7 +589,7 @@ out: local_irq_restore(flags); } -__visible bool __kvm_vcpu_is_preempted(int cpu) +__visible bool __kvm_vcpu_is_preempted(long cpu) { struct kvm_steal_time *src = &per_cpu(steal_time, cpu); diff --git a/arch/x86/kernel/paravirt-spinlocks.c b/arch/x86/kernel/paravirt-spinlocks.c index 6d4bf812af45..8caa8a18472b 100644 --- a/arch/x86/kernel/paravirt-spinlocks.c +++ b/arch/x86/kernel/paravirt-spinlocks.c @@ -20,7 +20,7 @@ bool pv_is_native_spin_unlock(void) __raw_callee_save___native_queued_spin_unlock; } -__visible bool __native_vcpu_is_preempted(int cpu) +__visible bool __native_vcpu_is_preempted(long cpu) { return false; } -- cgit v1.2.3 From dd0fd8bca1850ddadf5d33a9ed28f3707cd98ac7 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Mon, 20 Feb 2017 13:36:04 -0500 Subject: x86/kvm: Provide optimized version of vcpu_is_preempted() for x86-64 It was found when running fio sequential write test with a XFS ramdisk on a KVM guest running on a 2-socket x86-64 system, the %CPU times as reported by perf were as follows: 69.75% 0.59% fio [k] down_write 69.15% 0.01% fio [k] call_rwsem_down_write_failed 67.12% 1.12% fio [k] rwsem_down_write_failed 63.48% 52.77% fio [k] osq_lock 9.46% 7.88% fio [k] __raw_callee_save___kvm_vcpu_is_preempt 3.93% 3.93% fio [k] __kvm_vcpu_is_preempted Making vcpu_is_preempted() a callee-save function has a relatively high cost on x86-64 primarily due to at least one more cacheline of data access from the saving and restoring of registers (8 of them) to and from stack as well as one more level of function call. To reduce this performance overhead, an optimized assembly version of the the __raw_callee_save___kvm_vcpu_is_preempt() function is provided for x86-64. With this patch applied on a KVM guest on a 2-socket 16-core 32-thread system with 16 parallel jobs (8 on each socket), the aggregrate bandwidth of the fio test on an XFS ramdisk were as follows: I/O Type w/o patch with patch -------- --------- ---------- random read 8141.2 MB/s 8497.1 MB/s seq read 8229.4 MB/s 8304.2 MB/s random write 1675.5 MB/s 1701.5 MB/s seq write 1681.3 MB/s 1699.9 MB/s There are some increases in the aggregated bandwidth because of the patch. The perf data now became: 70.78% 0.58% fio [k] down_write 70.20% 0.01% fio [k] call_rwsem_down_write_failed 69.70% 1.17% fio [k] rwsem_down_write_failed 59.91% 55.42% fio [k] osq_lock 10.14% 10.14% fio [k] __kvm_vcpu_is_preempted The assembly code was verified by using a test kernel module to compare the output of C __kvm_vcpu_is_preempted() and that of assembly __raw_callee_save___kvm_vcpu_is_preempt() to verify that they matched. Suggested-by: Peter Zijlstra Signed-off-by: Waiman Long Acked-by: Peter Zijlstra (Intel) Signed-off-by: Paolo Bonzini --- arch/x86/kernel/asm-offsets_64.c | 9 +++++++++ arch/x86/kernel/kvm.c | 24 ++++++++++++++++++++++++ 2 files changed, 33 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/asm-offsets_64.c b/arch/x86/kernel/asm-offsets_64.c index 210927ee2e74..99332f550c48 100644 --- a/arch/x86/kernel/asm-offsets_64.c +++ b/arch/x86/kernel/asm-offsets_64.c @@ -13,6 +13,10 @@ static char syscalls_ia32[] = { #include }; +#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) +#include +#endif + int main(void) { #ifdef CONFIG_PARAVIRT @@ -22,6 +26,11 @@ int main(void) BLANK(); #endif +#if defined(CONFIG_KVM_GUEST) && defined(CONFIG_PARAVIRT_SPINLOCKS) + OFFSET(KVM_STEAL_TIME_preempted, kvm_steal_time, preempted); + BLANK(); +#endif + #define ENTRY(entry) OFFSET(pt_regs_ ## entry, pt_regs, entry) ENTRY(bx); ENTRY(cx); diff --git a/arch/x86/kernel/kvm.c b/arch/x86/kernel/kvm.c index 334173d2665a..d05797be2f64 100644 --- a/arch/x86/kernel/kvm.c +++ b/arch/x86/kernel/kvm.c @@ -589,6 +589,7 @@ out: local_irq_restore(flags); } +#ifdef CONFIG_X86_32 __visible bool __kvm_vcpu_is_preempted(long cpu) { struct kvm_steal_time *src = &per_cpu(steal_time, cpu); @@ -597,6 +598,29 @@ __visible bool __kvm_vcpu_is_preempted(long cpu) } PV_CALLEE_SAVE_REGS_THUNK(__kvm_vcpu_is_preempted); +#else + +#include + +extern bool __raw_callee_save___kvm_vcpu_is_preempted(long); + +/* + * Hand-optimize version for x86-64 to avoid 8 64-bit register saving and + * restoring to/from the stack. + */ +asm( +".pushsection .text;" +".global __raw_callee_save___kvm_vcpu_is_preempted;" +".type __raw_callee_save___kvm_vcpu_is_preempted, @function;" +"__raw_callee_save___kvm_vcpu_is_preempted:" +"movq __per_cpu_offset(,%rdi,8), %rax;" +"cmpb $0, " __stringify(KVM_STEAL_TIME_preempted) "+steal_time(%rax);" +"setne %al;" +"ret;" +".popsection"); + +#endif + /* * Setup pv_lock_ops to exploit KVM_FEATURE_PV_UNHALT if present. */ -- cgit v1.2.3 From d2852a2240509e512712e25de2d0796cda435ecb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 21 Feb 2017 16:09:33 +0100 Subject: arch: add ARCH_HAS_SET_MEMORY config Currently, there's no good way to test for the presence of set_memory_ro/rw/x/nx() helpers implemented by archs such as x86, arm, arm64 and s390. There's DEBUG_SET_MODULE_RONX and DEBUG_RODATA, however both don't really reflect that: set_memory_*() are also available even when DEBUG_SET_MODULE_RONX is turned off, and DEBUG_RODATA is set by parisc, but doesn't implement above functions. Thus, add ARCH_HAS_SET_MEMORY that is selected by mentioned archs, where generic code can test against this. This also allows later on to move DEBUG_SET_MODULE_RONX out of the arch specific Kconfig to define it only once depending on ARCH_HAS_SET_MEMORY. Suggested-by: Laura Abbott Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/Kconfig | 4 ++++ arch/arm/Kconfig | 1 + arch/arm64/Kconfig | 1 + arch/s390/Kconfig | 1 + arch/x86/Kconfig | 1 + 5 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index bd04eace455c..e8ada79ec71f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -222,6 +222,10 @@ config GENERIC_SMP_IDLE_THREAD config GENERIC_IDLE_POLL_SETUP bool +# Select if arch has all set_memory_ro/rw/x/nx() functions in asm/cacheflush.h +config ARCH_HAS_SET_MEMORY + bool + # Select if arch init_task initializer is different to init/init_task.c config ARCH_INIT_TASK bool diff --git a/arch/arm/Kconfig b/arch/arm/Kconfig index 186c4c214e0a..edae056b2af0 100644 --- a/arch/arm/Kconfig +++ b/arch/arm/Kconfig @@ -4,6 +4,7 @@ config ARM select ARCH_CLOCKSOURCE_DATA select ARCH_HAS_DEVMEM_IS_ALLOWED select ARCH_HAS_ELF_RANDOMIZE + select ARCH_HAS_SET_MEMORY select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_HAVE_CUSTOM_GPIO_H select ARCH_HAS_GCOV_PROFILE_ALL diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig index 111742126897..1853405a897e 100644 --- a/arch/arm64/Kconfig +++ b/arch/arm64/Kconfig @@ -12,6 +12,7 @@ config ARM64 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_TICK_BROADCAST if GENERIC_CLOCKEVENTS_BROADCAST select ARCH_USE_CMPXCHG_LOCKREF diff --git a/arch/s390/Kconfig b/arch/s390/Kconfig index c6722112527d..094deb1abbe7 100644 --- a/arch/s390/Kconfig +++ b/arch/s390/Kconfig @@ -72,6 +72,7 @@ config S390 select ARCH_HAS_GCOV_PROFILE_ALL select ARCH_HAS_GIGANTIC_PAGE select ARCH_HAS_KCOV + select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e487493bbd47..434dd2a1c5f2 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -53,6 +53,7 @@ config X86 select ARCH_HAS_KCOV if X86_64 select ARCH_HAS_MMIO_FLUSH select ARCH_HAS_PMEM_API if X86_64 + select ARCH_HAS_SET_MEMORY select ARCH_HAS_SG_CHAIN select ARCH_HAS_UBSAN_SANITIZE_ALL select ARCH_HAVE_NMI_SAFE_CMPXCHG -- cgit v1.2.3 From 9d876e79df6a2f364b9f2737eacd72ceb27da53a Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 21 Feb 2017 16:09:34 +0100 Subject: bpf: fix unlocking of jited image when module ronx not set Eric and Willem reported that they recently saw random crashes when JIT was in use and bisected this to 74451e66d516 ("bpf: make jited programs visible in traces"). Issue was that the consolidation part added bpf_jit_binary_unlock_ro() that would unlock previously made read-only memory back to read-write. However, DEBUG_SET_MODULE_RONX cannot be used for this to test for presence of set_memory_*() functions. We need to use ARCH_HAS_SET_MEMORY instead to fix this; also add the corresponding bpf_jit_binary_lock_ro() to filter.h. Fixes: 74451e66d516 ("bpf: make jited programs visible in traces") Reported-by: Eric Dumazet Reported-by: Willem de Bruijn Bisected-by: Eric Dumazet Signed-off-by: Daniel Borkmann Tested-by: Willem de Bruijn Signed-off-by: David S. Miller --- arch/arm64/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 13 +++++++++++-- 4 files changed, 14 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 05d12104d270..a785554916c0 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -898,7 +898,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, ctx.image + ctx.idx); - set_memory_ro((unsigned long)header, header->pages); + bpf_jit_binary_lock_ro(header); prog->bpf_func = (void *)ctx.image; prog->jited = 1; diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f1d0e62ec1dd..b49c52a02087 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -1327,7 +1327,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) print_fn_code(jit.prg_buf, jit.size_prg); } if (jit.prg_buf) { - set_memory_ro((unsigned long)header, header->pages); + bpf_jit_binary_lock_ro(header); fp->bpf_func = (void *) jit.prg_buf; fp->jited = 1; } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 18a62e208826..32322ce9b405 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1165,7 +1165,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog) if (image) { bpf_flush_icache(header, image + proglen); - set_memory_ro((unsigned long)header, header->pages); + bpf_jit_binary_lock_ro(header); prog->bpf_func = (void *)image; prog->jited = 1; } else { diff --git a/include/linux/filter.h b/include/linux/filter.h index 0c1cc9143cb2..0c167fdee5f7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -551,7 +551,7 @@ static inline bool bpf_prog_was_classic(const struct bpf_prog *prog) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) -#ifdef CONFIG_DEBUG_SET_MODULE_RONX +#ifdef CONFIG_ARCH_HAS_SET_MEMORY static inline void bpf_prog_lock_ro(struct bpf_prog *fp) { set_memory_ro((unsigned long)fp, fp->pages); @@ -562,6 +562,11 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) set_memory_rw((unsigned long)fp, fp->pages); } +static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) +{ + set_memory_ro((unsigned long)hdr, hdr->pages); +} + static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { set_memory_rw((unsigned long)hdr, hdr->pages); @@ -575,10 +580,14 @@ static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) { } +static inline void bpf_jit_binary_lock_ro(struct bpf_binary_header *hdr) +{ +} + static inline void bpf_jit_binary_unlock_ro(struct bpf_binary_header *hdr) { } -#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ +#endif /* CONFIG_ARCH_HAS_SET_MEMORY */ static inline struct bpf_binary_header * bpf_jit_binary_hdr(const struct bpf_prog *fp) -- cgit v1.2.3 From c74fd80f2f41d05f350bb478151021f88551afe8 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 13 Jan 2017 15:07:51 -0500 Subject: xen: do not re-use pirq number cached in pci device msi msg data Revert the main part of commit: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") That commit introduced reading the pci device's msi message data to see if a pirq was previously configured for the device's msi/msix, and re-use that pirq. At the time, that was the correct behavior. However, a later change to Qemu caused it to call into the Xen hypervisor to unmap all pirqs for a pci device, when the pci device disables its MSI/MSIX vectors; specifically the Qemu commit: c976437c7dba9c7444fb41df45468968aaa326ad ("qemu-xen: free all the pirqs for msi/msix when driver unload") Once Qemu added this pirq unmapping, it was no longer correct for the kernel to re-use the pirq number cached in the pci device msi message data. All Qemu releases since 2.1.0 contain the patch that unmaps the pirqs when the pci device disables its MSI/MSIX vectors. This bug is causing failures to initialize multiple NVMe controllers under Xen, because the NVMe driver sets up a single MSIX vector for each controller (concurrently), and then after using that to talk to the controller for some configuration data, it disables the single MSIX vector and re-configures all the MSIX vectors it needs. So the MSIX setup code tries to re-use the cached pirq from the first vector for each controller, but the hypervisor has already given away that pirq to another controller, and its initialization fails. This is discussed in more detail at: https://lists.xen.org/archives/html/xen-devel/2017-01/msg00447.html Fixes: af42b8d12f8a ("xen: fix MSI setup and teardown for PV on HVM guests") Signed-off-by: Dan Streetman Reviewed-by: Stefano Stabellini Acked-by: Konrad Rzeszutek Wilk Signed-off-by: Boris Ostrovsky --- arch/x86/pci/xen.c | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/pci/xen.c b/arch/x86/pci/xen.c index e1fb269c87af..292ab0364a89 100644 --- a/arch/x86/pci/xen.c +++ b/arch/x86/pci/xen.c @@ -234,23 +234,14 @@ static int xen_hvm_setup_msi_irqs(struct pci_dev *dev, int nvec, int type) return 1; for_each_pci_msi_entry(msidesc, dev) { - __pci_read_msi_msg(msidesc, &msg); - pirq = MSI_ADDR_EXT_DEST_ID(msg.address_hi) | - ((msg.address_lo >> MSI_ADDR_DEST_ID_SHIFT) & 0xff); - if (msg.data != XEN_PIRQ_MSI_DATA || - xen_irq_from_pirq(pirq) < 0) { - pirq = xen_allocate_pirq_msi(dev, msidesc); - if (pirq < 0) { - irq = -ENODEV; - goto error; - } - xen_msi_compose_msg(dev, pirq, &msg); - __pci_write_msi_msg(msidesc, &msg); - dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); - } else { - dev_dbg(&dev->dev, - "xen: msi already bound to pirq=%d\n", pirq); + pirq = xen_allocate_pirq_msi(dev, msidesc); + if (pirq < 0) { + irq = -ENODEV; + goto error; } + xen_msi_compose_msg(dev, pirq, &msg); + __pci_write_msi_msg(msidesc, &msg); + dev_dbg(&dev->dev, "xen: msi bound to pirq=%d\n", pirq); irq = xen_bind_pirq_msi_to_irq(dev, msidesc, pirq, (type == PCI_CAP_ID_MSI) ? nvec : 1, (type == PCI_CAP_ID_MSIX) ? -- cgit v1.2.3 From ddffe98d166f4a93d996d5aa628fd745311fc1e7 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Wed, 22 Feb 2017 15:45:13 -0800 Subject: mm/memory_hotplug: set magic number to page->freelist instead of page->lru.next To identify that pages of page table are allocated from bootmem allocator, magic number sets to page->lru.next. But page->lru list is initialized in reserve_bootmem_region(). So when calling free_pagetable(), the function cannot find the magic number of pages. And free_pagetable() frees the pages by free_reserved_page() not put_page_bootmem(). But if the pages are allocated from bootmem allocator and used as page table, the pages have private flag. So before freeing the pages, we should clear the private flag by put_page_bootmem(). Before applying the commit 7bfec6f47bb0 ("mm, page_alloc: check multiple page fields with a single branch"), we could find the following visible issue: BUG: Bad page state in process kworker/u1024:1 page:ffffea103cfd8040 count:0 mapcount:0 mappi flags: 0x6fffff80000800(private) page dumped because: PAGE_FLAGS_CHECK_AT_FREE flag(s) set bad because of flags: 0x800(private) Call Trace: [...] dump_stack+0x63/0x87 [...] bad_page+0x114/0x130 [...] free_pages_prepare+0x299/0x2d0 [...] free_hot_cold_page+0x31/0x150 [...] __free_pages+0x25/0x30 [...] free_pagetable+0x6f/0xb4 [...] remove_pagetable+0x379/0x7ff [...] vmemmap_free+0x10/0x20 [...] sparse_remove_one_section+0x149/0x180 [...] __remove_pages+0x2e9/0x4f0 [...] arch_remove_memory+0x63/0xc0 [...] remove_memory+0x8c/0xc0 [...] acpi_memory_device_remove+0x79/0xa5 [...] acpi_bus_trim+0x5a/0x8d [...] acpi_bus_trim+0x38/0x8d [...] acpi_device_hotplug+0x1b7/0x418 [...] acpi_hotplug_work_fn+0x1e/0x29 [...] process_one_work+0x152/0x400 [...] worker_thread+0x125/0x4b0 [...] kthread+0xd8/0xf0 [...] ret_from_fork+0x22/0x40 And the issue still silently occurs. Until freeing the pages of page table allocated from bootmem allocator, the page->freelist is never used. So the patch sets magic number to page->freelist instead of page->lru.next. [isimatu.yasuaki@jp.fujitsu.com: fix merge issue] Link: http://lkml.kernel.org/r/722b1cc4-93ac-dd8b-2be2-7a7e313b3b0b@gmail.com Link: http://lkml.kernel.org/r/2c29bd9f-5b67-02d0-18a3-8828e78bbb6f@gmail.com Signed-off-by: Yasuaki Ishimatsu Cc: Thomas Gleixner Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Dave Hansen Cc: Vlastimil Babka Cc: Mel Gorman Cc: Xishi Qiu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/init_64.c | 2 +- mm/memory_hotplug.c | 5 +++-- mm/sparse.c | 2 +- 3 files changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index af85b686a7b0..97346f987ef2 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -679,7 +679,7 @@ static void __meminit free_pagetable(struct page *page, int order) if (PageReserved(page)) { __ClearPageReserved(page); - magic = (unsigned long)page->lru.next; + magic = (unsigned long)page->freelist; if (magic == SECTION_INFO || magic == MIX_SECTION_INFO) { while (nr_pages--) put_page_bootmem(page++); diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index b8c11e063ff0..d67787d10ff0 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -179,7 +179,7 @@ static void release_memory_resource(struct resource *res) void get_page_bootmem(unsigned long info, struct page *page, unsigned long type) { - page->lru.next = (struct list_head *) type; + page->freelist = (void *)type; SetPagePrivate(page); set_page_private(page, info); page_ref_inc(page); @@ -189,11 +189,12 @@ void put_page_bootmem(struct page *page) { unsigned long type; - type = (unsigned long) page->lru.next; + type = (unsigned long) page->freelist; BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE || type > MEMORY_HOTPLUG_MAX_BOOTMEM_TYPE); if (page_ref_dec_return(page) == 1) { + page->freelist = NULL; ClearPagePrivate(page); set_page_private(page, 0); INIT_LIST_HEAD(&page->lru); diff --git a/mm/sparse.c b/mm/sparse.c index dc30a70e1dce..db6bf3c97ea2 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -662,7 +662,7 @@ static void free_map_bootmem(struct page *memmap) >> PAGE_SHIFT; for (i = 0; i < nr_pages; i++, page++) { - magic = (unsigned long) page->lru.next; + magic = (unsigned long) page->freelist; BUG_ON(magic == NODE_INFO); -- cgit v1.2.3 From ecf1385d72f0491400a8ceca7001196ca369aa8c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 22 Feb 2017 15:46:37 -0800 Subject: mm: drop unused argument of zap_page_range() There's no users of zap_page_range() who wants non-NULL 'details'. Let's drop it. Link: http://lkml.kernel.org/r/20170118122429.43661-3-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Cc: Tetsuo Handa Cc: Peter Zijlstra Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/s390/mm/gmap.c | 2 +- arch/x86/mm/mpx.c | 2 +- drivers/android/binder.c | 2 +- drivers/staging/android/ion/ion.c | 3 +-- include/linux/mm.h | 2 +- mm/madvise.c | 2 +- mm/memory.c | 5 ++--- 7 files changed, 8 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/s390/mm/gmap.c b/arch/s390/mm/gmap.c index ec1f0dedb948..59ac93714fa4 100644 --- a/arch/s390/mm/gmap.c +++ b/arch/s390/mm/gmap.c @@ -687,7 +687,7 @@ void gmap_discard(struct gmap *gmap, unsigned long from, unsigned long to) /* Find vma in the parent mm */ vma = find_vma(gmap->mm, vmaddr); size = min(to - gaddr, PMD_SIZE - (gaddr & ~PMD_MASK)); - zap_page_range(vma, vmaddr, size, NULL); + zap_page_range(vma, vmaddr, size); } up_read(&gmap->mm->mmap_sem); } diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index af59f808742f..aad4ac386f98 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -796,7 +796,7 @@ static noinline int zap_bt_entries_mapping(struct mm_struct *mm, return -EINVAL; len = min(vma->vm_end, end) - addr; - zap_page_range(vma, addr, len, NULL); + zap_page_range(vma, addr, len); trace_mpx_unmap_zap(addr, addr+len); vma = vma->vm_next; diff --git a/drivers/android/binder.c b/drivers/android/binder.c index 9451b762fa1c..15b263a420e8 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -657,7 +657,7 @@ free_range: page = &proc->pages[(page_addr - proc->buffer) / PAGE_SIZE]; if (vma) zap_page_range(vma, (uintptr_t)page_addr + - proc->user_buffer_offset, PAGE_SIZE, NULL); + proc->user_buffer_offset, PAGE_SIZE); err_vm_insert_page_failed: unmap_kernel_range((unsigned long)page_addr, PAGE_SIZE); err_map_kernel_failed: diff --git a/drivers/staging/android/ion/ion.c b/drivers/staging/android/ion/ion.c index 937c2d5d7ec3..969600779e44 100644 --- a/drivers/staging/android/ion/ion.c +++ b/drivers/staging/android/ion/ion.c @@ -865,8 +865,7 @@ static void ion_buffer_sync_for_device(struct ion_buffer *buffer, list_for_each_entry(vma_list, &buffer->vmas, list) { struct vm_area_struct *vma = vma_list->vma; - zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start, - NULL); + zap_page_range(vma, vma->vm_start, vma->vm_end - vma->vm_start); } mutex_unlock(&buffer->lock); } diff --git a/include/linux/mm.h b/include/linux/mm.h index 062936e8b832..574bc157a27c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1185,7 +1185,7 @@ struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); void zap_page_range(struct vm_area_struct *vma, unsigned long address, - unsigned long size, struct zap_details *); + unsigned long size); void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, unsigned long start, unsigned long end); diff --git a/mm/madvise.c b/mm/madvise.c index ca75b8a01ba0..7f1490f0d3a6 100644 --- a/mm/madvise.c +++ b/mm/madvise.c @@ -478,7 +478,7 @@ static long madvise_dontneed(struct vm_area_struct *vma, return -EINVAL; madvise_userfault_dontneed(vma, prev, start, end); - zap_page_range(vma, start, end - start, NULL); + zap_page_range(vma, start, end - start); return 0; } diff --git a/mm/memory.c b/mm/memory.c index e9035a0afee2..7663068a33c6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1370,12 +1370,11 @@ void unmap_vmas(struct mmu_gather *tlb, * @vma: vm_area_struct holding the applicable pages * @start: starting address of pages to zap * @size: number of bytes to zap - * @details: details of shared cache invalidation * * Caller must protect the VMA list */ void zap_page_range(struct vm_area_struct *vma, unsigned long start, - unsigned long size, struct zap_details *details) + unsigned long size) { struct mm_struct *mm = vma->vm_mm; struct mmu_gather tlb; @@ -1386,7 +1385,7 @@ void zap_page_range(struct vm_area_struct *vma, unsigned long start, update_hiwater_rss(mm); mmu_notifier_invalidate_range_start(mm, start, end); for ( ; vma && vma->vm_start < end; vma = vma->vm_next) - unmap_single_vma(&tlb, vma, start, end, details); + unmap_single_vma(&tlb, vma, start, end, NULL); mmu_notifier_invalidate_range_end(mm, start, end); tlb_finish_mmu(&tlb, start, end); } -- cgit v1.2.3 From d1091c7fa3d52ebce4dd3f15d04155b3469b2f90 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Tue, 21 Feb 2017 15:35:32 -0600 Subject: objtool: Improve detection of BUG() and other dead ends The BUG() macro's use of __builtin_unreachable() via the unreachable() macro tells gcc that the instruction is a dead end, and that it's safe to assume the current code path will not execute past the previous instruction. On x86, the BUG() macro is implemented with the 'ud2' instruction. When objtool's branch analysis sees that instruction, it knows the current code path has come to a dead end. Peter Zijlstra has been working on a patch to change the WARN macros to use 'ud2'. That patch will break objtool's assumption that 'ud2' is always a dead end. Generally it's best for objtool to avoid making those kinds of assumptions anyway. The more ignorant it is of kernel code internals, the better. So create a more generic way for objtool to detect dead ends by adding an annotation to the unreachable() macro. The annotation stores a pointer to the end of the unreachable code path in an '__unreachable' section. Objtool can read that section to find the dead ends. Tested-by: Peter Zijlstra (Intel) Signed-off-by: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/41a6d33971462ebd944a1c60ad4bf5be86c17b77.1487712920.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 1 + include/linux/compiler-gcc.h | 13 ++++++++- tools/objtool/arch.h | 5 ++-- tools/objtool/arch/x86/decode.c | 3 --- tools/objtool/builtin-check.c | 60 ++++++++++++++++++++++++++++++++++++++--- 5 files changed, 71 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index e79f15f108a8..ad0118fbce90 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -346,6 +346,7 @@ SECTIONS /DISCARD/ : { *(.eh_frame) *(__func_stack_frame_non_standard) + *(__unreachable) } } diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0444b1336268..8ea159fc489d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -195,6 +195,17 @@ #endif #endif +#ifdef CONFIG_STACK_VALIDATION +#define annotate_unreachable() ({ \ + asm("1:\t\n" \ + ".pushsection __unreachable, \"a\"\t\n" \ + ".long 1b\t\n" \ + ".popsection\t\n"); \ +}) +#else +#define annotate_unreachable() +#endif + /* * Mark a position in code as unreachable. This can be used to * suppress control flow warnings after asm blocks that transfer @@ -204,7 +215,7 @@ * this in the preprocessor, but we can live with this because they're * unreleased. Really, we need to have autoconf for the kernel. */ -#define unreachable() __builtin_unreachable() +#define unreachable() annotate_unreachable(); __builtin_unreachable() /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) diff --git a/tools/objtool/arch.h b/tools/objtool/arch.h index f7350fcedc70..a59e061c0b4a 100644 --- a/tools/objtool/arch.h +++ b/tools/objtool/arch.h @@ -31,9 +31,8 @@ #define INSN_CALL_DYNAMIC 8 #define INSN_RETURN 9 #define INSN_CONTEXT_SWITCH 10 -#define INSN_BUG 11 -#define INSN_NOP 12 -#define INSN_OTHER 13 +#define INSN_NOP 11 +#define INSN_OTHER 12 #define INSN_LAST INSN_OTHER int arch_decode_instruction(struct elf *elf, struct section *sec, diff --git a/tools/objtool/arch/x86/decode.c b/tools/objtool/arch/x86/decode.c index 039636ffb6c8..6ac99e3266eb 100644 --- a/tools/objtool/arch/x86/decode.c +++ b/tools/objtool/arch/x86/decode.c @@ -118,9 +118,6 @@ int arch_decode_instruction(struct elf *elf, struct section *sec, op2 == 0x35) /* sysenter, sysret */ *type = INSN_CONTEXT_SWITCH; - else if (op2 == 0x0b || op2 == 0xb9) - /* ud2 */ - *type = INSN_BUG; else if (op2 == 0x0d || op2 == 0x1f) /* nopl/nopw */ *type = INSN_NOP; diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index e8a1f699058a..5fc52ee3264c 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -51,7 +51,7 @@ struct instruction { unsigned int len, state; unsigned char type; unsigned long immediate; - bool alt_group, visited; + bool alt_group, visited, dead_end; struct symbol *call_dest; struct instruction *jump_dest; struct list_head alts; @@ -329,6 +329,54 @@ static int decode_instructions(struct objtool_file *file) return 0; } +/* + * Find all uses of the unreachable() macro, which are code path dead ends. + */ +static int add_dead_ends(struct objtool_file *file) +{ + struct section *sec; + struct rela *rela; + struct instruction *insn; + bool found; + + sec = find_section_by_name(file->elf, ".rela__unreachable"); + if (!sec) + return 0; + + list_for_each_entry(rela, &sec->rela_list, list) { + if (rela->sym->type != STT_SECTION) { + WARN("unexpected relocation symbol type in .rela__unreachable"); + return -1; + } + insn = find_insn(file, rela->sym->sec, rela->addend); + if (insn) + insn = list_prev_entry(insn, list); + else if (rela->addend == rela->sym->sec->len) { + found = false; + list_for_each_entry_reverse(insn, &file->insn_list, list) { + if (insn->sec == rela->sym->sec) { + found = true; + break; + } + } + + if (!found) { + WARN("can't find unreachable insn at %s+0x%x", + rela->sym->sec->name, rela->addend); + return -1; + } + } else { + WARN("can't find unreachable insn at %s+0x%x", + rela->sym->sec->name, rela->addend); + return -1; + } + + insn->dead_end = true; + } + + return 0; +} + /* * Warnings shouldn't be reported for ignored functions. */ @@ -843,6 +891,10 @@ static int decode_sections(struct objtool_file *file) if (ret) return ret; + ret = add_dead_ends(file); + if (ret) + return ret; + add_ignores(file); ret = add_jump_destinations(file); @@ -1037,13 +1089,13 @@ static int validate_branch(struct objtool_file *file, return 0; - case INSN_BUG: - return 0; - default: break; } + if (insn->dead_end) + return 0; + insn = next_insn_same_sec(file, insn); if (!insn) { WARN("%s: unexpected end of section", sec->name); -- cgit v1.2.3 From a00cc7d9dd93d66a3fb83fc52aa57a4bec51c517 Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Fri, 24 Feb 2017 14:57:02 -0800 Subject: mm, x86: add support for PUD-sized transparent hugepages The current transparent hugepage code only supports PMDs. This patch adds support for transparent use of PUDs with DAX. It does not include support for anonymous pages. x86 support code also added. Most of this patch simply parallels the work that was done for huge PMDs. The only major difference is how the new ->pud_entry method in mm_walk works. The ->pmd_entry method replaces the ->pte_entry method, whereas the ->pud_entry method works along with either ->pmd_entry or ->pte_entry. The pagewalk code takes care of locking the PUD before calling ->pud_walk, so handlers do not need to worry whether the PUD is stable. [dave.jiang@intel.com: fix SMP x86 32bit build for native_pud_clear()] Link: http://lkml.kernel.org/r/148719066814.31111.3239231168815337012.stgit@djiang5-desk3.ch.intel.com [dave.jiang@intel.com: native_pud_clear missing on i386 build] Link: http://lkml.kernel.org/r/148640375195.69754.3315433724330910314.stgit@djiang5-desk3.ch.intel.com Link: http://lkml.kernel.org/r/148545059381.17912.8602162635537598445.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Matthew Wilcox Signed-off-by: Dave Jiang Tested-by: Alexander Kapshuk Cc: Dave Hansen Cc: Vlastimil Babka Cc: Jan Kara Cc: Dan Williams Cc: Ross Zwisler Cc: Kirill A. Shutemov Cc: Nilesh Choudhury Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/Kconfig | 3 + arch/x86/Kconfig | 1 + arch/x86/include/asm/paravirt.h | 11 ++ arch/x86/include/asm/paravirt_types.h | 2 + arch/x86/include/asm/pgtable-2level.h | 17 +++ arch/x86/include/asm/pgtable-3level.h | 30 ++++ arch/x86/include/asm/pgtable.h | 140 +++++++++++++++++++ arch/x86/include/asm/pgtable_64.h | 15 ++ arch/x86/kernel/paravirt.c | 1 + arch/x86/mm/pgtable.c | 31 +++++ include/asm-generic/pgtable.h | 80 ++++++++++- include/asm-generic/tlb.h | 14 ++ include/linux/huge_mm.h | 83 +++++++++++- include/linux/mm.h | 30 +++- include/linux/mmu_notifier.h | 14 ++ include/linux/pfn_t.h | 12 ++ mm/gup.c | 7 + mm/huge_memory.c | 249 ++++++++++++++++++++++++++++++++++ mm/memory.c | 88 +++++++++++- mm/pagewalk.c | 20 ++- mm/pgtable-generic.c | 14 ++ 21 files changed, 844 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/Kconfig b/arch/Kconfig index f761142976e5..d0012add6b19 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -571,6 +571,9 @@ config HAVE_IRQ_TIME_ACCOUNTING config HAVE_ARCH_TRANSPARENT_HUGEPAGE bool +config HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD + bool + config HAVE_ARCH_HUGE_VMAP bool diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 874c1238dffd..33007aa74111 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -109,6 +109,7 @@ config X86 select HAVE_ARCH_SECCOMP_FILTER select HAVE_ARCH_TRACEHOOK select HAVE_ARCH_TRANSPARENT_HUGEPAGE + select HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD if X86_64 select HAVE_ARCH_VMAP_STACK if X86_64 select HAVE_ARCH_WITHIN_STACK_FRAMES select HAVE_CC_STACKPROTECTOR diff --git a/arch/x86/include/asm/paravirt.h b/arch/x86/include/asm/paravirt.h index f75fbfe550f2..0489884fdc44 100644 --- a/arch/x86/include/asm/paravirt.h +++ b/arch/x86/include/asm/paravirt.h @@ -475,6 +475,17 @@ static inline void set_pmd_at(struct mm_struct *mm, unsigned long addr, native_pmd_val(pmd)); } +static inline void set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + if (sizeof(pudval_t) > sizeof(long)) + /* 5 arg words */ + pv_mmu_ops.set_pud_at(mm, addr, pudp, pud); + else + PVOP_VCALL4(pv_mmu_ops.set_pud_at, mm, addr, pudp, + native_pud_val(pud)); +} + static inline void set_pmd(pmd_t *pmdp, pmd_t pmd) { pmdval_t val = native_pmd_val(pmd); diff --git a/arch/x86/include/asm/paravirt_types.h b/arch/x86/include/asm/paravirt_types.h index bb2de45a60f2..b060f962d581 100644 --- a/arch/x86/include/asm/paravirt_types.h +++ b/arch/x86/include/asm/paravirt_types.h @@ -249,6 +249,8 @@ struct pv_mmu_ops { void (*set_pmd)(pmd_t *pmdp, pmd_t pmdval); void (*set_pmd_at)(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp, pmd_t pmdval); + void (*set_pud_at)(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pudval); void (*pte_update)(struct mm_struct *mm, unsigned long addr, pte_t *ptep); diff --git a/arch/x86/include/asm/pgtable-2level.h b/arch/x86/include/asm/pgtable-2level.h index fd74a11959de..a8b96e708c2b 100644 --- a/arch/x86/include/asm/pgtable-2level.h +++ b/arch/x86/include/asm/pgtable-2level.h @@ -21,6 +21,10 @@ static inline void native_set_pmd(pmd_t *pmdp, pmd_t pmd) *pmdp = pmd; } +static inline void native_set_pud(pud_t *pudp, pud_t pud) +{ +} + static inline void native_set_pte_atomic(pte_t *ptep, pte_t pte) { native_set_pte(ptep, pte); @@ -31,6 +35,10 @@ static inline void native_pmd_clear(pmd_t *pmdp) native_set_pmd(pmdp, __pmd(0)); } +static inline void native_pud_clear(pud_t *pudp) +{ +} + static inline void native_pte_clear(struct mm_struct *mm, unsigned long addr, pte_t *xp) { @@ -55,6 +63,15 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *xp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) #endif +#ifdef CONFIG_SMP +static inline pud_t native_pudp_get_and_clear(pud_t *xp) +{ + return __pud(xchg((pudval_t *)xp, 0)); +} +#else +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) +#endif + /* Bit manipulation helper on pte/pgoff entry */ static inline unsigned long pte_bitop(unsigned long value, unsigned int rightshift, unsigned long mask, unsigned int leftshift) diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index cdaa58c9b39e..8f50fb3f04e1 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,6 +121,12 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } +#ifndef CONFIG_SMP +static inline void native_pud_clear(pud_t *pudp) +{ +} +#endif + static inline void pud_clear(pud_t *pudp) { set_pud(pudp, __pud(0)); @@ -176,6 +182,30 @@ static inline pmd_t native_pmdp_get_and_clear(pmd_t *pmdp) #define native_pmdp_get_and_clear(xp) native_local_pmdp_get_and_clear(xp) #endif +#ifdef CONFIG_SMP +union split_pud { + struct { + u32 pud_low; + u32 pud_high; + }; + pud_t pud; +}; + +static inline pud_t native_pudp_get_and_clear(pud_t *pudp) +{ + union split_pud res, *orig = (union split_pud *)pudp; + + /* xchg acts as a barrier before setting of the high bits */ + res.pud_low = xchg(&orig->pud_low, 0); + res.pud_high = orig->pud_high; + orig->pud_high = 0; + + return res.pud; +} +#else +#define native_pudp_get_and_clear(xp) native_local_pudp_get_and_clear(xp) +#endif + /* Encode and de-code a swap entry */ #define MAX_SWAPFILES_CHECK() BUILD_BUG_ON(MAX_SWAPFILES_SHIFT > 5) #define __swp_type(x) (((x).val) & 0x1f) diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 437feb436efa..1cfb36b8c024 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -46,6 +46,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); #define set_pte(ptep, pte) native_set_pte(ptep, pte) #define set_pte_at(mm, addr, ptep, pte) native_set_pte_at(mm, addr, ptep, pte) #define set_pmd_at(mm, addr, pmdp, pmd) native_set_pmd_at(mm, addr, pmdp, pmd) +#define set_pud_at(mm, addr, pudp, pud) native_set_pud_at(mm, addr, pudp, pud) #define set_pte_atomic(ptep, pte) \ native_set_pte_atomic(ptep, pte) @@ -128,6 +129,16 @@ static inline int pmd_young(pmd_t pmd) return pmd_flags(pmd) & _PAGE_ACCESSED; } +static inline int pud_dirty(pud_t pud) +{ + return pud_flags(pud) & _PAGE_DIRTY; +} + +static inline int pud_young(pud_t pud) +{ + return pud_flags(pud) & _PAGE_ACCESSED; +} + static inline int pte_write(pte_t pte) { return pte_flags(pte) & _PAGE_RW; @@ -181,6 +192,13 @@ static inline int pmd_trans_huge(pmd_t pmd) return (pmd_val(pmd) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; } +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline int pud_trans_huge(pud_t pud) +{ + return (pud_val(pud) & (_PAGE_PSE|_PAGE_DEVMAP)) == _PAGE_PSE; +} +#endif + #define has_transparent_hugepage has_transparent_hugepage static inline int has_transparent_hugepage(void) { @@ -192,6 +210,18 @@ static inline int pmd_devmap(pmd_t pmd) { return !!(pmd_val(pmd) & _PAGE_DEVMAP); } + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline int pud_devmap(pud_t pud) +{ + return !!(pud_val(pud) & _PAGE_DEVMAP); +} +#else +static inline int pud_devmap(pud_t pud) +{ + return 0; +} +#endif #endif #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ @@ -333,6 +363,65 @@ static inline pmd_t pmd_mknotpresent(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_PRESENT | _PAGE_PROTNONE); } +static inline pud_t pud_set_flags(pud_t pud, pudval_t set) +{ + pudval_t v = native_pud_val(pud); + + return __pud(v | set); +} + +static inline pud_t pud_clear_flags(pud_t pud, pudval_t clear) +{ + pudval_t v = native_pud_val(pud); + + return __pud(v & ~clear); +} + +static inline pud_t pud_mkold(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_ACCESSED); +} + +static inline pud_t pud_mkclean(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_DIRTY); +} + +static inline pud_t pud_wrprotect(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_RW); +} + +static inline pud_t pud_mkdirty(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_DIRTY | _PAGE_SOFT_DIRTY); +} + +static inline pud_t pud_mkdevmap(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_DEVMAP); +} + +static inline pud_t pud_mkhuge(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_PSE); +} + +static inline pud_t pud_mkyoung(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_ACCESSED); +} + +static inline pud_t pud_mkwrite(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_RW); +} + +static inline pud_t pud_mknotpresent(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_PRESENT | _PAGE_PROTNONE); +} + #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline int pte_soft_dirty(pte_t pte) { @@ -344,6 +433,11 @@ static inline int pmd_soft_dirty(pmd_t pmd) return pmd_flags(pmd) & _PAGE_SOFT_DIRTY; } +static inline int pud_soft_dirty(pud_t pud) +{ + return pud_flags(pud) & _PAGE_SOFT_DIRTY; +} + static inline pte_t pte_mksoft_dirty(pte_t pte) { return pte_set_flags(pte, _PAGE_SOFT_DIRTY); @@ -354,6 +448,11 @@ static inline pmd_t pmd_mksoft_dirty(pmd_t pmd) return pmd_set_flags(pmd, _PAGE_SOFT_DIRTY); } +static inline pud_t pud_mksoft_dirty(pud_t pud) +{ + return pud_set_flags(pud, _PAGE_SOFT_DIRTY); +} + static inline pte_t pte_clear_soft_dirty(pte_t pte) { return pte_clear_flags(pte, _PAGE_SOFT_DIRTY); @@ -364,6 +463,11 @@ static inline pmd_t pmd_clear_soft_dirty(pmd_t pmd) return pmd_clear_flags(pmd, _PAGE_SOFT_DIRTY); } +static inline pud_t pud_clear_soft_dirty(pud_t pud) +{ + return pud_clear_flags(pud, _PAGE_SOFT_DIRTY); +} + #endif /* CONFIG_HAVE_ARCH_SOFT_DIRTY */ /* @@ -392,6 +496,12 @@ static inline pmd_t pfn_pmd(unsigned long page_nr, pgprot_t pgprot) massage_pgprot(pgprot)); } +static inline pud_t pfn_pud(unsigned long page_nr, pgprot_t pgprot) +{ + return __pud(((phys_addr_t)page_nr << PAGE_SHIFT) | + massage_pgprot(pgprot)); +} + static inline pte_t pte_modify(pte_t pte, pgprot_t newprot) { pteval_t val = pte_val(pte); @@ -771,6 +881,14 @@ static inline pmd_t native_local_pmdp_get_and_clear(pmd_t *pmdp) return res; } +static inline pud_t native_local_pudp_get_and_clear(pud_t *pudp) +{ + pud_t res = *pudp; + + native_pud_clear(pudp); + return res; +} + static inline void native_set_pte_at(struct mm_struct *mm, unsigned long addr, pte_t *ptep , pte_t pte) { @@ -783,6 +901,12 @@ static inline void native_set_pmd_at(struct mm_struct *mm, unsigned long addr, native_set_pmd(pmdp, pmd); } +static inline void native_set_pud_at(struct mm_struct *mm, unsigned long addr, + pud_t *pudp, pud_t pud) +{ + native_set_pud(pudp, pud); +} + #ifndef CONFIG_PARAVIRT /* * Rules for using pte_update - it must be called after any PTE update which @@ -861,10 +985,15 @@ static inline void ptep_set_wrprotect(struct mm_struct *mm, extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); +extern int pudp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, + pud_t entry, int dirty); #define __HAVE_ARCH_PMDP_TEST_AND_CLEAR_YOUNG extern int pmdp_test_and_clear_young(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmdp); +extern int pudp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pud_t *pudp); #define __HAVE_ARCH_PMDP_CLEAR_YOUNG_FLUSH extern int pmdp_clear_flush_young(struct vm_area_struct *vma, @@ -884,6 +1013,13 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long return native_pmdp_get_and_clear(pmdp); } +#define __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR +static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, + unsigned long addr, pud_t *pudp) +{ + return native_pudp_get_and_clear(pudp); +} + #define __HAVE_ARCH_PMDP_SET_WRPROTECT static inline void pmdp_set_wrprotect(struct mm_struct *mm, unsigned long addr, pmd_t *pmdp) @@ -932,6 +1068,10 @@ static inline void update_mmu_cache_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { } +static inline void update_mmu_cache_pud(struct vm_area_struct *vma, + unsigned long addr, pud_t *pud) +{ +} #ifdef CONFIG_HAVE_ARCH_SOFT_DIRTY static inline pte_t pte_swp_mksoft_dirty(pte_t pte) diff --git a/arch/x86/include/asm/pgtable_64.h b/arch/x86/include/asm/pgtable_64.h index 62b775926045..73c7ccc38912 100644 --- a/arch/x86/include/asm/pgtable_64.h +++ b/arch/x86/include/asm/pgtable_64.h @@ -106,6 +106,21 @@ static inline void native_pud_clear(pud_t *pud) native_set_pud(pud, native_make_pud(0)); } +static inline pud_t native_pudp_get_and_clear(pud_t *xp) +{ +#ifdef CONFIG_SMP + return native_make_pud(xchg(&xp->pud, 0)); +#else + /* native_local_pudp_get_and_clear, + * but duplicated because of cyclic dependency + */ + pud_t ret = *xp; + + native_pud_clear(xp); + return ret; +#endif +} + static inline void native_set_pgd(pgd_t *pgdp, pgd_t pgd) { *pgdp = pgd; diff --git a/arch/x86/kernel/paravirt.c b/arch/x86/kernel/paravirt.c index a1bfba0f7234..4797e87b0fb6 100644 --- a/arch/x86/kernel/paravirt.c +++ b/arch/x86/kernel/paravirt.c @@ -425,6 +425,7 @@ struct pv_mmu_ops pv_mmu_ops __ro_after_init = { .pmd_clear = native_pmd_clear, #endif .set_pud = native_set_pud, + .set_pud_at = native_set_pud_at, .pmd_val = PTE_IDENT, .make_pmd = PTE_IDENT, diff --git a/arch/x86/mm/pgtable.c b/arch/x86/mm/pgtable.c index 3feec5af4e67..6cbdff26bb96 100644 --- a/arch/x86/mm/pgtable.c +++ b/arch/x86/mm/pgtable.c @@ -445,6 +445,26 @@ int pmdp_set_access_flags(struct vm_area_struct *vma, return changed; } + +int pudp_set_access_flags(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp, pud_t entry, int dirty) +{ + int changed = !pud_same(*pudp, entry); + + VM_BUG_ON(address & ~HPAGE_PUD_MASK); + + if (changed && dirty) { + *pudp = entry; + /* + * We had a write-protection fault here and changed the pud + * to to more permissive. No need to flush the TLB for that, + * #PF is architecturally guaranteed to do that and in the + * worst-case we'll generate a spurious fault. + */ + } + + return changed; +} #endif int ptep_test_and_clear_young(struct vm_area_struct *vma, @@ -474,6 +494,17 @@ int pmdp_test_and_clear_young(struct vm_area_struct *vma, return ret; } +int pudp_test_and_clear_young(struct vm_area_struct *vma, + unsigned long addr, pud_t *pudp) +{ + int ret = 0; + + if (pud_young(*pudp)) + ret = test_and_clear_bit(_PAGE_BIT_ACCESSED, + (unsigned long *)pudp); + + return ret; +} #endif int ptep_clear_flush_young(struct vm_area_struct *vma, diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 18af2bcefe6a..a0aba0f9c57b 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -36,6 +36,9 @@ extern int ptep_set_access_flags(struct vm_area_struct *vma, extern int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, pmd_t entry, int dirty); +extern int pudp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, + pud_t entry, int dirty); #else static inline int pmdp_set_access_flags(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp, @@ -44,6 +47,13 @@ static inline int pmdp_set_access_flags(struct vm_area_struct *vma, BUILD_BUG(); return 0; } +static inline int pudp_set_access_flags(struct vm_area_struct *vma, + unsigned long address, pud_t *pudp, + pud_t entry, int dirty) +{ + BUILD_BUG(); + return 0; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -121,8 +131,8 @@ static inline pte_t ptep_get_and_clear(struct mm_struct *mm, } #endif -#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR #ifdef CONFIG_TRANSPARENT_HUGEPAGE +#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, unsigned long address, pmd_t *pmdp) @@ -131,20 +141,40 @@ static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm, pmd_clear(pmdp); return pmd; } +#endif /* __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR */ +#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR +static inline pud_t pudp_huge_get_and_clear(struct mm_struct *mm, + unsigned long address, + pud_t *pudp) +{ + pud_t pud = *pudp; + + pud_clear(pudp); + return pud; +} +#endif /* __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ -#endif -#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL #ifdef CONFIG_TRANSPARENT_HUGEPAGE +#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR_FULL static inline pmd_t pmdp_huge_get_and_clear_full(struct mm_struct *mm, unsigned long address, pmd_t *pmdp, int full) { return pmdp_huge_get_and_clear(mm, address, pmdp); } -#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef __HAVE_ARCH_PUDP_HUGE_GET_AND_CLEAR_FULL +static inline pud_t pudp_huge_get_and_clear_full(struct mm_struct *mm, + unsigned long address, pud_t *pudp, + int full) +{ + return pudp_huge_get_and_clear(mm, address, pudp); +} +#endif +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, @@ -181,6 +211,9 @@ extern pte_t ptep_clear_flush(struct vm_area_struct *vma, extern pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp); +extern pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, + unsigned long address, + pud_t *pudp); #endif #ifndef __HAVE_ARCH_PTEP_SET_WRPROTECT @@ -208,6 +241,23 @@ static inline void pmdp_set_wrprotect(struct mm_struct *mm, } #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef __HAVE_ARCH_PUDP_SET_WRPROTECT +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline void pudp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pud_t *pudp) +{ + pud_t old_pud = *pudp; + + set_pud_at(mm, address, pudp, pud_wrprotect(old_pud)); +} +#else +static inline void pudp_set_wrprotect(struct mm_struct *mm, + unsigned long address, pud_t *pudp) +{ + BUILD_BUG(); +} +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ +#endif #ifndef pmdp_collapse_flush #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -273,12 +323,23 @@ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { return pmd_val(pmd_a) == pmd_val(pmd_b); } + +static inline int pud_same(pud_t pud_a, pud_t pud_b) +{ + return pud_val(pud_a) == pud_val(pud_b); +} #else /* CONFIG_TRANSPARENT_HUGEPAGE */ static inline int pmd_same(pmd_t pmd_a, pmd_t pmd_b) { BUILD_BUG(); return 0; } + +static inline int pud_same(pud_t pud_a, pud_t pud_b) +{ + BUILD_BUG(); + return 0; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif @@ -640,6 +701,15 @@ static inline int pmd_write(pmd_t pmd) #endif /* __HAVE_ARCH_PMD_WRITE */ #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#if !defined(CONFIG_TRANSPARENT_HUGEPAGE) || \ + (defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + !defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) +static inline int pud_trans_huge(pud_t pud) +{ + return 0; +} +#endif + #ifndef pmd_read_atomic static inline pmd_t pmd_read_atomic(pmd_t *pmdp) { @@ -785,8 +855,10 @@ static inline int pmd_clear_huge(pmd_t *pmd) * e.g. see arch/arc: flush_pmd_tlb_range */ #define flush_pmd_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) +#define flush_pud_tlb_range(vma, addr, end) flush_tlb_range(vma, addr, end) #else #define flush_pmd_tlb_range(vma, addr, end) BUILD_BUG() +#define flush_pud_tlb_range(vma, addr, end) BUILD_BUG() #endif #endif diff --git a/include/asm-generic/tlb.h b/include/asm-generic/tlb.h index 7eed8cf3130a..4329bc6ef04b 100644 --- a/include/asm-generic/tlb.h +++ b/include/asm-generic/tlb.h @@ -232,6 +232,20 @@ static inline void tlb_remove_check_page_size_change(struct mmu_gather *tlb, __tlb_remove_pmd_tlb_entry(tlb, pmdp, address); \ } while (0) +/** + * tlb_remove_pud_tlb_entry - remember a pud mapping for later tlb + * invalidation. This is a nop so far, because only x86 needs it. + */ +#ifndef __tlb_remove_pud_tlb_entry +#define __tlb_remove_pud_tlb_entry(tlb, pudp, address) do {} while (0) +#endif + +#define tlb_remove_pud_tlb_entry(tlb, pudp, address) \ + do { \ + __tlb_adjust_range(tlb, address, HPAGE_PUD_SIZE); \ + __tlb_remove_pud_tlb_entry(tlb, pudp, address); \ + } while (0) + /* * For things like page tables caches (ie caching addresses "inside" the * page tables, like x86 does), for legacy reasons, flushing an diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index f0029e786205..a3762d49ba39 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -6,6 +6,18 @@ extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm, pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr, struct vm_area_struct *vma); extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd); +extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pud_t *dst_pud, pud_t *src_pud, unsigned long addr, + struct vm_area_struct *vma); + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud); +#else +static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) +{ +} +#endif + extern int do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma, unsigned long addr, @@ -17,6 +29,9 @@ extern bool madvise_free_huge_pmd(struct mmu_gather *tlb, extern int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr); +extern int zap_huge_pud(struct mmu_gather *tlb, + struct vm_area_struct *vma, + pud_t *pud, unsigned long addr); extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, unsigned long end, unsigned char *vec); @@ -26,8 +41,10 @@ extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr, pgprot_t newprot, int prot_numa); -int vmf_insert_pfn_pmd(struct vm_area_struct *, unsigned long addr, pmd_t *, - pfn_t pfn, bool write); +int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, pfn_t pfn, bool write); +int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, pfn_t pfn, bool write); enum transparent_hugepage_flag { TRANSPARENT_HUGEPAGE_FLAG, TRANSPARENT_HUGEPAGE_REQ_MADV_FLAG, @@ -58,13 +75,14 @@ extern struct kobj_attribute shmem_enabled_attr; #define HPAGE_PMD_NR (1<vm_mm->mmap_sem), vma); + if (pud_trans_huge(*pud) || pud_devmap(*pud)) + return __pud_trans_huge_lock(pud, vma); + else + return NULL; +} static inline int hpage_nr_pages(struct page *page) { if (unlikely(PageTransHuge(page))) @@ -143,6 +183,11 @@ static inline int hpage_nr_pages(struct page *page) return 1; } +struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t *pmd, int flags); +struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, int flags); + extern int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd); extern struct page *huge_zero_page; @@ -157,6 +202,11 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) return is_huge_zero_page(pmd_page(pmd)); } +static inline bool is_huge_zero_pud(pud_t pud) +{ + return false; +} + struct page *mm_get_huge_zero_page(struct mm_struct *mm); void mm_put_huge_zero_page(struct mm_struct *mm); @@ -167,6 +217,10 @@ void mm_put_huge_zero_page(struct mm_struct *mm); #define HPAGE_PMD_MASK ({ BUILD_BUG(); 0; }) #define HPAGE_PMD_SIZE ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_SHIFT ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_MASK ({ BUILD_BUG(); 0; }) +#define HPAGE_PUD_SIZE ({ BUILD_BUG(); 0; }) + #define hpage_nr_pages(x) 1 #define transparent_hugepage_enabled(__vma) 0 @@ -195,6 +249,9 @@ static inline void __split_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, static inline void split_huge_pmd_address(struct vm_area_struct *vma, unsigned long address, bool freeze, struct page *page) {} +#define split_huge_pud(__vma, __pmd, __address) \ + do { } while (0) + static inline int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags, int advice) { @@ -212,6 +269,11 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, { return NULL; } +static inline spinlock_t *pud_trans_huge_lock(pud_t *pud, + struct vm_area_struct *vma) +{ + return NULL; +} static inline int do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd) { @@ -223,6 +285,11 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline bool is_huge_zero_pud(pud_t pud) +{ + return false; +} + static inline void mm_put_huge_zero_page(struct mm_struct *mm) { return; @@ -233,6 +300,12 @@ static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, { return NULL; } + +static inline struct page *follow_devmap_pud(struct vm_area_struct *vma, + unsigned long addr, pud_t *pud, int flags) +{ + return NULL; +} #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif /* _LINUX_HUGE_MM_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 035a688e5472..d8b75d7d6a9e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -424,6 +424,10 @@ static inline int pmd_devmap(pmd_t pmd) { return 0; } +static inline int pud_devmap(pud_t pud) +{ + return 0; +} #endif /* @@ -1199,6 +1203,10 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, /** * mm_walk - callbacks for walk_page_range + * @pud_entry: if set, called for each non-empty PUD (2nd-level) entry + * this handler should only handle pud_trans_huge() puds. + * the pmd_entry or pte_entry callbacks will be used for + * regular PUDs. * @pmd_entry: if set, called for each non-empty PMD (3rd-level) entry * this handler is required to be able to handle * pmd_trans_huge() pmds. They may simply choose to @@ -1218,6 +1226,8 @@ void unmap_vmas(struct mmu_gather *tlb, struct vm_area_struct *start_vma, * (see the comment on walk_page_range() for more details) */ struct mm_walk { + int (*pud_entry)(pud_t *pud, unsigned long addr, + unsigned long next, struct mm_walk *walk); int (*pmd_entry)(pmd_t *pmd, unsigned long addr, unsigned long next, struct mm_walk *walk); int (*pte_entry)(pte_t *pte, unsigned long addr, @@ -1801,8 +1811,26 @@ static inline spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } -extern void __init pagecache_init(void); +/* + * No scalability reason to split PUD locks yet, but follow the same pattern + * as the PMD locks to make it easier if we decide to. The VM should not be + * considered ready to switch to split PUD locks yet; there may be places + * which need to be converted from page_table_lock. + */ +static inline spinlock_t *pud_lockptr(struct mm_struct *mm, pud_t *pud) +{ + return &mm->page_table_lock; +} + +static inline spinlock_t *pud_lock(struct mm_struct *mm, pud_t *pud) +{ + spinlock_t *ptl = pud_lockptr(mm, pud); + + spin_lock(ptl); + return ptl; +} +extern void __init pagecache_init(void); extern void free_area_init(unsigned long * zones_size); extern void free_area_init_node(int nid, unsigned long * zones_size, unsigned long zone_start_pfn, unsigned long *zholes_size); diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index a1a210d59961..51891fb0d3ce 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -381,6 +381,19 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pmd; \ }) +#define pudp_huge_clear_flush_notify(__vma, __haddr, __pud) \ +({ \ + unsigned long ___haddr = __haddr & HPAGE_PUD_MASK; \ + struct mm_struct *___mm = (__vma)->vm_mm; \ + pud_t ___pud; \ + \ + ___pud = pudp_huge_clear_flush(__vma, __haddr, __pud); \ + mmu_notifier_invalidate_range(___mm, ___haddr, \ + ___haddr + HPAGE_PUD_SIZE); \ + \ + ___pud; \ +}) + #define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ ({ \ unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ @@ -475,6 +488,7 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define pmdp_clear_young_notify pmdp_test_and_clear_young #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush +#define pudp_huge_clear_flush_notify pudp_huge_clear_flush #define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at diff --git a/include/linux/pfn_t.h b/include/linux/pfn_t.h index 033fc7bbcefa..a49b3259cad7 100644 --- a/include/linux/pfn_t.h +++ b/include/linux/pfn_t.h @@ -90,6 +90,13 @@ static inline pmd_t pfn_t_pmd(pfn_t pfn, pgprot_t pgprot) { return pfn_pmd(pfn_t_to_pfn(pfn), pgprot); } + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static inline pud_t pfn_t_pud(pfn_t pfn, pgprot_t pgprot) +{ + return pfn_pud(pfn_t_to_pfn(pfn), pgprot); +} +#endif #endif #ifdef __HAVE_ARCH_PTE_DEVMAP @@ -106,5 +113,10 @@ static inline bool pfn_t_devmap(pfn_t pfn) } pte_t pte_mkdevmap(pte_t pte); pmd_t pmd_mkdevmap(pmd_t pmd); +#if defined(CONFIG_TRANSPARENT_HUGEPAGE) && \ + defined(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD) +pud_t pud_mkdevmap(pud_t pud); #endif +#endif /* __HAVE_ARCH_PTE_DEVMAP */ + #endif /* _LINUX_PFN_T_H_ */ diff --git a/mm/gup.c b/mm/gup.c index 40abe4c90383..1e67461b2733 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -253,6 +253,13 @@ struct page *follow_page_mask(struct vm_area_struct *vma, return page; return no_page_table(vma, flags); } + if (pud_devmap(*pud)) { + ptl = pud_lock(mm, pud); + page = follow_devmap_pud(vma, address, pud, flags); + spin_unlock(ptl); + if (page) + return page; + } if (unlikely(pud_bad(*pud))) return no_page_table(vma, flags); diff --git a/mm/huge_memory.c b/mm/huge_memory.c index f9ecc2aeadfc..85742ac5b32e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -757,6 +757,60 @@ int vmf_insert_pfn_pmd(struct vm_area_struct *vma, unsigned long addr, } EXPORT_SYMBOL_GPL(vmf_insert_pfn_pmd); +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static pud_t maybe_pud_mkwrite(pud_t pud, struct vm_area_struct *vma) +{ + if (likely(vma->vm_flags & VM_WRITE)) + pud = pud_mkwrite(pud); + return pud; +} + +static void insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, pfn_t pfn, pgprot_t prot, bool write) +{ + struct mm_struct *mm = vma->vm_mm; + pud_t entry; + spinlock_t *ptl; + + ptl = pud_lock(mm, pud); + entry = pud_mkhuge(pfn_t_pud(pfn, prot)); + if (pfn_t_devmap(pfn)) + entry = pud_mkdevmap(entry); + if (write) { + entry = pud_mkyoung(pud_mkdirty(entry)); + entry = maybe_pud_mkwrite(entry, vma); + } + set_pud_at(mm, addr, pud, entry); + update_mmu_cache_pud(vma, addr, pud); + spin_unlock(ptl); +} + +int vmf_insert_pfn_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, pfn_t pfn, bool write) +{ + pgprot_t pgprot = vma->vm_page_prot; + /* + * If we had pud_special, we could avoid all these restrictions, + * but we need to be consistent with PTEs and architectures that + * can't support a 'special' bit. + */ + BUG_ON(!(vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP))); + BUG_ON((vma->vm_flags & (VM_PFNMAP|VM_MIXEDMAP)) == + (VM_PFNMAP|VM_MIXEDMAP)); + BUG_ON((vma->vm_flags & VM_PFNMAP) && is_cow_mapping(vma->vm_flags)); + BUG_ON(!pfn_t_devmap(pfn)); + + if (addr < vma->vm_start || addr >= vma->vm_end) + return VM_FAULT_SIGBUS; + + track_pfn_insert(vma, &pgprot, pfn); + + insert_pfn_pud(vma, addr, pud, pfn, pgprot, write); + return VM_FAULT_NOPAGE; +} +EXPORT_SYMBOL_GPL(vmf_insert_pfn_pud); +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ + static void touch_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd) { @@ -887,6 +941,123 @@ out: return ret; } +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +static void touch_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud) +{ + pud_t _pud; + + /* + * We should set the dirty bit only for FOLL_WRITE but for now + * the dirty bit in the pud is meaningless. And if the dirty + * bit will become meaningful and we'll only set it with + * FOLL_WRITE, an atomic set_bit will be required on the pud to + * set the young bit, instead of the current set_pud_at. + */ + _pud = pud_mkyoung(pud_mkdirty(*pud)); + if (pudp_set_access_flags(vma, addr & HPAGE_PUD_MASK, + pud, _pud, 1)) + update_mmu_cache_pud(vma, addr, pud); +} + +struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr, + pud_t *pud, int flags) +{ + unsigned long pfn = pud_pfn(*pud); + struct mm_struct *mm = vma->vm_mm; + struct dev_pagemap *pgmap; + struct page *page; + + assert_spin_locked(pud_lockptr(mm, pud)); + + if (flags & FOLL_WRITE && !pud_write(*pud)) + return NULL; + + if (pud_present(*pud) && pud_devmap(*pud)) + /* pass */; + else + return NULL; + + if (flags & FOLL_TOUCH) + touch_pud(vma, addr, pud); + + /* + * device mapped pages can only be returned if the + * caller will manage the page reference count. + */ + if (!(flags & FOLL_GET)) + return ERR_PTR(-EEXIST); + + pfn += (addr & ~PUD_MASK) >> PAGE_SHIFT; + pgmap = get_dev_pagemap(pfn, NULL); + if (!pgmap) + return ERR_PTR(-EFAULT); + page = pfn_to_page(pfn); + get_page(page); + put_dev_pagemap(pgmap); + + return page; +} + +int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm, + pud_t *dst_pud, pud_t *src_pud, unsigned long addr, + struct vm_area_struct *vma) +{ + spinlock_t *dst_ptl, *src_ptl; + pud_t pud; + int ret; + + dst_ptl = pud_lock(dst_mm, dst_pud); + src_ptl = pud_lockptr(src_mm, src_pud); + spin_lock_nested(src_ptl, SINGLE_DEPTH_NESTING); + + ret = -EAGAIN; + pud = *src_pud; + if (unlikely(!pud_trans_huge(pud) && !pud_devmap(pud))) + goto out_unlock; + + /* + * When page table lock is held, the huge zero pud should not be + * under splitting since we don't split the page itself, only pud to + * a page table. + */ + if (is_huge_zero_pud(pud)) { + /* No huge zero pud yet */ + } + + pudp_set_wrprotect(src_mm, addr, src_pud); + pud = pud_mkold(pud_wrprotect(pud)); + set_pud_at(dst_mm, addr, dst_pud, pud); + + ret = 0; +out_unlock: + spin_unlock(src_ptl); + spin_unlock(dst_ptl); + return ret; +} + +void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud) +{ + pud_t entry; + unsigned long haddr; + bool write = vmf->flags & FAULT_FLAG_WRITE; + + vmf->ptl = pud_lock(vmf->vma->vm_mm, vmf->pud); + if (unlikely(!pud_same(*vmf->pud, orig_pud))) + goto unlock; + + entry = pud_mkyoung(orig_pud); + if (write) + entry = pud_mkdirty(entry); + haddr = vmf->address & HPAGE_PUD_MASK; + if (pudp_set_access_flags(vmf->vma, haddr, vmf->pud, entry, write)) + update_mmu_cache_pud(vmf->vma, vmf->address, vmf->pud); + +unlock: + spin_unlock(vmf->ptl); +} +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ + void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd) { pmd_t entry; @@ -1601,6 +1772,84 @@ spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma) return NULL; } +/* + * Returns true if a given pud maps a thp, false otherwise. + * + * Note that if it returns true, this routine returns without unlocking page + * table lock. So callers must unlock it. + */ +spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma) +{ + spinlock_t *ptl; + + ptl = pud_lock(vma->vm_mm, pud); + if (likely(pud_trans_huge(*pud) || pud_devmap(*pud))) + return ptl; + spin_unlock(ptl); + return NULL; +} + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, + pud_t *pud, unsigned long addr) +{ + pud_t orig_pud; + spinlock_t *ptl; + + ptl = __pud_trans_huge_lock(pud, vma); + if (!ptl) + return 0; + /* + * For architectures like ppc64 we look at deposited pgtable + * when calling pudp_huge_get_and_clear. So do the + * pgtable_trans_huge_withdraw after finishing pudp related + * operations. + */ + orig_pud = pudp_huge_get_and_clear_full(tlb->mm, addr, pud, + tlb->fullmm); + tlb_remove_pud_tlb_entry(tlb, pud, addr); + if (vma_is_dax(vma)) { + spin_unlock(ptl); + /* No zero page support yet */ + } else { + /* No support for anonymous PUD pages yet */ + BUG(); + } + return 1; +} + +static void __split_huge_pud_locked(struct vm_area_struct *vma, pud_t *pud, + unsigned long haddr) +{ + VM_BUG_ON(haddr & ~HPAGE_PUD_MASK); + VM_BUG_ON_VMA(vma->vm_start > haddr, vma); + VM_BUG_ON_VMA(vma->vm_end < haddr + HPAGE_PUD_SIZE, vma); + VM_BUG_ON(!pud_trans_huge(*pud) && !pud_devmap(*pud)); + + count_vm_event(THP_SPLIT_PMD); + + pudp_huge_clear_flush_notify(vma, haddr, pud); +} + +void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud, + unsigned long address) +{ + spinlock_t *ptl; + struct mm_struct *mm = vma->vm_mm; + unsigned long haddr = address & HPAGE_PUD_MASK; + + mmu_notifier_invalidate_range_start(mm, haddr, haddr + HPAGE_PUD_SIZE); + ptl = pud_lock(mm, pud); + if (unlikely(!pud_trans_huge(*pud) && !pud_devmap(*pud))) + goto out; + __split_huge_pud_locked(vma, pud, haddr); + +out: + spin_unlock(ptl); + mmu_notifier_invalidate_range_end(mm, haddr, haddr + HPAGE_PUD_SIZE); +} +#endif /* CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD */ + static void __split_huge_zero_page_pmd(struct vm_area_struct *vma, unsigned long haddr, pmd_t *pmd) { diff --git a/mm/memory.c b/mm/memory.c index e721e8eba570..41e2a2d4b2a6 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -1001,7 +1001,7 @@ static inline int copy_pmd_range(struct mm_struct *dst_mm, struct mm_struct *src next = pmd_addr_end(addr, end); if (pmd_trans_huge(*src_pmd) || pmd_devmap(*src_pmd)) { int err; - VM_BUG_ON(next-addr != HPAGE_PMD_SIZE); + VM_BUG_ON_VMA(next-addr != HPAGE_PMD_SIZE, vma); err = copy_huge_pmd(dst_mm, src_mm, dst_pmd, src_pmd, addr, vma); if (err == -ENOMEM) @@ -1032,6 +1032,18 @@ static inline int copy_pud_range(struct mm_struct *dst_mm, struct mm_struct *src src_pud = pud_offset(src_pgd, addr); do { next = pud_addr_end(addr, end); + if (pud_trans_huge(*src_pud) || pud_devmap(*src_pud)) { + int err; + + VM_BUG_ON_VMA(next-addr != HPAGE_PUD_SIZE, vma); + err = copy_huge_pud(dst_mm, src_mm, + dst_pud, src_pud, addr, vma); + if (err == -ENOMEM) + return -ENOMEM; + if (!err) + continue; + /* fall through */ + } if (pud_none_or_clear_bad(src_pud)) continue; if (copy_pmd_range(dst_mm, src_mm, dst_pud, src_pud, @@ -1263,9 +1275,19 @@ static inline unsigned long zap_pud_range(struct mmu_gather *tlb, pud = pud_offset(pgd, addr); do { next = pud_addr_end(addr, end); + if (pud_trans_huge(*pud) || pud_devmap(*pud)) { + if (next - addr != HPAGE_PUD_SIZE) { + VM_BUG_ON_VMA(!rwsem_is_locked(&tlb->mm->mmap_sem), vma); + split_huge_pud(vma, pud, addr); + } else if (zap_huge_pud(tlb, vma, pud, addr)) + goto next; + /* fall through */ + } if (pud_none_or_clear_bad(pud)) continue; next = zap_pmd_range(tlb, vma, pud, addr, next, details); +next: + cond_resched(); } while (pud++, addr = next, addr != end); return addr; @@ -3490,6 +3512,30 @@ static inline bool vma_is_accessible(struct vm_area_struct *vma) return vma->vm_flags & (VM_READ | VM_EXEC | VM_WRITE); } +static int create_huge_pud(struct vm_fault *vmf) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + +static int wp_huge_pud(struct vm_fault *vmf, pud_t orig_pud) +{ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE + /* No support for anonymous transparent PUD pages yet */ + if (vma_is_anonymous(vmf->vma)) + return VM_FAULT_FALLBACK; + if (vmf->vma->vm_ops->huge_fault) + return vmf->vma->vm_ops->huge_fault(vmf); +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ + return VM_FAULT_FALLBACK; +} + /* * These routines also need to handle stuff like marking pages dirty * and/or accessed for architectures that don't do it in hardware (most @@ -3605,14 +3651,41 @@ static int __handle_mm_fault(struct vm_area_struct *vma, unsigned long address, }; struct mm_struct *mm = vma->vm_mm; pgd_t *pgd; - pud_t *pud; int ret; pgd = pgd_offset(mm, address); - pud = pud_alloc(mm, pgd, address); - if (!pud) + + vmf.pud = pud_alloc(mm, pgd, address); + if (!vmf.pud) return VM_FAULT_OOM; - vmf.pmd = pmd_alloc(mm, pud, address); + if (pud_none(*vmf.pud) && transparent_hugepage_enabled(vma)) { + vmf.flags |= FAULT_FLAG_SIZE_PUD; + ret = create_huge_pud(&vmf); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } else { + pud_t orig_pud = *vmf.pud; + + barrier(); + if (pud_trans_huge(orig_pud) || pud_devmap(orig_pud)) { + unsigned int dirty = flags & FAULT_FLAG_WRITE; + + vmf.flags |= FAULT_FLAG_SIZE_PUD; + + /* NUMA case for anonymous PUDs would go here */ + + if (dirty && !pud_write(orig_pud)) { + ret = wp_huge_pud(&vmf, orig_pud); + if (!(ret & VM_FAULT_FALLBACK)) + return ret; + } else { + huge_pud_set_accessed(&vmf, orig_pud); + return 0; + } + } + } + + vmf.pmd = pmd_alloc(mm, vmf.pud, address); if (!vmf.pmd) return VM_FAULT_OOM; if (pmd_none(*vmf.pmd) && transparent_hugepage_enabled(vma)) { @@ -3743,13 +3816,14 @@ int __pud_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address) */ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) { + spinlock_t *ptl; pmd_t *new = pmd_alloc_one(mm, address); if (!new) return -ENOMEM; smp_wmb(); /* See comment in __pte_alloc */ - spin_lock(&mm->page_table_lock); + ptl = pud_lock(mm, pud); #ifndef __ARCH_HAS_4LEVEL_HACK if (!pud_present(*pud)) { mm_inc_nr_pmds(mm); @@ -3763,7 +3837,7 @@ int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address) } else /* Another has populated it */ pmd_free(mm, new); #endif /* __ARCH_HAS_4LEVEL_HACK */ - spin_unlock(&mm->page_table_lock); + spin_unlock(ptl); return 0; } #endif /* __PAGETABLE_PMD_FOLDED */ diff --git a/mm/pagewalk.c b/mm/pagewalk.c index 207244489a68..03761577ae86 100644 --- a/mm/pagewalk.c +++ b/mm/pagewalk.c @@ -78,14 +78,32 @@ static int walk_pud_range(pgd_t *pgd, unsigned long addr, unsigned long end, pud = pud_offset(pgd, addr); do { + again: next = pud_addr_end(addr, end); - if (pud_none_or_clear_bad(pud)) { + if (pud_none(*pud) || !walk->vma) { if (walk->pte_hole) err = walk->pte_hole(addr, next, walk); if (err) break; continue; } + + if (walk->pud_entry) { + spinlock_t *ptl = pud_trans_huge_lock(pud, walk->vma); + + if (ptl) { + err = walk->pud_entry(pud, addr, next, walk); + spin_unlock(ptl); + if (err) + break; + continue; + } + } + + split_huge_pud(walk->vma, pud, addr); + if (pud_none(*pud)) + goto again; + if (walk->pmd_entry || walk->pte_entry) err = walk_pmd_range(pud, addr, next, walk); if (err) diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index 71c5f9109f2a..4ed5908c65b0 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -123,6 +123,20 @@ pmd_t pmdp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, flush_pmd_tlb_range(vma, address, address + HPAGE_PMD_SIZE); return pmd; } + +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD +pud_t pudp_huge_clear_flush(struct vm_area_struct *vma, unsigned long address, + pud_t *pudp) +{ + pud_t pud; + + VM_BUG_ON(address & ~HPAGE_PUD_MASK); + VM_BUG_ON(!pud_trans_huge(*pudp) && !pud_devmap(*pudp)); + pud = pudp_huge_get_and_clear(vma->vm_mm, address, pudp); + flush_pud_tlb_range(vma, address, address + HPAGE_PUD_SIZE); + return pud; +} +#endif #endif #ifndef __HAVE_ARCH_PGTABLE_DEPOSIT -- cgit v1.2.3 From 220ced1676c490c3192dd9bc1a06be86dee88a56 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 24 Feb 2017 14:57:12 -0800 Subject: mm: fix get_user_pages() vs device-dax pud mappings A new unit test for the device-dax 1GB enabling currently fails with this warning before hanging the test thread: WARNING: CPU: 0 PID: 21 at lib/percpu-refcount.c:155 percpu_ref_switch_to_atomic_rcu+0x1e3/0x1f0 percpu ref (dax_pmem_percpu_release [dax_pmem]) <= 0 (0) after switching to atomic [..] CPU: 0 PID: 21 Comm: rcuos/1 Tainted: G O 4.10.0-rc7-next-20170207+ #944 [..] Call Trace: dump_stack+0x86/0xc3 __warn+0xcb/0xf0 warn_slowpath_fmt+0x5f/0x80 ? rcu_nocb_kthread+0x27a/0x510 ? dax_pmem_percpu_exit+0x50/0x50 [dax_pmem] percpu_ref_switch_to_atomic_rcu+0x1e3/0x1f0 ? percpu_ref_exit+0x60/0x60 rcu_nocb_kthread+0x339/0x510 ? rcu_nocb_kthread+0x27a/0x510 kthread+0x101/0x140 The get_user_pages() path needs to arrange for references to be taken against the dev_pagemap instance backing the pud mapping. Refactor the existing __gup_device_huge_pmd() to also account for the pud case. Link: http://lkml.kernel.org/r/148653181153.38226.9605457830505509385.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Dave Jiang Cc: Matthew Wilcox Cc: Ross Zwisler Cc: Kirill A. Shutemov Cc: Nilesh Choudhury Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 28 ++++++++++++++++++++++++---- 1 file changed, 24 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 0d4fb3ebbbac..99c7805a9693 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -154,14 +154,12 @@ static inline void get_head_page_multiple(struct page *page, int nr) SetPageReferenced(page); } -static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, +static int __gup_device_huge(unsigned long pfn, unsigned long addr, unsigned long end, struct page **pages, int *nr) { int nr_start = *nr; - unsigned long pfn = pmd_pfn(pmd); struct dev_pagemap *pgmap = NULL; - pfn += (addr & ~PMD_MASK) >> PAGE_SHIFT; do { struct page *page = pfn_to_page(pfn); @@ -180,6 +178,24 @@ static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, return 1; } +static int __gup_device_huge_pmd(pmd_t pmd, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + unsigned long fault_pfn; + + fault_pfn = pmd_pfn(pmd) + ((addr & ~PMD_MASK) >> PAGE_SHIFT); + return __gup_device_huge(fault_pfn, addr, end, pages, nr); +} + +static int __gup_device_huge_pud(pud_t pud, unsigned long addr, + unsigned long end, struct page **pages, int *nr) +{ + unsigned long fault_pfn; + + fault_pfn = pud_pfn(pud) + ((addr & ~PUD_MASK) >> PAGE_SHIFT); + return __gup_device_huge(fault_pfn, addr, end, pages, nr); +} + static noinline int gup_huge_pmd(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { @@ -251,9 +267,13 @@ static noinline int gup_huge_pud(pud_t pud, unsigned long addr, if (!pte_allows_gup(pud_val(pud), write)) return 0; + + VM_BUG_ON(!pfn_valid(pud_pfn(pud))); + if (pud_devmap(pud)) + return __gup_device_huge_pud(pud, addr, end, pages, nr); + /* hugepages are never "special" */ VM_BUG_ON(pud_flags(pud) & _PAGE_SPECIAL); - VM_BUG_ON(!pfn_valid(pud_pfn(pud))); refs = 0; head = pud_page(pud); -- cgit v1.2.3 From 897ab3e0c49e24b62e2d54d165c7afec6bbca65b Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Fri, 24 Feb 2017 14:58:22 -0800 Subject: userfaultfd: non-cooperative: add event for memory unmaps When a non-cooperative userfaultfd monitor copies pages in the background, it may encounter regions that were already unmapped. Addition of UFFD_EVENT_UNMAP allows the uffd monitor to track precisely changes in the virtual memory layout. Since there might be different uffd contexts for the affected VMAs, we first should create a temporary representation for the unmap event for each uffd context and then notify them one by one to the appropriate userfault file descriptors. The event notification occurs after the mmap_sem has been released. [arnd@arndb.de: fix nommu build] Link: http://lkml.kernel.org/r/20170203165141.3665284-1-arnd@arndb.de [mhocko@suse.com: fix nommu build] Link: http://lkml.kernel.org/r/20170202091503.GA22823@dhcp22.suse.cz Link: http://lkml.kernel.org/r/1485542673-24387-3-git-send-email-rppt@linux.vnet.ibm.com Signed-off-by: Mike Rapoport Signed-off-by: Michal Hocko Signed-off-by: Arnd Bergmann Acked-by: Hillf Danton Cc: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: Mike Kravetz Cc: Pavel Emelyanov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/mips/kernel/vdso.c | 2 +- arch/tile/mm/elf.c | 2 +- arch/x86/entry/vdso/vma.c | 2 +- arch/x86/mm/mpx.c | 4 +-- fs/aio.c | 2 +- fs/proc/vmcore.c | 4 +-- fs/userfaultfd.c | 65 ++++++++++++++++++++++++++++++++++++++++ include/linux/mm.h | 14 +++++---- include/linux/userfaultfd_k.h | 18 +++++++++++ include/uapi/linux/userfaultfd.h | 3 ++ ipc/shm.c | 8 ++--- mm/mmap.c | 46 ++++++++++++++++++---------- mm/mremap.c | 23 ++++++++------ mm/nommu.c | 7 +++-- mm/util.c | 5 +++- 15 files changed, 160 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/mips/kernel/vdso.c b/arch/mips/kernel/vdso.c index f9dbfb14af33..093517e85a6c 100644 --- a/arch/mips/kernel/vdso.c +++ b/arch/mips/kernel/vdso.c @@ -111,7 +111,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, int uses_interp) base = mmap_region(NULL, STACK_TOP, PAGE_SIZE, VM_READ|VM_WRITE|VM_EXEC| VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, - 0); + 0, NULL); if (IS_ERR_VALUE(base)) { ret = base; goto out; diff --git a/arch/tile/mm/elf.c b/arch/tile/mm/elf.c index 6225cc998db1..889901824400 100644 --- a/arch/tile/mm/elf.c +++ b/arch/tile/mm/elf.c @@ -143,7 +143,7 @@ int arch_setup_additional_pages(struct linux_binprm *bprm, unsigned long addr = MEM_USER_INTRPT; addr = mmap_region(NULL, addr, INTRPT_SIZE, VM_READ|VM_EXEC| - VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0); + VM_MAYREAD|VM_MAYWRITE|VM_MAYEXEC, 0, NULL); if (addr > (unsigned long) -PAGE_SIZE) retval = (int) addr; } diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 10820f6cefbf..572cee3fccff 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -186,7 +186,7 @@ static int map_vdso(const struct vdso_image *image, unsigned long addr) if (IS_ERR(vma)) { ret = PTR_ERR(vma); - do_munmap(mm, text_start, image->size); + do_munmap(mm, text_start, image->size, NULL); } else { current->mm->context.vdso = (void __user *)text_start; current->mm->context.vdso_image = image; diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index aad4ac386f98..c98079684bdb 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -51,7 +51,7 @@ static unsigned long mpx_mmap(unsigned long len) down_write(&mm->mmap_sem); addr = do_mmap(NULL, 0, len, PROT_READ | PROT_WRITE, - MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate); + MAP_ANONYMOUS | MAP_PRIVATE, VM_MPX, 0, &populate, NULL); up_write(&mm->mmap_sem); if (populate) mm_populate(addr, populate); @@ -893,7 +893,7 @@ static int unmap_entire_bt(struct mm_struct *mm, * avoid recursion, do_munmap() will check whether it comes * from one bounds table through VM_MPX flag. */ - return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm)); + return do_munmap(mm, bt_addr, mpx_bt_size_bytes(mm), NULL); } static int try_unmap_single_bt(struct mm_struct *mm, diff --git a/fs/aio.c b/fs/aio.c index 873b4ca82ccb..7e2ab9c8e39c 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -512,7 +512,7 @@ static int aio_setup_ring(struct kioctx *ctx) ctx->mmap_base = do_mmap_pgoff(ctx->aio_ring_file, 0, ctx->mmap_size, PROT_READ | PROT_WRITE, - MAP_SHARED, 0, &unused); + MAP_SHARED, 0, &unused, NULL); up_write(&mm->mmap_sem); if (IS_ERR((void *)ctx->mmap_base)) { ctx->mmap_size = 0; diff --git a/fs/proc/vmcore.c b/fs/proc/vmcore.c index f52d8e857ff7..885d445afa0d 100644 --- a/fs/proc/vmcore.c +++ b/fs/proc/vmcore.c @@ -388,7 +388,7 @@ static int remap_oldmem_pfn_checked(struct vm_area_struct *vma, } return 0; fail: - do_munmap(vma->vm_mm, from, len); + do_munmap(vma->vm_mm, from, len, NULL); return -EAGAIN; } @@ -481,7 +481,7 @@ static int mmap_vmcore(struct file *file, struct vm_area_struct *vma) return 0; fail: - do_munmap(vma->vm_mm, vma->vm_start, len); + do_munmap(vma->vm_mm, vma->vm_start, len, NULL); return -EAGAIN; } #else diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 8fe601b4875e..4c78458ea78d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -71,6 +71,13 @@ struct userfaultfd_fork_ctx { struct list_head list; }; +struct userfaultfd_unmap_ctx { + struct userfaultfd_ctx *ctx; + unsigned long start; + unsigned long end; + struct list_head list; +}; + struct userfaultfd_wait_queue { struct uffd_msg msg; wait_queue_t wq; @@ -709,6 +716,64 @@ void userfaultfd_remove(struct vm_area_struct *vma, down_read(&mm->mmap_sem); } +static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps, + unsigned long start, unsigned long end) +{ + struct userfaultfd_unmap_ctx *unmap_ctx; + + list_for_each_entry(unmap_ctx, unmaps, list) + if (unmap_ctx->ctx == ctx && unmap_ctx->start == start && + unmap_ctx->end == end) + return true; + + return false; +} + +int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *unmaps) +{ + for ( ; vma && vma->vm_start < end; vma = vma->vm_next) { + struct userfaultfd_unmap_ctx *unmap_ctx; + struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx; + + if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) || + has_unmap_ctx(ctx, unmaps, start, end)) + continue; + + unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL); + if (!unmap_ctx) + return -ENOMEM; + + userfaultfd_ctx_get(ctx); + unmap_ctx->ctx = ctx; + unmap_ctx->start = start; + unmap_ctx->end = end; + list_add_tail(&unmap_ctx->list, unmaps); + } + + return 0; +} + +void userfaultfd_unmap_complete(struct mm_struct *mm, struct list_head *uf) +{ + struct userfaultfd_unmap_ctx *ctx, *n; + struct userfaultfd_wait_queue ewq; + + list_for_each_entry_safe(ctx, n, uf, list) { + msg_init(&ewq.msg); + + ewq.msg.event = UFFD_EVENT_UNMAP; + ewq.msg.arg.remove.start = ctx->start; + ewq.msg.arg.remove.end = ctx->end; + + userfaultfd_event_wait_completion(ctx->ctx, &ewq); + + list_del(&ctx->list); + kfree(ctx); + } +} + static int userfaultfd_release(struct inode *inode, struct file *file) { struct userfaultfd_ctx *ctx = file->private_data; diff --git a/include/linux/mm.h b/include/linux/mm.h index c65aa43b5712..c6fcba1d1ae5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2090,18 +2090,22 @@ extern int install_special_mapping(struct mm_struct *mm, extern unsigned long get_unmapped_area(struct file *, unsigned long, unsigned long, unsigned long, unsigned long); extern unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff); + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf); extern unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate); -extern int do_munmap(struct mm_struct *, unsigned long, size_t); + vm_flags_t vm_flags, unsigned long pgoff, unsigned long *populate, + struct list_head *uf); +extern int do_munmap(struct mm_struct *, unsigned long, size_t, + struct list_head *uf); static inline unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, - unsigned long pgoff, unsigned long *populate) + unsigned long pgoff, unsigned long *populate, + struct list_head *uf) { - return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate); + return do_mmap(file, addr, len, prot, flags, 0, pgoff, populate, uf); } #ifdef CONFIG_MMU diff --git a/include/linux/userfaultfd_k.h b/include/linux/userfaultfd_k.h index 2521542f6c07..a40be5d0661b 100644 --- a/include/linux/userfaultfd_k.h +++ b/include/linux/userfaultfd_k.h @@ -66,6 +66,12 @@ extern void userfaultfd_remove(struct vm_area_struct *vma, unsigned long start, unsigned long end); +extern int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *uf); +extern void userfaultfd_unmap_complete(struct mm_struct *mm, + struct list_head *uf); + #else /* CONFIG_USERFAULTFD */ /* mm helpers */ @@ -118,6 +124,18 @@ static inline void userfaultfd_remove(struct vm_area_struct *vma, unsigned long end) { } + +static inline int userfaultfd_unmap_prep(struct vm_area_struct *vma, + unsigned long start, unsigned long end, + struct list_head *uf) +{ + return 0; +} + +static inline void userfaultfd_unmap_complete(struct mm_struct *mm, + struct list_head *uf) +{ +} #endif /* CONFIG_USERFAULTFD */ #endif /* _LINUX_USERFAULTFD_K_H */ diff --git a/include/uapi/linux/userfaultfd.h b/include/uapi/linux/userfaultfd.h index b742c40c2880..3b059530dac9 100644 --- a/include/uapi/linux/userfaultfd.h +++ b/include/uapi/linux/userfaultfd.h @@ -21,6 +21,7 @@ #define UFFD_API_FEATURES (UFFD_FEATURE_EVENT_FORK | \ UFFD_FEATURE_EVENT_REMAP | \ UFFD_FEATURE_EVENT_REMOVE | \ + UFFD_FEATURE_EVENT_UNMAP | \ UFFD_FEATURE_MISSING_HUGETLBFS | \ UFFD_FEATURE_MISSING_SHMEM) #define UFFD_API_IOCTLS \ @@ -110,6 +111,7 @@ struct uffd_msg { #define UFFD_EVENT_FORK 0x13 #define UFFD_EVENT_REMAP 0x14 #define UFFD_EVENT_REMOVE 0x15 +#define UFFD_EVENT_UNMAP 0x16 /* flags for UFFD_EVENT_PAGEFAULT */ #define UFFD_PAGEFAULT_FLAG_WRITE (1<<0) /* If this was a write fault */ @@ -158,6 +160,7 @@ struct uffdio_api { #define UFFD_FEATURE_EVENT_REMOVE (1<<3) #define UFFD_FEATURE_MISSING_HUGETLBFS (1<<4) #define UFFD_FEATURE_MISSING_SHMEM (1<<5) +#define UFFD_FEATURE_EVENT_UNMAP (1<<6) __u64 features; __u64 ioctls; diff --git a/ipc/shm.c b/ipc/shm.c index 7f6537b84ef5..d7805acb44fd 100644 --- a/ipc/shm.c +++ b/ipc/shm.c @@ -1222,7 +1222,7 @@ long do_shmat(int shmid, char __user *shmaddr, int shmflg, ulong *raddr, goto invalid; } - addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate); + addr = do_mmap_pgoff(file, addr, size, prot, flags, 0, &populate, NULL); *raddr = addr; err = 0; if (IS_ERR_VALUE(addr)) @@ -1329,7 +1329,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) */ file = vma->vm_file; size = i_size_read(file_inode(vma->vm_file)); - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); /* * We discovered the size of the shm segment, so * break out of here and fall through to the next @@ -1356,7 +1356,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) if ((vma->vm_ops == &shm_vm_ops) && ((vma->vm_start - addr)/PAGE_SIZE == vma->vm_pgoff) && (vma->vm_file == file)) - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); vma = next; } @@ -1365,7 +1365,7 @@ SYSCALL_DEFINE1(shmdt, char __user *, shmaddr) * given */ if (vma && vma->vm_start == addr && vma->vm_ops == &shm_vm_ops) { - do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start); + do_munmap(mm, vma->vm_start, vma->vm_end - vma->vm_start, NULL); retval = 0; } diff --git a/mm/mmap.c b/mm/mmap.c index 13d16a2b7623..1cec28d20583 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -176,7 +176,7 @@ static struct vm_area_struct *remove_vma(struct vm_area_struct *vma) return next; } -static int do_brk(unsigned long addr, unsigned long len); +static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf); SYSCALL_DEFINE1(brk, unsigned long, brk) { @@ -185,6 +185,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) struct mm_struct *mm = current->mm; unsigned long min_brk; bool populate; + LIST_HEAD(uf); if (down_write_killable(&mm->mmap_sem)) return -EINTR; @@ -222,7 +223,7 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) /* Always allow shrinking brk. */ if (brk <= mm->brk) { - if (!do_munmap(mm, newbrk, oldbrk-newbrk)) + if (!do_munmap(mm, newbrk, oldbrk-newbrk, &uf)) goto set_brk; goto out; } @@ -232,13 +233,14 @@ SYSCALL_DEFINE1(brk, unsigned long, brk) goto out; /* Ok, looks good - let it rip. */ - if (do_brk(oldbrk, newbrk-oldbrk) < 0) + if (do_brk(oldbrk, newbrk-oldbrk, &uf) < 0) goto out; set_brk: mm->brk = brk; populate = newbrk > oldbrk && (mm->def_flags & VM_LOCKED) != 0; up_write(&mm->mmap_sem); + userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(oldbrk, newbrk - oldbrk); return brk; @@ -1304,7 +1306,8 @@ static inline int mlock_future_check(struct mm_struct *mm, unsigned long do_mmap(struct file *file, unsigned long addr, unsigned long len, unsigned long prot, unsigned long flags, vm_flags_t vm_flags, - unsigned long pgoff, unsigned long *populate) + unsigned long pgoff, unsigned long *populate, + struct list_head *uf) { struct mm_struct *mm = current->mm; int pkey = 0; @@ -1447,7 +1450,7 @@ unsigned long do_mmap(struct file *file, unsigned long addr, vm_flags |= VM_NORESERVE; } - addr = mmap_region(file, addr, len, vm_flags, pgoff); + addr = mmap_region(file, addr, len, vm_flags, pgoff, uf); if (!IS_ERR_VALUE(addr) && ((vm_flags & VM_LOCKED) || (flags & (MAP_POPULATE | MAP_NONBLOCK)) == MAP_POPULATE)) @@ -1583,7 +1586,8 @@ static inline int accountable_mapping(struct file *file, vm_flags_t vm_flags) } unsigned long mmap_region(struct file *file, unsigned long addr, - unsigned long len, vm_flags_t vm_flags, unsigned long pgoff) + unsigned long len, vm_flags_t vm_flags, unsigned long pgoff, + struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; @@ -1609,7 +1613,7 @@ unsigned long mmap_region(struct file *file, unsigned long addr, /* Clear old maps */ while (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { - if (do_munmap(mm, addr, len)) + if (do_munmap(mm, addr, len, uf)) return -ENOMEM; } @@ -2579,7 +2583,8 @@ int split_vma(struct mm_struct *mm, struct vm_area_struct *vma, * work. This now handles partial unmappings. * Jeremy Fitzhardinge */ -int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, + struct list_head *uf) { unsigned long end; struct vm_area_struct *vma, *prev, *last; @@ -2603,6 +2608,13 @@ int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) if (vma->vm_start >= end) return 0; + if (uf) { + int error = userfaultfd_unmap_prep(vma, start, end, uf); + + if (error) + return error; + } + /* * If we need to split any vma, do it now to save pain later. * @@ -2668,12 +2680,14 @@ int vm_munmap(unsigned long start, size_t len) { int ret; struct mm_struct *mm = current->mm; + LIST_HEAD(uf); if (down_write_killable(&mm->mmap_sem)) return -EINTR; - ret = do_munmap(mm, start, len); + ret = do_munmap(mm, start, len, &uf); up_write(&mm->mmap_sem); + userfaultfd_unmap_complete(mm, &uf); return ret; } EXPORT_SYMBOL(vm_munmap); @@ -2773,7 +2787,7 @@ SYSCALL_DEFINE5(remap_file_pages, unsigned long, start, unsigned long, size, file = get_file(vma->vm_file); ret = do_mmap_pgoff(vma->vm_file, start, size, - prot, flags, pgoff, &populate); + prot, flags, pgoff, &populate, NULL); fput(file); out: up_write(&mm->mmap_sem); @@ -2799,7 +2813,7 @@ static inline void verify_mm_writelocked(struct mm_struct *mm) * anonymous maps. eventually we may be able to do some * brk-specific accounting here. */ -static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags) +static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long flags, struct list_head *uf) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma, *prev; @@ -2838,7 +2852,7 @@ static int do_brk_flags(unsigned long addr, unsigned long request, unsigned long */ while (find_vma_links(mm, addr, addr + len, &prev, &rb_link, &rb_parent)) { - if (do_munmap(mm, addr, len)) + if (do_munmap(mm, addr, len, uf)) return -ENOMEM; } @@ -2885,9 +2899,9 @@ out: return 0; } -static int do_brk(unsigned long addr, unsigned long len) +static int do_brk(unsigned long addr, unsigned long len, struct list_head *uf) { - return do_brk_flags(addr, len, 0); + return do_brk_flags(addr, len, 0, uf); } int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) @@ -2895,13 +2909,15 @@ int vm_brk_flags(unsigned long addr, unsigned long len, unsigned long flags) struct mm_struct *mm = current->mm; int ret; bool populate; + LIST_HEAD(uf); if (down_write_killable(&mm->mmap_sem)) return -EINTR; - ret = do_brk_flags(addr, len, flags); + ret = do_brk_flags(addr, len, flags, &uf); populate = ((mm->def_flags & VM_LOCKED) != 0); up_write(&mm->mmap_sem); + userfaultfd_unmap_complete(mm, &uf); if (populate && !ret) mm_populate(addr, len); return ret; diff --git a/mm/mremap.c b/mm/mremap.c index 8779928d6a70..8233b0105c82 100644 --- a/mm/mremap.c +++ b/mm/mremap.c @@ -252,7 +252,8 @@ unsigned long move_page_tables(struct vm_area_struct *vma, static unsigned long move_vma(struct vm_area_struct *vma, unsigned long old_addr, unsigned long old_len, unsigned long new_len, unsigned long new_addr, - bool *locked, struct vm_userfaultfd_ctx *uf) + bool *locked, struct vm_userfaultfd_ctx *uf, + struct list_head *uf_unmap) { struct mm_struct *mm = vma->vm_mm; struct vm_area_struct *new_vma; @@ -341,7 +342,7 @@ static unsigned long move_vma(struct vm_area_struct *vma, if (unlikely(vma->vm_flags & VM_PFNMAP)) untrack_pfn_moved(vma); - if (do_munmap(mm, old_addr, old_len) < 0) { + if (do_munmap(mm, old_addr, old_len, uf_unmap) < 0) { /* OOM: unable to split vma, just get accounts right */ vm_unacct_memory(excess >> PAGE_SHIFT); excess = 0; @@ -417,7 +418,8 @@ static struct vm_area_struct *vma_to_resize(unsigned long addr, static unsigned long mremap_to(unsigned long addr, unsigned long old_len, unsigned long new_addr, unsigned long new_len, bool *locked, - struct vm_userfaultfd_ctx *uf) + struct vm_userfaultfd_ctx *uf, + struct list_head *uf_unmap) { struct mm_struct *mm = current->mm; struct vm_area_struct *vma; @@ -435,12 +437,12 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (addr + old_len > new_addr && new_addr + new_len > addr) goto out; - ret = do_munmap(mm, new_addr, new_len); + ret = do_munmap(mm, new_addr, new_len, NULL); if (ret) goto out; if (old_len >= new_len) { - ret = do_munmap(mm, addr+new_len, old_len - new_len); + ret = do_munmap(mm, addr+new_len, old_len - new_len, uf_unmap); if (ret && old_len != new_len) goto out; old_len = new_len; @@ -462,7 +464,8 @@ static unsigned long mremap_to(unsigned long addr, unsigned long old_len, if (offset_in_page(ret)) goto out1; - ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf); + ret = move_vma(vma, addr, old_len, new_len, new_addr, locked, uf, + uf_unmap); if (!(offset_in_page(ret))) goto out; out1: @@ -502,6 +505,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, unsigned long charged = 0; bool locked = false; struct vm_userfaultfd_ctx uf = NULL_VM_UFFD_CTX; + LIST_HEAD(uf_unmap); if (flags & ~(MREMAP_FIXED | MREMAP_MAYMOVE)) return ret; @@ -528,7 +532,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, if (flags & MREMAP_FIXED) { ret = mremap_to(addr, old_len, new_addr, new_len, - &locked, &uf); + &locked, &uf, &uf_unmap); goto out; } @@ -538,7 +542,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, * do_munmap does all the needed commit accounting */ if (old_len >= new_len) { - ret = do_munmap(mm, addr+new_len, old_len - new_len); + ret = do_munmap(mm, addr+new_len, old_len - new_len, &uf_unmap); if (ret && old_len != new_len) goto out; ret = addr; @@ -598,7 +602,7 @@ SYSCALL_DEFINE5(mremap, unsigned long, addr, unsigned long, old_len, } ret = move_vma(vma, addr, old_len, new_len, new_addr, - &locked, &uf); + &locked, &uf, &uf_unmap); } out: if (offset_in_page(ret)) { @@ -609,5 +613,6 @@ out: if (locked && new_len > old_len) mm_populate(new_addr + old_len, new_len - old_len); mremap_userfaultfd_complete(&uf, addr, new_addr, old_len); + userfaultfd_unmap_complete(mm, &uf_unmap); return ret; } diff --git a/mm/nommu.c b/mm/nommu.c index 215c62296028..fe9f4fa4a7a7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -1205,7 +1205,8 @@ unsigned long do_mmap(struct file *file, unsigned long flags, vm_flags_t vm_flags, unsigned long pgoff, - unsigned long *populate) + unsigned long *populate, + struct list_head *uf) { struct vm_area_struct *vma; struct vm_region *region; @@ -1577,7 +1578,7 @@ static int shrink_vma(struct mm_struct *mm, * - under NOMMU conditions the chunk to be unmapped must be backed by a single * VMA, though it need not cover the whole VMA */ -int do_munmap(struct mm_struct *mm, unsigned long start, size_t len) +int do_munmap(struct mm_struct *mm, unsigned long start, size_t len, struct list_head *uf) { struct vm_area_struct *vma; unsigned long end; @@ -1643,7 +1644,7 @@ int vm_munmap(unsigned long addr, size_t len) int ret; down_write(&mm->mmap_sem); - ret = do_munmap(mm, addr, len); + ret = do_munmap(mm, addr, len, NULL); up_write(&mm->mmap_sem); return ret; } diff --git a/mm/util.c b/mm/util.c index 3cb2164f4099..b8f538863b5a 100644 --- a/mm/util.c +++ b/mm/util.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include @@ -297,14 +298,16 @@ unsigned long vm_mmap_pgoff(struct file *file, unsigned long addr, unsigned long ret; struct mm_struct *mm = current->mm; unsigned long populate; + LIST_HEAD(uf); ret = security_mmap_file(file, prot, flag); if (!ret) { if (down_write_killable(&mm->mmap_sem)) return -EINTR; ret = do_mmap_pgoff(file, addr, len, prot, flag, pgoff, - &populate); + &populate, &uf); up_write(&mm->mmap_sem); + userfaultfd_unmap_complete(mm, &uf); if (populate) mm_populate(ret, populate); } -- cgit v1.2.3 From 712c604dcdf8186295e2af694adf52c6842ad100 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 24 Feb 2017 14:58:44 -0800 Subject: mm: wire up GFP flag passing in dma_alloc_from_contiguous The callers of the DMA alloc functions already provide the proper context GFP flags. Make sure to pass them through to the CMA allocator, to make the CMA compaction context aware. Link: http://lkml.kernel.org/r/20170127172328.18574-3-l.stach@pengutronix.de Signed-off-by: Lucas Stach Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: Radim Krcmar Cc: Catalin Marinas Cc: Will Deacon Cc: Chris Zankel Cc: Ralf Baechle Cc: Paolo Bonzini Cc: Alexander Graf Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/arm/mm/dma-mapping.c | 16 +++++++++------- arch/arm64/mm/dma-mapping.c | 4 ++-- arch/mips/mm/dma-default.c | 4 ++-- arch/x86/kernel/pci-dma.c | 3 ++- arch/xtensa/kernel/pci-dma.c | 3 ++- drivers/base/dma-contiguous.c | 5 +++-- drivers/iommu/amd_iommu.c | 2 +- drivers/iommu/intel-iommu.c | 2 +- include/linux/dma-contiguous.h | 4 ++-- 9 files changed, 24 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/mm/dma-mapping.c b/arch/arm/mm/dma-mapping.c index 82d3e79ec82b..6ffdf17e0d5c 100644 --- a/arch/arm/mm/dma-mapping.c +++ b/arch/arm/mm/dma-mapping.c @@ -349,7 +349,7 @@ static void __dma_free_buffer(struct page *page, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, const void *caller, bool want_vaddr, - int coherent_flag); + int coherent_flag, gfp_t gfp); static void *__alloc_remap_buffer(struct device *dev, size_t size, gfp_t gfp, pgprot_t prot, struct page **ret_page, @@ -420,7 +420,8 @@ static int __init atomic_pool_init(void) */ if (dev_get_cma_area(NULL)) ptr = __alloc_from_contiguous(NULL, atomic_pool_size, prot, - &page, atomic_pool_init, true, NORMAL); + &page, atomic_pool_init, true, NORMAL, + GFP_KERNEL); else ptr = __alloc_remap_buffer(NULL, atomic_pool_size, gfp, prot, &page, atomic_pool_init, true); @@ -594,14 +595,14 @@ static int __free_from_pool(void *start, size_t size) static void *__alloc_from_contiguous(struct device *dev, size_t size, pgprot_t prot, struct page **ret_page, const void *caller, bool want_vaddr, - int coherent_flag) + int coherent_flag, gfp_t gfp) { unsigned long order = get_order(size); size_t count = size >> PAGE_SHIFT; struct page *page; void *ptr = NULL; - page = dma_alloc_from_contiguous(dev, count, order); + page = dma_alloc_from_contiguous(dev, count, order, gfp); if (!page) return NULL; @@ -655,7 +656,7 @@ static inline pgprot_t __get_dma_pgprot(unsigned long attrs, pgprot_t prot) #define __get_dma_pgprot(attrs, prot) __pgprot(0) #define __alloc_remap_buffer(dev, size, gfp, prot, ret, c, wv) NULL #define __alloc_from_pool(size, ret_page) NULL -#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag) NULL +#define __alloc_from_contiguous(dev, size, prot, ret, c, wv, coherent_flag, gfp) NULL #define __free_from_pool(cpu_addr, size) do { } while (0) #define __free_from_contiguous(dev, page, cpu_addr, size, wv) do { } while (0) #define __dma_free_remap(cpu_addr, size) do { } while (0) @@ -697,7 +698,8 @@ static void *cma_allocator_alloc(struct arm_dma_alloc_args *args, { return __alloc_from_contiguous(args->dev, args->size, args->prot, ret_page, args->caller, - args->want_vaddr, args->coherent_flag); + args->want_vaddr, args->coherent_flag, + args->gfp); } static void cma_allocator_free(struct arm_dma_free_args *args) @@ -1312,7 +1314,7 @@ static struct page **__iommu_alloc_buffer(struct device *dev, size_t size, unsigned long order = get_order(size); struct page *page; - page = dma_alloc_from_contiguous(dev, count, order); + page = dma_alloc_from_contiguous(dev, count, order, gfp); if (!page) goto error; diff --git a/arch/arm64/mm/dma-mapping.c b/arch/arm64/mm/dma-mapping.c index 351f7595cb3e..aff1d0afeb1e 100644 --- a/arch/arm64/mm/dma-mapping.c +++ b/arch/arm64/mm/dma-mapping.c @@ -107,7 +107,7 @@ static void *__dma_alloc_coherent(struct device *dev, size_t size, void *addr; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, - get_order(size)); + get_order(size), flags); if (!page) return NULL; @@ -390,7 +390,7 @@ static int __init atomic_pool_init(void) if (dev_get_cma_area(NULL)) page = dma_alloc_from_contiguous(NULL, nr_pages, - pool_size_order); + pool_size_order, GFP_KERNEL); else page = alloc_pages(GFP_DMA, pool_size_order); diff --git a/arch/mips/mm/dma-default.c b/arch/mips/mm/dma-default.c index a39c36af97ad..1895a692efd4 100644 --- a/arch/mips/mm/dma-default.c +++ b/arch/mips/mm/dma-default.c @@ -148,8 +148,8 @@ static void *mips_dma_alloc_coherent(struct device *dev, size_t size, gfp = massage_gfp_flags(dev, gfp); if (IS_ENABLED(CONFIG_DMA_CMA) && gfpflags_allow_blocking(gfp)) - page = dma_alloc_from_contiguous(dev, - count, get_order(size)); + page = dma_alloc_from_contiguous(dev, count, get_order(size), + gfp); if (!page) page = alloc_pages(gfp, get_order(size)); diff --git a/arch/x86/kernel/pci-dma.c b/arch/x86/kernel/pci-dma.c index d30c37750765..d5c223c9cf11 100644 --- a/arch/x86/kernel/pci-dma.c +++ b/arch/x86/kernel/pci-dma.c @@ -91,7 +91,8 @@ again: page = NULL; /* CMA can be used only in the context which permits sleeping */ if (gfpflags_allow_blocking(flag)) { - page = dma_alloc_from_contiguous(dev, count, get_order(size)); + page = dma_alloc_from_contiguous(dev, count, get_order(size), + flag); if (page && page_to_phys(page) + size > dma_mask) { dma_release_from_contiguous(dev, page, count); page = NULL; diff --git a/arch/xtensa/kernel/pci-dma.c b/arch/xtensa/kernel/pci-dma.c index 70e362e6038e..34c1f9fa6acc 100644 --- a/arch/xtensa/kernel/pci-dma.c +++ b/arch/xtensa/kernel/pci-dma.c @@ -158,7 +158,8 @@ static void *xtensa_dma_alloc(struct device *dev, size_t size, flag |= GFP_DMA; if (gfpflags_allow_blocking(flag)) - page = dma_alloc_from_contiguous(dev, count, get_order(size)); + page = dma_alloc_from_contiguous(dev, count, get_order(size), + flag); if (!page) page = alloc_pages(flag, get_order(size)); diff --git a/drivers/base/dma-contiguous.c b/drivers/base/dma-contiguous.c index d1a9cbabc627..b55804cac4c4 100644 --- a/drivers/base/dma-contiguous.c +++ b/drivers/base/dma-contiguous.c @@ -181,6 +181,7 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, * @dev: Pointer to device for which the allocation is performed. * @count: Requested number of pages. * @align: Requested alignment of pages (in PAGE_SIZE order). + * @gfp_mask: GFP flags to use for this allocation. * * This function allocates memory buffer for specified device. It uses * device specific contiguous memory area if available or the default @@ -188,12 +189,12 @@ int __init dma_contiguous_reserve_area(phys_addr_t size, phys_addr_t base, * function. */ struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int align) + unsigned int align, gfp_t gfp_mask) { if (align > CONFIG_CMA_ALIGNMENT) align = CONFIG_CMA_ALIGNMENT; - return cma_alloc(dev_get_cma_area(dev), count, align, GFP_KERNEL); + return cma_alloc(dev_get_cma_area(dev), count, align, gfp_mask); } /** diff --git a/drivers/iommu/amd_iommu.c b/drivers/iommu/amd_iommu.c index 1b5b8c5361c5..09bd3b290bb8 100644 --- a/drivers/iommu/amd_iommu.c +++ b/drivers/iommu/amd_iommu.c @@ -2672,7 +2672,7 @@ static void *alloc_coherent(struct device *dev, size_t size, return NULL; page = dma_alloc_from_contiguous(dev, size >> PAGE_SHIFT, - get_order(size)); + get_order(size), flag); if (!page) return NULL; } diff --git a/drivers/iommu/intel-iommu.c b/drivers/iommu/intel-iommu.c index f5e02f8e7371..a8f7ae0eb7a4 100644 --- a/drivers/iommu/intel-iommu.c +++ b/drivers/iommu/intel-iommu.c @@ -3829,7 +3829,7 @@ static void *intel_alloc_coherent(struct device *dev, size_t size, if (gfpflags_allow_blocking(flags)) { unsigned int count = size >> PAGE_SHIFT; - page = dma_alloc_from_contiguous(dev, count, order); + page = dma_alloc_from_contiguous(dev, count, order, flags); if (page && iommu_no_mapping(dev) && page_to_phys(page) + size > dev->coherent_dma_mask) { dma_release_from_contiguous(dev, page, count); diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index fec734df1524..b67bf6ac907d 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -112,7 +112,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, } struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order); + unsigned int order, gfp_t gfp_mask); bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); @@ -145,7 +145,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size, static inline struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, - unsigned int order) + unsigned int order, gfp_t gfp_mask) { return NULL; } -- cgit v1.2.3 From 76062b4ae2ea54fcfb8fce6940921a90f33f38da Mon Sep 17 00:00:00 2001 From: Shanth Murthy Date: Mon, 13 Feb 2017 04:02:52 -0800 Subject: platform/x86: intel_pmc_ipc: read s0ix residency API This patch adds a new API to indicate S0ix residency in usec. It utilizes the PMC Global Control Registers (GCR) to read deep and shallow S0ix residency. PMC MMIO resources: o Lower 4kB: IPC1 (PMC inter-processor communication) interface o Upper 4kB: GCR (Global Control Registers) This enables the power management framework to take corrective actions when the platform fails to enter S0ix after kernel freeze as part of the suspend to idle flow. (echo freeze > /sys/power/state). This is expected to be used with a S0ix failsafe framework such as: [rajneesh: folded in "fix division in 32-bit case" from Andy Shevchenko] Signed-off-by: Rajneesh Bhardwaj Signed-off-by: Shanth Murthy [andy: fixed kbuild error, removed "total" from variables, fixed macro] Signed-off-by: Andy Shevchenko --- arch/x86/include/asm/intel_pmc_ipc.h | 6 ++++ drivers/platform/x86/intel_pmc_ipc.c | 64 ++++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/intel_pmc_ipc.h b/arch/x86/include/asm/intel_pmc_ipc.h index cd0310e186f4..4291b6a5ddf7 100644 --- a/arch/x86/include/asm/intel_pmc_ipc.h +++ b/arch/x86/include/asm/intel_pmc_ipc.h @@ -30,6 +30,7 @@ int intel_pmc_ipc_raw_cmd(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen, u32 dptr, u32 sptr); int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, u32 *out, u32 outlen); +int intel_pmc_s0ix_counter_read(u64 *data); #else @@ -50,6 +51,11 @@ static inline int intel_pmc_ipc_command(u32 cmd, u32 sub, u8 *in, u32 inlen, return -EINVAL; } +static inline int intel_pmc_s0ix_counter_read(u64 *data) +{ + return -EINVAL; +} + #endif /*CONFIG_INTEL_PMC_IPC*/ #endif diff --git a/drivers/platform/x86/intel_pmc_ipc.c b/drivers/platform/x86/intel_pmc_ipc.c index 59a86121105b..9dae8434bd78 100644 --- a/drivers/platform/x86/intel_pmc_ipc.c +++ b/drivers/platform/x86/intel_pmc_ipc.c @@ -32,7 +32,10 @@ #include #include #include +#include + #include + #include /* @@ -54,6 +57,18 @@ #define IPC_WRITE_BUFFER 0x80 #define IPC_READ_BUFFER 0x90 +/* PMC Global Control Registers */ +#define GCR_TELEM_DEEP_S0IX_OFFSET 0x1078 +#define GCR_TELEM_SHLW_S0IX_OFFSET 0x1080 + +/* Residency with clock rate at 19.2MHz to usecs */ +#define S0IX_RESIDENCY_IN_USECS(d, s) \ +({ \ + u64 result = 10ull * ((d) + (s)); \ + do_div(result, 192); \ + result; \ +}) + /* * 16-byte buffer for sending data associated with IPC command. */ @@ -68,7 +83,7 @@ #define PLAT_RESOURCE_IPC_INDEX 0 #define PLAT_RESOURCE_IPC_SIZE 0x1000 #define PLAT_RESOURCE_GCR_OFFSET 0x1008 -#define PLAT_RESOURCE_GCR_SIZE 0x4 +#define PLAT_RESOURCE_GCR_SIZE 0x1000 #define PLAT_RESOURCE_BIOS_DATA_INDEX 1 #define PLAT_RESOURCE_BIOS_IFACE_INDEX 2 #define PLAT_RESOURCE_TELEM_SSRAM_INDEX 3 @@ -113,6 +128,7 @@ static struct intel_pmc_ipc_dev { /* gcr */ resource_size_t gcr_base; int gcr_size; + bool has_gcr_regs; /* punit */ struct platform_device *punit_dev; @@ -178,6 +194,11 @@ static inline u32 ipc_data_readl(u32 offset) return readl(ipcdev.ipc_base + IPC_READ_BUFFER + offset); } +static inline u64 gcr_data_readq(u32 offset) +{ + return readq(ipcdev.ipc_base + offset); +} + static int intel_pmc_ipc_check_status(void) { int status; @@ -710,7 +731,8 @@ static int ipc_plat_get_res(struct platform_device *pdev) dev_err(&pdev->dev, "Failed to get ipc resource\n"); return -ENXIO; } - size = PLAT_RESOURCE_IPC_SIZE; + size = PLAT_RESOURCE_IPC_SIZE + PLAT_RESOURCE_GCR_SIZE; + if (!request_mem_region(res->start, size, pdev->name)) { dev_err(&pdev->dev, "Failed to request ipc resource\n"); return -EBUSY; @@ -746,6 +768,28 @@ static int ipc_plat_get_res(struct platform_device *pdev) return 0; } +/** + * intel_pmc_s0ix_counter_read() - Read S0ix residency. + * @data: Out param that contains current S0ix residency count. + * + * Return: an error code or 0 on success. + */ +int intel_pmc_s0ix_counter_read(u64 *data) +{ + u64 deep, shlw; + + if (!ipcdev.has_gcr_regs) + return -EACCES; + + deep = gcr_data_readq(GCR_TELEM_DEEP_S0IX_OFFSET); + shlw = gcr_data_readq(GCR_TELEM_SHLW_S0IX_OFFSET); + + *data = S0IX_RESIDENCY_IN_USECS(deep, shlw); + + return 0; +} +EXPORT_SYMBOL_GPL(intel_pmc_s0ix_counter_read); + #ifdef CONFIG_ACPI static const struct acpi_device_id ipc_acpi_ids[] = { { "INT34D2", 0}, @@ -795,6 +839,8 @@ static int ipc_plat_probe(struct platform_device *pdev) goto err_sys; } + ipcdev.has_gcr_regs = true; + return 0; err_sys: free_irq(ipcdev.irq, &ipcdev); @@ -806,8 +852,11 @@ err_device: iounmap(ipcdev.ipc_base); res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_IPC_INDEX); - if (res) - release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE); + if (res) { + release_mem_region(res->start, + PLAT_RESOURCE_IPC_SIZE + + PLAT_RESOURCE_GCR_SIZE); + } return ret; } @@ -823,8 +872,11 @@ static int ipc_plat_remove(struct platform_device *pdev) iounmap(ipcdev.ipc_base); res = platform_get_resource(pdev, IORESOURCE_MEM, PLAT_RESOURCE_IPC_INDEX); - if (res) - release_mem_region(res->start, PLAT_RESOURCE_IPC_SIZE); + if (res) { + release_mem_region(res->start, + PLAT_RESOURCE_IPC_SIZE + + PLAT_RESOURCE_GCR_SIZE); + } ipcdev.dev = NULL; return 0; } -- cgit v1.2.3 From 7d134b2ce639448199052fd573a324f7e7cd5ed8 Mon Sep 17 00:00:00 2001 From: "Luis R. Rodriguez" Date: Mon, 27 Feb 2017 14:26:56 -0800 Subject: kprobes: move kprobe declarations to asm-generic/kprobes.h Often all is needed is these small helpers, instead of compiler.h or a full kprobes.h. This is important for asm helpers, in fact even some asm/kprobes.h make use of these helpers... instead just keep a generic asm file with helpers useful for asm code with the least amount of clutter as possible. Likewise we need now to also address what to do about this file for both when architectures have CONFIG_HAVE_KPROBES, and when they do not. Then for when architectures have CONFIG_HAVE_KPROBES but have disabled CONFIG_KPROBES. Right now most asm/kprobes.h do not have guards against CONFIG_KPROBES, this means most architecture code cannot include asm/kprobes.h safely. Correct this and add guards for architectures missing them. Additionally provide architectures that not have kprobes support with the default asm-generic solution. This lets us force asm/kprobes.h on the header include/linux/kprobes.h always, but most importantly we can now safely include just asm/kprobes.h on architecture code without bringing the full kitchen sink of header files. Two architectures already provided a guard against CONFIG_KPROBES on its kprobes.h: sh, arch. The rest of the architectures needed gaurds added. We avoid including any not-needed headers on asm/kprobes.h unless kprobes have been enabled. In a subsequent atomic change we can try now to remove compiler.h from include/linux/kprobes.h. During this sweep I've also identified a few architectures defining a common macro needed for both kprobes and ftrace, that of the definition of the breakput instruction up. Some refer to this as BREAKPOINT_INSTRUCTION. This must be kept outside of the #ifdef CONFIG_KPROBES guard. [mcgrof@kernel.org: fix arm64 build] Link: http://lkml.kernel.org/r/CAB=NE6X1WMByuARS4mZ1g9+W=LuVBnMDnh_5zyN0CLADaVh=Jw@mail.gmail.com [sfr@canb.auug.org.au: fixup for kprobes declarations moving] Link: http://lkml.kernel.org/r/20170214165933.13ebd4f4@canb.auug.org.au Link: http://lkml.kernel.org/r/20170203233139.32682-1-mcgrof@kernel.org Signed-off-by: Luis R. Rodriguez Signed-off-by: Stephen Rothwell Acked-by: Masami Hiramatsu Cc: Arnd Bergmann Cc: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S. Miller Cc: Ingo Molnar Cc: Thomas Gleixner Cc: H. Peter Anvin Cc: Andy Lutomirski Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- MAINTAINERS | 1 + arch/alpha/include/asm/Kbuild | 1 + arch/arc/include/asm/kprobes.h | 6 ++++-- arch/arm/include/asm/kprobes.h | 4 ++++ arch/arm/probes/decode.h | 1 + arch/arm64/include/asm/kprobes.h | 4 ++++ arch/arm64/kernel/armv8_deprecated.c | 1 + arch/arm64/kernel/insn.c | 1 + arch/arm64/kernel/probes/decode-insn.h | 2 ++ arch/avr32/include/asm/kprobes.h | 7 ++++++- arch/blackfin/include/asm/Kbuild | 1 + arch/c6x/include/asm/Kbuild | 1 + arch/cris/include/asm/Kbuild | 1 + arch/frv/include/asm/Kbuild | 1 + arch/h8300/include/asm/Kbuild | 1 + arch/hexagon/include/asm/Kbuild | 1 + arch/ia64/include/asm/kprobes.h | 12 +++++++++--- arch/m32r/include/asm/Kbuild | 1 + arch/m68k/include/asm/Kbuild | 1 + arch/metag/include/asm/Kbuild | 1 + arch/microblaze/include/asm/Kbuild | 1 + arch/mips/include/asm/kprobes.h | 6 +++++- arch/mn10300/include/asm/kprobes.h | 7 ++++++- arch/nios2/include/asm/Kbuild | 1 + arch/openrisc/include/asm/Kbuild | 1 + arch/parisc/include/asm/Kbuild | 1 + arch/powerpc/include/asm/kprobes.h | 3 +++ arch/powerpc/lib/code-patching.c | 1 + arch/s390/include/asm/kprobes.h | 7 ++++++- arch/score/include/asm/Kbuild | 1 + arch/sh/include/asm/kprobes.h | 5 ++++- arch/sparc/include/asm/kprobes.h | 10 ++++++++-- arch/tile/include/asm/kprobes.h | 6 +++++- arch/um/include/asm/Kbuild | 1 + arch/unicore32/include/asm/Kbuild | 1 + arch/x86/include/asm/kprobes.h | 9 ++++++++- arch/xtensa/include/asm/Kbuild | 1 + include/asm-generic/kprobes.h | 25 +++++++++++++++++++++++++ include/linux/compiler.h | 8 -------- include/linux/kprobes.h | 19 +++---------------- 40 files changed, 125 insertions(+), 38 deletions(-) create mode 100644 include/asm-generic/kprobes.h (limited to 'arch/x86') diff --git a/MAINTAINERS b/MAINTAINERS index 6cd8945b9094..846f97aa3508 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -7286,6 +7286,7 @@ M: Masami Hiramatsu S: Maintained F: Documentation/kprobes.txt F: include/linux/kprobes.h +F: include/asm-generic/kprobes.h F: kernel/kprobes.c KS0108 LCD CONTROLLER DRIVER diff --git a/arch/alpha/include/asm/Kbuild b/arch/alpha/include/asm/Kbuild index 46e47c088622..d103db5af5ff 100644 --- a/arch/alpha/include/asm/Kbuild +++ b/arch/alpha/include/asm/Kbuild @@ -10,3 +10,4 @@ generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h generic-y += current.h +generic-y += kprobes.h diff --git a/arch/arc/include/asm/kprobes.h b/arch/arc/include/asm/kprobes.h index 944dbedb38b5..00bdbe167615 100644 --- a/arch/arc/include/asm/kprobes.h +++ b/arch/arc/include/asm/kprobes.h @@ -9,6 +9,8 @@ #ifndef _ARC_KPROBES_H #define _ARC_KPROBES_H +#include + #ifdef CONFIG_KPROBES typedef u16 kprobe_opcode_t; @@ -55,6 +57,6 @@ void trap_is_kprobe(unsigned long address, struct pt_regs *regs); static void trap_is_kprobe(unsigned long address, struct pt_regs *regs) { } -#endif +#endif /* CONFIG_KPROBES */ -#endif +#endif /* _ARC_KPROBES_H */ diff --git a/arch/arm/include/asm/kprobes.h b/arch/arm/include/asm/kprobes.h index 3ea9be559726..59655459da59 100644 --- a/arch/arm/include/asm/kprobes.h +++ b/arch/arm/include/asm/kprobes.h @@ -16,6 +16,9 @@ #ifndef _ARM_KPROBES_H #define _ARM_KPROBES_H +#include + +#ifdef CONFIG_KPROBES #include #include #include @@ -83,4 +86,5 @@ struct arch_optimized_insn { */ }; +#endif /* CONFIG_KPROBES */ #endif /* _ARM_KPROBES_H */ diff --git a/arch/arm/probes/decode.h b/arch/arm/probes/decode.h index f9b08ba7fe73..548d622a3159 100644 --- a/arch/arm/probes/decode.h +++ b/arch/arm/probes/decode.h @@ -22,6 +22,7 @@ #include #include #include +#include void __init arm_probes_decode_init(void); diff --git a/arch/arm64/include/asm/kprobes.h b/arch/arm64/include/asm/kprobes.h index 1737aecfcc5e..6deb8d726041 100644 --- a/arch/arm64/include/asm/kprobes.h +++ b/arch/arm64/include/asm/kprobes.h @@ -16,6 +16,9 @@ #ifndef _ARM_KPROBES_H #define _ARM_KPROBES_H +#include + +#ifdef CONFIG_KPROBES #include #include #include @@ -57,4 +60,5 @@ int kprobe_single_step_handler(struct pt_regs *regs, unsigned int esr); void kretprobe_trampoline(void); void __kprobes *trampoline_probe_handler(struct pt_regs *regs); +#endif /* CONFIG_KPROBES */ #endif /* _ARM_KPROBES_H */ diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index 86032a012388..657977e77ec8 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/insn.c b/arch/arm64/kernel/insn.c index b6badff5a151..3a63954a8b14 100644 --- a/arch/arm64/kernel/insn.c +++ b/arch/arm64/kernel/insn.c @@ -31,6 +31,7 @@ #include #include #include +#include #define AARCH64_INSN_SF_BIT BIT(31) #define AARCH64_INSN_N_BIT BIT(22) diff --git a/arch/arm64/kernel/probes/decode-insn.h b/arch/arm64/kernel/probes/decode-insn.h index 76d3f315407f..192ab007bacb 100644 --- a/arch/arm64/kernel/probes/decode-insn.h +++ b/arch/arm64/kernel/probes/decode-insn.h @@ -16,6 +16,8 @@ #ifndef _ARM_KERNEL_KPROBES_ARM64_H #define _ARM_KERNEL_KPROBES_ARM64_H +#include + /* * ARM strongly recommends a limit of 128 bytes between LoadExcl and * StoreExcl instructions in a single thread of execution. So keep the diff --git a/arch/avr32/include/asm/kprobes.h b/arch/avr32/include/asm/kprobes.h index 45f563ed73fd..28dfc61ad384 100644 --- a/arch/avr32/include/asm/kprobes.h +++ b/arch/avr32/include/asm/kprobes.h @@ -11,10 +11,14 @@ #ifndef __ASM_AVR32_KPROBES_H #define __ASM_AVR32_KPROBES_H +#include + +#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */ + +#ifdef CONFIG_KPROBES #include typedef u16 kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0xd673 /* breakpoint */ #define MAX_INSN_SIZE 2 #define MAX_STACK_SIZE 64 /* 32 would probably be OK */ @@ -46,4 +50,5 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, #define flush_insn_slot(p) do { } while (0) +#endif /* CONFIG_KPROBES */ #endif /* __ASM_AVR32_KPROBES_H */ diff --git a/arch/blackfin/include/asm/Kbuild b/arch/blackfin/include/asm/Kbuild index d6fa60b158be..625db8ac815e 100644 --- a/arch/blackfin/include/asm/Kbuild +++ b/arch/blackfin/include/asm/Kbuild @@ -46,3 +46,4 @@ generic-y += unaligned.h generic-y += user.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/c6x/include/asm/Kbuild b/arch/c6x/include/asm/Kbuild index 4e9f57433f3a..82619c32d25b 100644 --- a/arch/c6x/include/asm/Kbuild +++ b/arch/c6x/include/asm/Kbuild @@ -61,3 +61,4 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/cris/include/asm/Kbuild b/arch/cris/include/asm/Kbuild index 8e4ef321001f..0f5132b08896 100644 --- a/arch/cris/include/asm/Kbuild +++ b/arch/cris/include/asm/Kbuild @@ -45,3 +45,4 @@ generic-y += types.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/frv/include/asm/Kbuild b/arch/frv/include/asm/Kbuild index 0f5b0d5d313c..c33b46715f65 100644 --- a/arch/frv/include/asm/Kbuild +++ b/arch/frv/include/asm/Kbuild @@ -7,3 +7,4 @@ generic-y += mm-arch-hooks.h generic-y += preempt.h generic-y += trace_clock.h generic-y += word-at-a-time.h +generic-y += kprobes.h diff --git a/arch/h8300/include/asm/Kbuild b/arch/h8300/include/asm/Kbuild index 5efd0c87f3c0..341740c3581c 100644 --- a/arch/h8300/include/asm/Kbuild +++ b/arch/h8300/include/asm/Kbuild @@ -74,3 +74,4 @@ generic-y += unaligned.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/hexagon/include/asm/Kbuild b/arch/hexagon/include/asm/Kbuild index a43a7c90e4af..797b64a4b80b 100644 --- a/arch/hexagon/include/asm/Kbuild +++ b/arch/hexagon/include/asm/Kbuild @@ -59,3 +59,4 @@ generic-y += unaligned.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/ia64/include/asm/kprobes.h b/arch/ia64/include/asm/kprobes.h index d5505d6f2382..0302b3664789 100644 --- a/arch/ia64/include/asm/kprobes.h +++ b/arch/ia64/include/asm/kprobes.h @@ -23,14 +23,19 @@ * 2005-Apr Rusty Lynch and Anil S Keshavamurthy * adapted from i386 */ +#include +#include + +#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) + +#ifdef CONFIG_KPROBES + #include #include #include -#include #define __ARCH_WANT_KPROBES_INSN_SLOT #define MAX_INSN_SIZE 2 /* last half is for kprobe-booster */ -#define BREAK_INST (long)(__IA64_BREAK_KPROBE << 6) #define NOP_M_INST (long)(1<<27) #define BRL_INST(i1, i2) ((long)((0xcL << 37) | /* brl */ \ (0x1L << 12) | /* many */ \ @@ -124,4 +129,5 @@ extern void invalidate_stacked_regs(void); extern void flush_register_stack(void); extern void arch_remove_kprobe(struct kprobe *p); -#endif /* _ASM_KPROBES_H */ +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_KPROBES_H */ diff --git a/arch/m32r/include/asm/Kbuild b/arch/m32r/include/asm/Kbuild index 8c24c5e1db66..deb298777df2 100644 --- a/arch/m32r/include/asm/Kbuild +++ b/arch/m32r/include/asm/Kbuild @@ -11,3 +11,4 @@ generic-y += preempt.h generic-y += sections.h generic-y += trace_clock.h generic-y += word-at-a-time.h +generic-y += kprobes.h diff --git a/arch/m68k/include/asm/Kbuild b/arch/m68k/include/asm/Kbuild index 6c76d6c24b3d..d4f9ccbfa85c 100644 --- a/arch/m68k/include/asm/Kbuild +++ b/arch/m68k/include/asm/Kbuild @@ -33,3 +33,4 @@ generic-y += trace_clock.h generic-y += types.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/metag/include/asm/Kbuild b/arch/metag/include/asm/Kbuild index d3731f0db73b..f9b9df5d6de9 100644 --- a/arch/metag/include/asm/Kbuild +++ b/arch/metag/include/asm/Kbuild @@ -54,3 +54,4 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/microblaze/include/asm/Kbuild b/arch/microblaze/include/asm/Kbuild index 6275eb051801..1732ec13b211 100644 --- a/arch/microblaze/include/asm/Kbuild +++ b/arch/microblaze/include/asm/Kbuild @@ -10,3 +10,4 @@ generic-y += preempt.h generic-y += syscalls.h generic-y += trace_clock.h generic-y += word-at-a-time.h +generic-y += kprobes.h diff --git a/arch/mips/include/asm/kprobes.h b/arch/mips/include/asm/kprobes.h index daba1f9a4f79..291846d9ba83 100644 --- a/arch/mips/include/asm/kprobes.h +++ b/arch/mips/include/asm/kprobes.h @@ -22,6 +22,9 @@ #ifndef _ASM_KPROBES_H #define _ASM_KPROBES_H +#include + +#ifdef CONFIG_KPROBES #include #include @@ -94,4 +97,5 @@ struct kprobe_ctlblk { extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); -#endif /* _ASM_KPROBES_H */ +#endif /* CONFIG_KPROBES */ +#endif /* _ASM_KPROBES_H */ diff --git a/arch/mn10300/include/asm/kprobes.h b/arch/mn10300/include/asm/kprobes.h index c800b590183a..7abea0bdb549 100644 --- a/arch/mn10300/include/asm/kprobes.h +++ b/arch/mn10300/include/asm/kprobes.h @@ -21,13 +21,17 @@ #ifndef _ASM_KPROBES_H #define _ASM_KPROBES_H +#include + +#define BREAKPOINT_INSTRUCTION 0xff + +#ifdef CONFIG_KPROBES #include #include struct kprobe; typedef unsigned char kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0xff #define MAX_INSN_SIZE 8 #define MAX_STACK_SIZE 128 @@ -47,4 +51,5 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, extern void arch_remove_kprobe(struct kprobe *p); +#endif /* CONFIG_KPROBES */ #endif /* _ASM_KPROBES_H */ diff --git a/arch/nios2/include/asm/Kbuild b/arch/nios2/include/asm/Kbuild index 35b0e883761a..aaa3c218b56c 100644 --- a/arch/nios2/include/asm/Kbuild +++ b/arch/nios2/include/asm/Kbuild @@ -62,3 +62,4 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/openrisc/include/asm/Kbuild b/arch/openrisc/include/asm/Kbuild index fb241757f7f0..fb01873a5aad 100644 --- a/arch/openrisc/include/asm/Kbuild +++ b/arch/openrisc/include/asm/Kbuild @@ -67,3 +67,4 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/parisc/include/asm/Kbuild b/arch/parisc/include/asm/Kbuild index cc70b4116718..a9909c2d04c5 100644 --- a/arch/parisc/include/asm/Kbuild +++ b/arch/parisc/include/asm/Kbuild @@ -28,3 +28,4 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/powerpc/include/asm/kprobes.h b/arch/powerpc/include/asm/kprobes.h index d821835ade86..0503c98b2117 100644 --- a/arch/powerpc/include/asm/kprobes.h +++ b/arch/powerpc/include/asm/kprobes.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_KPROBES_H #define _ASM_POWERPC_KPROBES_H + +#include + #ifdef __KERNEL__ /* * Kernel Probes (KProbes) diff --git a/arch/powerpc/lib/code-patching.c b/arch/powerpc/lib/code-patching.c index 0899315e1434..0d3002b7e2b4 100644 --- a/arch/powerpc/lib/code-patching.c +++ b/arch/powerpc/lib/code-patching.c @@ -14,6 +14,7 @@ #include #include #include +#include int patch_instruction(unsigned int *addr, unsigned int instr) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 591e5a5279b0..84c0f9086483 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -27,6 +27,11 @@ * 2005-Dec Used as a template for s390 by Mike Grundy * */ +#include + +#define BREAKPOINT_INSTRUCTION 0x0002 + +#ifdef CONFIG_KPROBES #include #include #include @@ -37,7 +42,6 @@ struct pt_regs; struct kprobe; typedef u16 kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0x0002 /* Maximum instruction size is 3 (16bit) halfwords: */ #define MAX_INSN_SIZE 0x0003 @@ -91,4 +95,5 @@ int probe_is_insn_relative_long(u16 *insn); #define flush_insn_slot(p) do { } while (0) +#endif /* CONFIG_KPROBES */ #endif /* _ASM_S390_KPROBES_H */ diff --git a/arch/score/include/asm/Kbuild b/arch/score/include/asm/Kbuild index db3e28ca3ae2..926943a49ea5 100644 --- a/arch/score/include/asm/Kbuild +++ b/arch/score/include/asm/Kbuild @@ -13,3 +13,4 @@ generic-y += trace_clock.h generic-y += xor.h generic-y += serial.h generic-y += word-at-a-time.h +generic-y += kprobes.h diff --git a/arch/sh/include/asm/kprobes.h b/arch/sh/include/asm/kprobes.h index 134f3980e44a..f0986f9b3844 100644 --- a/arch/sh/include/asm/kprobes.h +++ b/arch/sh/include/asm/kprobes.h @@ -1,13 +1,16 @@ #ifndef __ASM_SH_KPROBES_H #define __ASM_SH_KPROBES_H +#include + +#define BREAKPOINT_INSTRUCTION 0xc33a + #ifdef CONFIG_KPROBES #include #include typedef insn_size_t kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0xc33a #define MAX_INSN_SIZE 16 #define MAX_STACK_SIZE 64 diff --git a/arch/sparc/include/asm/kprobes.h b/arch/sparc/include/asm/kprobes.h index a145d798e112..49f8402035d7 100644 --- a/arch/sparc/include/asm/kprobes.h +++ b/arch/sparc/include/asm/kprobes.h @@ -1,13 +1,17 @@ #ifndef _SPARC64_KPROBES_H #define _SPARC64_KPROBES_H +#include + +#define BREAKPOINT_INSTRUCTION 0x91d02070 /* ta 0x70 */ +#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */ + +#ifdef CONFIG_KPROBES #include #include typedef u32 kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0x91d02070 /* ta 0x70 */ -#define BREAKPOINT_INSTRUCTION_2 0x91d02071 /* ta 0x71 */ #define MAX_INSN_SIZE 2 #define kretprobe_blacklist_size 0 @@ -48,4 +52,6 @@ int kprobe_exceptions_notify(struct notifier_block *self, int kprobe_fault_handler(struct pt_regs *regs, int trapnr); asmlinkage void __kprobes kprobe_trap(unsigned long trap_level, struct pt_regs *regs); + +#endif /* CONFIG_KPROBES */ #endif /* _SPARC64_KPROBES_H */ diff --git a/arch/tile/include/asm/kprobes.h b/arch/tile/include/asm/kprobes.h index d8f9a83943b1..4a8b1cadca24 100644 --- a/arch/tile/include/asm/kprobes.h +++ b/arch/tile/include/asm/kprobes.h @@ -17,10 +17,13 @@ #ifndef _ASM_TILE_KPROBES_H #define _ASM_TILE_KPROBES_H +#include + +#ifdef CONFIG_KPROBES + #include #include #include - #include #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -76,4 +79,5 @@ void arch_remove_kprobe(struct kprobe *); extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); +#endif /* CONFIG_KPROBES */ #endif /* _ASM_TILE_KPROBES_H */ diff --git a/arch/um/include/asm/Kbuild b/arch/um/include/asm/Kbuild index 90c281cd7e1d..e9d42aab76dc 100644 --- a/arch/um/include/asm/Kbuild +++ b/arch/um/include/asm/Kbuild @@ -25,3 +25,4 @@ generic-y += topology.h generic-y += trace_clock.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/unicore32/include/asm/Kbuild b/arch/unicore32/include/asm/Kbuild index 5d51ade89f4c..84205fe1cd79 100644 --- a/arch/unicore32/include/asm/Kbuild +++ b/arch/unicore32/include/asm/Kbuild @@ -63,3 +63,4 @@ generic-y += user.h generic-y += vga.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index d1d1e5094c28..200581691c6e 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -21,6 +21,12 @@ * * See arch/x86/kernel/kprobes.c for x86 kprobes history. */ + +#include + +#define BREAKPOINT_INSTRUCTION 0xcc + +#ifdef CONFIG_KPROBES #include #include #include @@ -32,7 +38,6 @@ struct pt_regs; struct kprobe; typedef u8 kprobe_opcode_t; -#define BREAKPOINT_INSTRUCTION 0xcc #define RELATIVEJUMP_OPCODE 0xe9 #define RELATIVEJUMP_SIZE 5 #define RELATIVECALL_OPCODE 0xe8 @@ -116,4 +121,6 @@ extern int kprobe_exceptions_notify(struct notifier_block *self, unsigned long val, void *data); extern int kprobe_int3_handler(struct pt_regs *regs); extern int kprobe_debug_handler(struct pt_regs *regs); + +#endif /* CONFIG_KPROBES */ #endif /* _ASM_X86_KPROBES_H */ diff --git a/arch/xtensa/include/asm/Kbuild b/arch/xtensa/include/asm/Kbuild index 9e9760b20be5..f41408c53fe1 100644 --- a/arch/xtensa/include/asm/Kbuild +++ b/arch/xtensa/include/asm/Kbuild @@ -31,3 +31,4 @@ generic-y += topology.h generic-y += trace_clock.h generic-y += word-at-a-time.h generic-y += xor.h +generic-y += kprobes.h diff --git a/include/asm-generic/kprobes.h b/include/asm-generic/kprobes.h new file mode 100644 index 000000000000..57af9f21d148 --- /dev/null +++ b/include/asm-generic/kprobes.h @@ -0,0 +1,25 @@ +#ifndef _ASM_GENERIC_KPROBES_H +#define _ASM_GENERIC_KPROBES_H + +#if defined(__KERNEL__) && !defined(__ASSEMBLY__) +#ifdef CONFIG_KPROBES +/* + * Blacklist ganerating macro. Specify functions which is not probed + * by using this macro. + */ +# define __NOKPROBE_SYMBOL(fname) \ +static unsigned long __used \ + __attribute__((__section__("_kprobe_blacklist"))) \ + _kbl_addr_##fname = (unsigned long)fname; +# define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) +/* Use this to forbid a kprobes attach on very low level functions */ +# define __kprobes __attribute__((__section__(".kprobes.text"))) +# define nokprobe_inline __always_inline +#else +# define NOKPROBE_SYMBOL(fname) +# define __kprobes +# define nokprobe_inline inline +#endif +#endif /* defined(__KERNEL__) && !defined(__ASSEMBLY__) */ + +#endif /* _ASM_GENERIC_KPROBES_H */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 91c30cba984e..b2eb9c0a68c4 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -570,12 +570,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (_________p1); \ }) -/* Ignore/forbid kprobes attach on very low level functions marked by this attribute: */ -#ifdef CONFIG_KPROBES -# define __kprobes __attribute__((__section__(".kprobes.text"))) -# define nokprobe_inline __always_inline -#else -# define __kprobes -# define nokprobe_inline inline -#endif #endif /* __LINUX_COMPILER_H */ diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 16ddfb8b304a..c328e4f7dcad 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -29,7 +29,7 @@ * and Prasanna S Panchamukhi * added function-return probes. */ -#include /* for __kprobes */ +#include #include #include #include @@ -40,9 +40,9 @@ #include #include #include +#include #ifdef CONFIG_KPROBES -#include /* kprobe_status settings */ #define KPROBE_HIT_ACTIVE 0x00000001 @@ -51,6 +51,7 @@ #define KPROBE_HIT_SSDONE 0x00000008 #else /* CONFIG_KPROBES */ +#include typedef int kprobe_opcode_t; struct arch_specific_insn { int dummy; @@ -509,18 +510,4 @@ static inline bool is_kprobe_optinsn_slot(unsigned long addr) } #endif -#ifdef CONFIG_KPROBES -/* - * Blacklist ganerating macro. Specify functions which is not probed - * by using this macro. - */ -#define __NOKPROBE_SYMBOL(fname) \ -static unsigned long __used \ - __attribute__((section("_kprobe_blacklist"))) \ - _kbl_addr_##fname = (unsigned long)fname; -#define NOKPROBE_SYMBOL(fname) __NOKPROBE_SYMBOL(fname) -#else -#define NOKPROBE_SYMBOL(fname) -#endif - #endif /* _LINUX_KPROBES_H */ -- cgit v1.2.3 From 9332ef9dbd172d4ab0a0141df7cb21c696a5ce96 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Feb 2017 14:28:47 -0800 Subject: scripts/spelling.txt: add "an user" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: an user||a user an userspace||a userspace I also added "userspace" to the list since it is a common word in Linux. I found some instances for "an userfaultfd", but I did not add it to the list. I felt it is endless to find words that start with "user" such as "userland" etc., so must draw a line somewhere. Link: http://lkml.kernel.org/r/1481573103-11329-4-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/admin-guide/ras.rst | 2 +- Documentation/devicetree/bindings/opp/opp.txt | 2 +- Documentation/filesystems/quota.txt | 2 +- Documentation/kselftest.txt | 4 ++-- Documentation/media/dvb-drivers/ci.rst | 2 +- Documentation/networking/cdc_mbim.txt | 4 ++-- Documentation/vm/userfaultfd.txt | 2 +- arch/Kconfig | 2 +- arch/powerpc/xmon/ppc-opc.c | 2 +- arch/x86/kvm/mmu.c | 2 +- drivers/media/dvb-core/dvb_ringbuffer.h | 4 ++-- drivers/scsi/lpfc/lpfc_attr.c | 2 +- fs/userfaultfd.c | 6 +++--- include/net/mac80211.h | 2 +- kernel/irq/manage.c | 2 +- net/bluetooth/hci_sock.c | 6 +++--- net/netfilter/nfnetlink_cthelper.c | 2 +- scripts/spelling.txt | 2 ++ tools/perf/Documentation/tips.txt | 2 +- 19 files changed, 27 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/admin-guide/ras.rst b/Documentation/admin-guide/ras.rst index 9939348bd4a3..1b90c6f00a92 100644 --- a/Documentation/admin-guide/ras.rst +++ b/Documentation/admin-guide/ras.rst @@ -81,7 +81,7 @@ That defines some categories of errors: still run, eventually replacing the affected hardware by a hot spare, if available. - Also, when an error happens on an userspace process, it is also possible to + Also, when an error happens on a userspace process, it is also possible to kill such process and let userspace restart it. The mechanism for handling non-fatal errors is usually complex and may diff --git a/Documentation/devicetree/bindings/opp/opp.txt b/Documentation/devicetree/bindings/opp/opp.txt index 9f5ca4457b5f..ecdcfb790704 100644 --- a/Documentation/devicetree/bindings/opp/opp.txt +++ b/Documentation/devicetree/bindings/opp/opp.txt @@ -136,7 +136,7 @@ Optional properties: larger OPP table, based on what version of the hardware we are running on. We still can't have multiple nodes with the same opp-hz value in OPP table. - It's an user defined array containing a hierarchy of hardware version numbers, + It's a user defined array containing a hierarchy of hardware version numbers, supported by the OPP. For example: a platform with hierarchy of three levels of versions (A, B and C), this field should be like , where X corresponds to Version hierarchy A, Y corresponds to version hierarchy B and Z diff --git a/Documentation/filesystems/quota.txt b/Documentation/filesystems/quota.txt index 29fc01552646..32874b06ebe9 100644 --- a/Documentation/filesystems/quota.txt +++ b/Documentation/filesystems/quota.txt @@ -6,7 +6,7 @@ Quota subsystem allows system administrator to set limits on used space and number of used inodes (inode is a filesystem structure which is associated with each file or directory) for users and/or groups. For both used space and number of used inodes there are actually two limits. The first one is called softlimit -and the second one hardlimit. An user can never exceed a hardlimit for any +and the second one hardlimit. A user can never exceed a hardlimit for any resource (unless he has CAP_SYS_RESOURCE capability). User is allowed to exceed softlimit but only for limited period of time. This period is called "grace period" or "grace time". When grace time is over, user is not able to allocate diff --git a/Documentation/kselftest.txt b/Documentation/kselftest.txt index d431dc82c228..5bd590335839 100644 --- a/Documentation/kselftest.txt +++ b/Documentation/kselftest.txt @@ -59,14 +59,14 @@ Install selftests ================= You can use kselftest_install.sh tool installs selftests in default -location which is tools/testing/selftests/kselftest or an user specified +location which is tools/testing/selftests/kselftest or a user specified location. To install selftests in default location: $ cd tools/testing/selftests $ ./kselftest_install.sh -To install selftests in an user specified location: +To install selftests in a user specified location: $ cd tools/testing/selftests $ ./kselftest_install.sh install_dir diff --git a/Documentation/media/dvb-drivers/ci.rst b/Documentation/media/dvb-drivers/ci.rst index 8124bf5ce5ef..69b07e9d1816 100644 --- a/Documentation/media/dvb-drivers/ci.rst +++ b/Documentation/media/dvb-drivers/ci.rst @@ -20,7 +20,7 @@ existing low level CI API. ca_zap ~~~~~~ -An userspace application, like ``ca_zap`` is required to handle encrypted +A userspace application, like ``ca_zap`` is required to handle encrypted MPEG-TS streams. The ``ca_zap`` userland application is in charge of sending the diff --git a/Documentation/networking/cdc_mbim.txt b/Documentation/networking/cdc_mbim.txt index a15ea602aa52..b9482ca10254 100644 --- a/Documentation/networking/cdc_mbim.txt +++ b/Documentation/networking/cdc_mbim.txt @@ -38,7 +38,7 @@ Basic usage =========== MBIM functions are inactive when unmanaged. The cdc_mbim driver only -provides an userspace interface to the MBIM control channel, and will +provides a userspace interface to the MBIM control channel, and will not participate in the management of the function. This implies that a userspace MBIM management application always is required to enable a MBIM function. @@ -200,7 +200,7 @@ structure described in section 10.5.29 of [1]. The DSS VLAN subdevices are used as a practical interface between the shared MBIM data channel and a MBIM DSS aware userspace application. It is not intended to be presented as-is to an end user. The -assumption is that an userspace application initiating a DSS session +assumption is that a userspace application initiating a DSS session also takes care of the necessary framing of the DSS data, presenting the stream to the end user in an appropriate way for the stream type. diff --git a/Documentation/vm/userfaultfd.txt b/Documentation/vm/userfaultfd.txt index fe51a5aa8963..0e5543a920e5 100644 --- a/Documentation/vm/userfaultfd.txt +++ b/Documentation/vm/userfaultfd.txt @@ -149,7 +149,7 @@ migration thread in the QEMU running in the destination node will receive the page that triggered the userfault and it'll map it as usual with the UFFDIO_COPY|ZEROPAGE (without actually knowing if it was spontaneously sent by the source or if it was an urgent page -requested through an userfault). +requested through a userfault). By the time the userfaults start, the QEMU in the destination node doesn't need to keep any per-page state bitmap relative to the live diff --git a/arch/Kconfig b/arch/Kconfig index d0012add6b19..cd211a14a88f 100644 --- a/arch/Kconfig +++ b/arch/Kconfig @@ -29,7 +29,7 @@ config OPROFILE_EVENT_MULTIPLEX The number of hardware counters is limited. The multiplexing feature enables OProfile to gather more events than counters are provided by the hardware. This is realized by switching - between events at an user specified time interval. + between events at a user specified time interval. If unsure, say N. diff --git a/arch/powerpc/xmon/ppc-opc.c b/arch/powerpc/xmon/ppc-opc.c index 6845e91ba04a..954dbf8222d7 100644 --- a/arch/powerpc/xmon/ppc-opc.c +++ b/arch/powerpc/xmon/ppc-opc.c @@ -1587,7 +1587,7 @@ extract_tbr (unsigned long insn, #define CTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x7)) #define CTX_MASK CTX(0x3f, 0x7) -/* An User Context form instruction. */ +/* A User Context form instruction. */ #define UCTX(op, xop) (OP (op) | (((unsigned long)(xop)) & 0x1f)) #define UCTX_MASK UCTX(0x3f, 0x1f) diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 2fd7586aad4d..1cda35277278 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -4102,7 +4102,7 @@ static void update_permission_bitmask(struct kvm_vcpu *vcpu, * as a SMAP violation if all of the following * conditions are ture: * - X86_CR4_SMAP is set in CR4 - * - An user page is accessed + * - A user page is accessed * - Page fault in kernel mode * - if CPL = 3 or X86_EFLAGS_AC is clear * diff --git a/drivers/media/dvb-core/dvb_ringbuffer.h b/drivers/media/dvb-core/dvb_ringbuffer.h index bbe94873d44d..8ed6bcc3a56e 100644 --- a/drivers/media/dvb-core/dvb_ringbuffer.h +++ b/drivers/media/dvb-core/dvb_ringbuffer.h @@ -136,7 +136,7 @@ extern void dvb_ringbuffer_flush_spinlock_wakeup(struct dvb_ringbuffer *rbuf); } /** - * dvb_ringbuffer_read_user - Reads a buffer into an user pointer + * dvb_ringbuffer_read_user - Reads a buffer into a user pointer * * @rbuf: pointer to struct dvb_ringbuffer * @buf: pointer to the buffer where the data will be stored @@ -193,7 +193,7 @@ extern ssize_t dvb_ringbuffer_write(struct dvb_ringbuffer *rbuf, const u8 *buf, size_t len); /** - * dvb_ringbuffer_write_user - Writes a buffer received via an user pointer + * dvb_ringbuffer_write_user - Writes a buffer received via a user pointer * * @rbuf: pointer to struct dvb_ringbuffer * @buf: pointer to the buffer where the data will be read diff --git a/drivers/scsi/lpfc/lpfc_attr.c b/drivers/scsi/lpfc/lpfc_attr.c index 50cf402dea29..03cb05abc821 100644 --- a/drivers/scsi/lpfc/lpfc_attr.c +++ b/drivers/scsi/lpfc/lpfc_attr.c @@ -3329,7 +3329,7 @@ static DEVICE_ATTR(lpfc_static_vport, S_IRUGO, * @buf: Data buffer. * @count: Size of the data buffer. * - * This function get called when an user write to the lpfc_stat_data_ctrl + * This function get called when a user write to the lpfc_stat_data_ctrl * sysfs file. This function parse the command written to the sysfs file * and take appropriate action. These commands are used for controlling * driver statistical data collection. diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 625b7285a37b..e6e0a619cb3a 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1807,17 +1807,17 @@ static void init_once_userfaultfd_ctx(void *mem) } /** - * userfaultfd_file_create - Creates an userfaultfd file pointer. + * userfaultfd_file_create - Creates a userfaultfd file pointer. * @flags: Flags for the userfaultfd file. * - * This function creates an userfaultfd file pointer, w/out installing + * This function creates a userfaultfd file pointer, w/out installing * it into the fd table. This is useful when the userfaultfd file is * used during the initialization of data structures that require * extra setup after the userfaultfd creation. So the userfaultfd * creation is split into the file pointer creation phase, and the * file descriptor installation phase. In this way races with * userspace closing the newly installed file descriptor can be - * avoided. Returns an userfaultfd file pointer, or a proper error + * avoided. Returns a userfaultfd file pointer, or a proper error * pointer. */ static struct file *userfaultfd_file_create(int flags) diff --git a/include/net/mac80211.h b/include/net/mac80211.h index b9a08cd1d97d..a3bab3c5ecfb 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -3392,7 +3392,7 @@ enum ieee80211_reconfig_type { * since there won't be any time to beacon before the switch anyway. * @pre_channel_switch: This is an optional callback that is called * before a channel switch procedure is started (ie. when a STA - * gets a CSA or an userspace initiated channel-switch), allowing + * gets a CSA or a userspace initiated channel-switch), allowing * the driver to prepare for the channel switch. * @post_channel_switch: This is an optional callback that is called * after a channel switch procedure is completed, allowing the diff --git a/kernel/irq/manage.c b/kernel/irq/manage.c index 6b669593e7eb..944d068b6c48 100644 --- a/kernel/irq/manage.c +++ b/kernel/irq/manage.c @@ -353,7 +353,7 @@ static int setup_affinity(struct irq_desc *desc, struct cpumask *mask) return 0; /* - * Preserve the managed affinity setting and an userspace affinity + * Preserve the managed affinity setting and a userspace affinity * setup, but make sure that one of the targets is online. */ if (irqd_affinity_is_managed(&desc->irq_data) || diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c index 48f9471e7c85..f64d6566021f 100644 --- a/net/bluetooth/hci_sock.c +++ b/net/bluetooth/hci_sock.c @@ -851,7 +851,7 @@ static int hci_sock_release(struct socket *sock) if (hdev) { if (hci_pi(sk)->channel == HCI_CHANNEL_USER) { - /* When releasing an user channel exclusive access, + /* When releasing a user channel exclusive access, * call hci_dev_do_close directly instead of calling * hci_dev_close to ensure the exclusive access will * be released and the controller brought back down. @@ -1172,7 +1172,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, /* In case the transport is already up and * running, clear the error here. * - * This can happen when opening an user + * This can happen when opening a user * channel and HCI_AUTO_OFF grace period * is still active. */ @@ -1190,7 +1190,7 @@ static int hci_sock_bind(struct socket *sock, struct sockaddr *addr, if (!hci_sock_gen_cookie(sk)) { /* In the case when a cookie has already been assigned, * this socket will transition from a raw socket into - * an user channel socket. For a clean transition, send + * a user channel socket. For a clean transition, send * the close notification first. */ skb = create_monitor_ctrl_close(sk); diff --git a/net/netfilter/nfnetlink_cthelper.c b/net/netfilter/nfnetlink_cthelper.c index 3b79f34b5095..de8782345c86 100644 --- a/net/netfilter/nfnetlink_cthelper.c +++ b/net/netfilter/nfnetlink_cthelper.c @@ -48,7 +48,7 @@ nfnl_userspace_cthelper(struct sk_buff *skb, unsigned int protoff, if (helper == NULL) return NF_DROP; - /* This is an user-space helper not yet configured, skip. */ + /* This is a user-space helper not yet configured, skip. */ if ((helper->flags & (NF_CT_HELPER_F_USERSPACE | NF_CT_HELPER_F_CONFIGURED)) == NF_CT_HELPER_F_USERSPACE) diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 372840a672a4..13794532c3fa 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -86,6 +86,8 @@ alue||value ambigious||ambiguous amoung||among amout||amount +an user||a user +an userspace||a userspace analysator||analyzer ang||and anniversery||anniversary diff --git a/tools/perf/Documentation/tips.txt b/tools/perf/Documentation/tips.txt index 8a6479c0eac9..170b0289a7bc 100644 --- a/tools/perf/Documentation/tips.txt +++ b/tools/perf/Documentation/tips.txt @@ -22,7 +22,7 @@ If you have debuginfo enabled, try: perf report -s sym,srcline For memory address profiling, try: perf mem record / perf mem report For tracepoint events, try: perf report -s trace_fields To record callchains for each sample: perf record -g -To record every process run by an user: perf record -u +To record every process run by a user: perf record -u Skip collecing build-id when recording: perf record -B To change sampling frequency to 100 Hz: perf record -F 100 See assembly instructions with percentage: perf annotate -- cgit v1.2.3 From 03440c4e5e2f167764997a7e0f2dbb279d8078e6 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 27 Feb 2017 14:28:49 -0800 Subject: scripts/spelling.txt: add "an union" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: an union||a union Link: http://lkml.kernel.org/r/1481573103-11329-5-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/media/uapi/dvb/dvb-frontend-parameters.rst | 4 ++-- arch/x86/include/asm/desc_defs.h | 2 +- drivers/acpi/acpica/dbconvert.c | 2 +- drivers/acpi/acpica/nspredef.c | 2 +- drivers/acpi/acpica/nsxfeval.c | 4 ++-- drivers/staging/lustre/lustre/include/lustre/lustre_idl.h | 2 +- include/linux/dcache.h | 4 ++-- include/media/v4l2-ctrls.h | 4 ++-- include/xen/interface/grant_table.h | 2 +- scripts/spelling.txt | 1 + sound/pci/ice1712/wm8766.c | 2 +- sound/pci/ice1712/wm8776.c | 2 +- tools/perf/util/probe-finder.c | 4 ++-- tools/perf/util/sort.h | 2 +- 14 files changed, 19 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/media/uapi/dvb/dvb-frontend-parameters.rst b/Documentation/media/uapi/dvb/dvb-frontend-parameters.rst index bf31411fc9df..899fd5c3545e 100644 --- a/Documentation/media/uapi/dvb/dvb-frontend-parameters.rst +++ b/Documentation/media/uapi/dvb/dvb-frontend-parameters.rst @@ -9,7 +9,7 @@ frontend parameters The kind of parameters passed to the frontend device for tuning depend on the kind of hardware you are using. -The struct ``dvb_frontend_parameters`` uses an union with specific +The struct ``dvb_frontend_parameters`` uses a union with specific per-system parameters. However, as newer delivery systems required more data, the structure size weren't enough to fit, and just extending its size would break the existing applications. So, those parameters were @@ -23,7 +23,7 @@ So, newer applications should use instead, in order to be able to support the newer System Delivery like DVB-S2, DVB-T2, DVB-C2, ISDB, etc. -All kinds of parameters are combined as an union in the +All kinds of parameters are combined as a union in the FrontendParameters structure: diff --git a/arch/x86/include/asm/desc_defs.h b/arch/x86/include/asm/desc_defs.h index eb5deb42484d..49265345d4d2 100644 --- a/arch/x86/include/asm/desc_defs.h +++ b/arch/x86/include/asm/desc_defs.h @@ -15,7 +15,7 @@ * FIXME: Accessing the desc_struct through its fields is more elegant, * and should be the one valid thing to do. However, a lot of open code * still touches the a and b accessors, and doing this allow us to do it - * incrementally. We keep the signature as a struct, rather than an union, + * incrementally. We keep the signature as a struct, rather than a union, * so we can get rid of it transparently in the future -- glommer */ /* 8 byte segment descriptor */ diff --git a/drivers/acpi/acpica/dbconvert.c b/drivers/acpi/acpica/dbconvert.c index 251f9477a984..857dbc43a9b1 100644 --- a/drivers/acpi/acpica/dbconvert.c +++ b/drivers/acpi/acpica/dbconvert.c @@ -242,7 +242,7 @@ acpi_status acpi_db_convert_to_package(char *string, union acpi_object *object) * * RETURN: Status * - * DESCRIPTION: Convert a typed and tokenized string to an union acpi_object. Typing: + * DESCRIPTION: Convert a typed and tokenized string to a union acpi_object. Typing: * 1) String objects were surrounded by quotes. * 2) Buffer objects were surrounded by parentheses. * 3) Package objects were surrounded by brackets "[]". diff --git a/drivers/acpi/acpica/nspredef.c b/drivers/acpi/acpica/nspredef.c index 3dbbecf22087..9d14b509529e 100644 --- a/drivers/acpi/acpica/nspredef.c +++ b/drivers/acpi/acpica/nspredef.c @@ -323,7 +323,7 @@ acpi_ns_check_reference(struct acpi_evaluate_info *info, /* * Check the reference object for the correct reference type (opcode). - * The only type of reference that can be converted to an union acpi_object is + * The only type of reference that can be converted to a union acpi_object is * a reference to a named object (reference class: NAME) */ if (return_object->reference.class == ACPI_REFCLASS_NAME) { diff --git a/drivers/acpi/acpica/nsxfeval.c b/drivers/acpi/acpica/nsxfeval.c index 8e365c0e766b..c944ff5c9c3d 100644 --- a/drivers/acpi/acpica/nsxfeval.c +++ b/drivers/acpi/acpica/nsxfeval.c @@ -495,9 +495,9 @@ static void acpi_ns_resolve_references(struct acpi_evaluate_info *info) /* * Two types of references are supported - those created by Index and * ref_of operators. A name reference (AML_NAMEPATH_OP) can be converted - * to an union acpi_object, so it is not dereferenced here. A ddb_handle + * to a union acpi_object, so it is not dereferenced here. A ddb_handle * (AML_LOAD_OP) cannot be dereferenced, nor can it be converted to - * an union acpi_object. + * a union acpi_object. */ switch (info->return_object->reference.class) { case ACPI_REFCLASS_INDEX: diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h index b0eb80d70c23..60b827eeefe2 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_idl.h @@ -1704,7 +1704,7 @@ struct ost_lvb { * lquota data structures */ -/* The lquota_id structure is an union of all the possible identifier types that +/* The lquota_id structure is a union of all the possible identifier types that * can be used with quota, this includes: * - 64-bit user ID * - 64-bit group ID diff --git a/include/linux/dcache.h b/include/linux/dcache.h index c965e4469499..591b6c16f9c1 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -562,7 +562,7 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) * @inode: inode to select the dentry from multiple layers (can be NULL) * @flags: open flags to control copy-up behavior * - * If dentry is on an union/overlay, then return the underlying, real dentry. + * If dentry is on a union/overlay, then return the underlying, real dentry. * Otherwise return the dentry itself. * * See also: Documentation/filesystems/vfs.txt @@ -581,7 +581,7 @@ static inline struct dentry *d_real(struct dentry *dentry, * d_real_inode - Return the real inode * @dentry: The dentry to query * - * If dentry is on an union/overlay, then return the underlying, real inode. + * If dentry is on a union/overlay, then return the underlying, real inode. * Otherwise return d_inode(). */ static inline struct inode *d_real_inode(const struct dentry *dentry) diff --git a/include/media/v4l2-ctrls.h b/include/media/v4l2-ctrls.h index e1006b391cdc..bee1404391dd 100644 --- a/include/media/v4l2-ctrls.h +++ b/include/media/v4l2-ctrls.h @@ -174,10 +174,10 @@ typedef void (*v4l2_ctrl_notify_fnc)(struct v4l2_ctrl *ctrl, void *priv); * not freed when the control is deleted. Should this be needed * then a new internal bitfield can be added to tell the framework * to free this pointer. - * @p_cur: The control's current value represented via an union with + * @p_cur: The control's current value represented via a union with * provides a standard way of accessing control types * through a pointer. - * @p_new: The control's new value represented via an union with provides + * @p_new: The control's new value represented via a union with provides * a standard way of accessing control types * through a pointer. */ diff --git a/include/xen/interface/grant_table.h b/include/xen/interface/grant_table.h index 56806bc90c2f..7fb7112d667c 100644 --- a/include/xen/interface/grant_table.h +++ b/include/xen/interface/grant_table.h @@ -181,7 +181,7 @@ struct grant_entry_header { }; /* - * Version 2 of the grant entry structure, here is an union because three + * Version 2 of the grant entry structure, here is a union because three * different types are suppotted: full_page, sub_page and transitive. */ union grant_entry_v2 { diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 13794532c3fa..27991a91de6f 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -86,6 +86,7 @@ alue||value ambigious||ambiguous amoung||among amout||amount +an union||a union an user||a user an userspace||a userspace analysator||analyzer diff --git a/sound/pci/ice1712/wm8766.c b/sound/pci/ice1712/wm8766.c index f7ac8d5e862c..27c03e40c9b1 100644 --- a/sound/pci/ice1712/wm8766.c +++ b/sound/pci/ice1712/wm8766.c @@ -254,7 +254,7 @@ static int snd_wm8766_ctl_put(struct snd_kcontrol *kcontrol, int n = kcontrol->private_value; u16 val, regval1, regval2; - /* this also works for enum because value is an union */ + /* this also works for enum because value is a union */ regval1 = ucontrol->value.integer.value[0]; regval2 = ucontrol->value.integer.value[1]; if (wm->ctl[n].flags & WM8766_FLAG_INVERT) { diff --git a/sound/pci/ice1712/wm8776.c b/sound/pci/ice1712/wm8776.c index ebd2fe4b4a57..553669b103c2 100644 --- a/sound/pci/ice1712/wm8776.c +++ b/sound/pci/ice1712/wm8776.c @@ -528,7 +528,7 @@ static int snd_wm8776_ctl_put(struct snd_kcontrol *kcontrol, int n = kcontrol->private_value; u16 val, regval1, regval2; - /* this also works for enum because value is an union */ + /* this also works for enum because value is a union */ regval1 = ucontrol->value.integer.value[0]; regval2 = ucontrol->value.integer.value[1]; if (wm->ctl[n].flags & WM8776_FLAG_INVERT) { diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 0d9d6e0803b8..57cd268d4275 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -464,7 +464,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, /* Verify it is a data structure */ tag = dwarf_tag(&type); if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { - pr_warning("%s is not a data structure nor an union.\n", + pr_warning("%s is not a data structure nor a union.\n", varname); return -EINVAL; } @@ -479,7 +479,7 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } else { /* Verify it is a data structure */ if (tag != DW_TAG_structure_type && tag != DW_TAG_union_type) { - pr_warning("%s is not a data structure nor an union.\n", + pr_warning("%s is not a data structure nor a union.\n", varname); return -EINVAL; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index 7aff317fc7c4..796c847e2f00 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -108,7 +108,7 @@ struct hist_entry { /* * Since perf diff only supports the stdio output, TUI * fields are only accessed from perf report (or perf - * top). So make it an union to reduce memory usage. + * top). So make it a union to reduce memory usage. */ struct hist_entry_diff diff; struct /* for TUI */ { -- cgit v1.2.3 From f1f1007644ffc8051a4c11427d58b1967ae7b75a Mon Sep 17 00:00:00 2001 From: Vegard Nossum Date: Mon, 27 Feb 2017 14:30:07 -0800 Subject: mm: add new mmgrab() helper Apart from adding the helper function itself, the rest of the kernel is converted mechanically using: git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)->mm_count);/mmgrab\(\1\);/' git grep -l 'atomic_inc.*mm_count' | xargs sed -i 's/atomic_inc(&\(.*\)\.mm_count);/mmgrab\(\&\1\);/' This is needed for a later patch that hooks into the helper, but might be a worthwhile cleanup on its own. (Michal Hocko provided most of the kerneldoc comment.) Link: http://lkml.kernel.org/r/20161218123229.22952-1-vegard.nossum@oracle.com Signed-off-by: Vegard Nossum Acked-by: Michal Hocko Acked-by: Peter Zijlstra (Intel) Acked-by: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/alpha/kernel/smp.c | 2 +- arch/arc/kernel/smp.c | 2 +- arch/arm/kernel/smp.c | 2 +- arch/arm64/kernel/smp.c | 2 +- arch/blackfin/mach-common/smp.c | 2 +- arch/hexagon/kernel/smp.c | 2 +- arch/ia64/kernel/setup.c | 2 +- arch/m32r/kernel/setup.c | 2 +- arch/metag/kernel/smp.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/mn10300/kernel/smp.c | 2 +- arch/parisc/kernel/smp.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/s390/kernel/processor.c | 2 +- arch/score/kernel/traps.c | 2 +- arch/sh/kernel/smp.c | 2 +- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/kernel/sun4d_smp.c | 2 +- arch/sparc/kernel/sun4m_smp.c | 2 +- arch/sparc/kernel/traps_32.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/tile/kernel/smpboot.c | 2 +- arch/x86/kernel/cpu/common.c | 4 ++-- arch/xtensa/kernel/smp.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_process.c | 2 +- drivers/gpu/drm/i915/i915_gem_userptr.c | 2 +- drivers/infiniband/hw/hfi1/file_ops.c | 2 +- fs/proc/base.c | 4 ++-- fs/userfaultfd.c | 2 +- include/linux/sched.h | 22 ++++++++++++++++++++++ kernel/exit.c | 2 +- kernel/futex.c | 2 +- kernel/sched/core.c | 4 ++-- mm/khugepaged.c | 2 +- mm/ksm.c | 2 +- mm/mmu_context.c | 2 +- mm/mmu_notifier.c | 2 +- mm/oom_kill.c | 4 ++-- virt/kvm/kvm_main.c | 2 +- 40 files changed, 65 insertions(+), 43 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index 46bf263c3153..acb4b146a607 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -144,7 +144,7 @@ smp_callin(void) alpha_mv.smp_callin(); /* All kernel threads share the same mm context. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; /* inform the notifiers about the new cpu */ diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index 2afbafadb6ab..695624181682 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -140,7 +140,7 @@ void start_kernel_secondary(void) setup_processor(); atomic_inc(&mm->mm_users); - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 7dd14e8395e6..c6514ce0fcbc 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -371,7 +371,7 @@ asmlinkage void secondary_start_kernel(void) * reference and switch to it. */ cpu = smp_processor_id(); - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index a8ec5da530af..827d52d78b67 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -222,7 +222,7 @@ asmlinkage void secondary_start_kernel(void) * All kernel threads share the same mm context; grab a * reference and switch to it. */ - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; /* diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 23c4ef5f8bdc..bc5617ef7128 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -308,7 +308,7 @@ void secondary_start_kernel(void) /* Attach the new idle task to the global mm. */ atomic_inc(&mm->mm_users); - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; preempt_disable(); diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index 983bae7d2665..c02a6455839e 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -162,7 +162,7 @@ void start_secondary(void) ); /* Set the memory struct */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; cpu = smp_processor_id(); diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index c483ece3eb84..d68322966f33 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -994,7 +994,7 @@ cpu_init (void) */ ia64_setreg(_IA64_REG_CR_DCR, ( IA64_DCR_DP | IA64_DCR_DK | IA64_DCR_DX | IA64_DCR_DR | IA64_DCR_DA | IA64_DCR_DD | IA64_DCR_LC)); - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; BUG_ON(current->mm); diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index 136c69f1fb8a..b18bc0bd6544 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -403,7 +403,7 @@ void __init cpu_init (void) printk(KERN_INFO "Initializing CPU#%d\n", cpu_id); /* Set up and load the per-CPU TSS and LDT */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; if (current->mm) BUG(); diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index bad13232de51..af9cff547a19 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -345,7 +345,7 @@ asmlinkage void secondary_start_kernel(void) * reference and switch to it. */ atomic_inc(&mm->mm_users); - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); enter_lazy_tlb(mm, current); diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index cb479be31a50..49c6df20672a 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -2232,7 +2232,7 @@ void per_cpu_trap_init(bool is_boot_cpu) if (!cpu_data[cpu].asid_cache) cpu_data[cpu].asid_cache = asid_first_version(cpu); - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index 426173c4b0b9..e65b5cc2fa67 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -589,7 +589,7 @@ static void __init smp_cpu_init(void) } printk(KERN_INFO "Initializing CPU#%d\n", cpu_id); - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; BUG_ON(current->mm); diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 75dab2871346..67b452b41ff6 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -279,7 +279,7 @@ smp_cpu_init(int cpunum) set_cpu_online(cpunum, true); /* Initialise the idle task for this CPU */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 893bd7f79be6..573fb3a461b5 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -707,7 +707,7 @@ void start_secondary(void *unused) unsigned int cpu = smp_processor_id(); int i, base; - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; smp_store_cpu_info(cpu); diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index 21004aaac69b..bc2b60dcb178 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -73,7 +73,7 @@ void cpu_init(void) get_cpu_id(id); if (machine_has_cpu_mhz) update_cpu_mhz(NULL); - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; BUG_ON(current->mm); enter_lazy_tlb(&init_mm, current); diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 2b22bcf02c27..569ac02f68df 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -336,7 +336,7 @@ void __init trap_init(void) set_except_vector(18, handle_dbe); flush_icache_range(DEBUG_VECTOR_BASE_ADDR, IRQ_VECTOR_BASE_ADDR); - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; cpu_cache_init(); } diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 38e7860845db..ee379c699c08 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -178,7 +178,7 @@ asmlinkage void start_secondary(void) struct mm_struct *mm = &init_mm; enable_mmu(); - atomic_inc(&mm->mm_count); + mmgrab(mm); atomic_inc(&mm->mm_users); current->active_mm = mm; #ifdef CONFIG_MMU diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index 71e16f2241c2..b99d33797e1d 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -93,7 +93,7 @@ void leon_cpu_pre_online(void *arg) : "memory" /* paranoid */); /* Attach to the address space of init_task. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 90a02cb64e20..8e3e13924594 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -122,7 +122,7 @@ void smp_callin(void) current_thread_info()->new_child = 0; /* Attach to the address space of init_task. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; /* inform the notifiers about the new cpu */ diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 9d98e5002a09..7b55c50eabe5 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -93,7 +93,7 @@ void sun4d_cpu_pre_online(void *arg) show_leds(cpuid); /* Attach to the address space of init_task. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; local_ops->cache_all(); diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 278c40abce82..633c4cf6fdb0 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -59,7 +59,7 @@ void sun4m_cpu_pre_online(void *arg) : "memory" /* paranoid */); /* Attach to the address space of init_task. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; while (!cpumask_test_cpu(cpuid, &smp_commenced_mask)) diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 4f21df7d4f13..ecddac5a4c96 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -448,7 +448,7 @@ void trap_init(void) thread_info_offsets_are_bolixed_pete(); /* Attach to the address space of init_task. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; /* NOTE: Other cpus have this done as they are started diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index dfc97a47c9a0..e022d7b00390 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -2837,6 +2837,6 @@ void __init trap_init(void) /* Attach to the address space of init_task. On SMP we * do this in smp.c:smp_callin for other cpus. */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; } diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 6c0abaacec33..53ce940a5016 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -160,7 +160,7 @@ static void start_secondary(void) __this_cpu_write(current_asid, min_asid); /* Set up this thread as another owner of the init_mm */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); current->active_mm = &init_mm; if (current->mm) BUG(); diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f07005e6f461..c64ca5929cb5 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1510,7 +1510,7 @@ void cpu_init(void) for (i = 0; i <= IO_BITMAP_LONGS; i++) t->io_bitmap[i] = ~0UL; - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); me->active_mm = &init_mm; BUG_ON(me->mm); enter_lazy_tlb(&init_mm, me); @@ -1561,7 +1561,7 @@ void cpu_init(void) /* * Set up and load the per-CPU TSS and LDT */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); curr->active_mm = &init_mm; BUG_ON(curr->mm); enter_lazy_tlb(&init_mm, curr); diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index fc4ad21a5ed4..9bf5cea3bae4 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -136,7 +136,7 @@ void secondary_start_kernel(void) /* All kernel threads share the same mm context. */ atomic_inc(&mm->mm_users); - atomic_inc(&mm->mm_count); + mmgrab(mm); current->active_mm = mm; cpumask_set_cpu(cpu, mm_cpumask(mm)); enter_lazy_tlb(mm, current); diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_process.c b/drivers/gpu/drm/amd/amdkfd/kfd_process.c index ef7c8de7060e..ca5f2aa7232d 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_process.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_process.c @@ -262,7 +262,7 @@ static void kfd_process_notifier_release(struct mmu_notifier *mn, * and because the mmu_notifier_unregister function also drop * mm_count we need to take an extra count here. */ - atomic_inc(&p->mm->mm_count); + mmgrab(p->mm); mmu_notifier_unregister_no_release(&p->mmu_notifier, p->mm); mmu_notifier_call_srcu(&p->rcu, &kfd_process_destroy_delayed); } diff --git a/drivers/gpu/drm/i915/i915_gem_userptr.c b/drivers/gpu/drm/i915/i915_gem_userptr.c index 6a8fa085b74e..65802d93fdc1 100644 --- a/drivers/gpu/drm/i915/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/i915_gem_userptr.c @@ -334,7 +334,7 @@ i915_gem_userptr_init__mm_struct(struct drm_i915_gem_object *obj) mm->i915 = to_i915(obj->base.dev); mm->mm = current->mm; - atomic_inc(¤t->mm->mm_count); + mmgrab(current->mm); mm->mn = NULL; diff --git a/drivers/infiniband/hw/hfi1/file_ops.c b/drivers/infiniband/hw/hfi1/file_ops.c index f46033984d07..3b19c16a9e45 100644 --- a/drivers/infiniband/hw/hfi1/file_ops.c +++ b/drivers/infiniband/hw/hfi1/file_ops.c @@ -185,7 +185,7 @@ static int hfi1_file_open(struct inode *inode, struct file *fp) if (fd) { fd->rec_cpu_num = -1; /* no cpu affinity by default */ fd->mm = current->mm; - atomic_inc(&fd->mm->mm_count); + mmgrab(fd->mm); fp->private_data = fd; } else { fp->private_data = NULL; diff --git a/fs/proc/base.c b/fs/proc/base.c index b8f06273353e..5d51a188871b 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -766,7 +766,7 @@ struct mm_struct *proc_mem_open(struct inode *inode, unsigned int mode) if (!IS_ERR_OR_NULL(mm)) { /* ensure this mm_struct can't be freed */ - atomic_inc(&mm->mm_count); + mmgrab(mm); /* but do not pin its memory */ mmput(mm); } @@ -1064,7 +1064,7 @@ static int __set_oom_adj(struct file *file, int oom_adj, bool legacy) if (p) { if (atomic_read(&p->mm->mm_users) > 1) { mm = p->mm; - atomic_inc(&mm->mm_count); + mmgrab(mm); } task_unlock(p); } diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index e6e0a619cb3a..3c421d06a18e 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -1847,7 +1847,7 @@ static struct file *userfaultfd_file_create(int flags) ctx->released = false; ctx->mm = current->mm; /* prevent the mm struct to be freed */ - atomic_inc(&ctx->mm->mm_count); + mmgrab(ctx->mm); file = anon_inode_getfile("[userfaultfd]", &userfaultfd_fops, ctx, O_RDWR | (flags & UFFD_SHARED_FCNTL_FLAGS)); diff --git a/include/linux/sched.h b/include/linux/sched.h index 451e241f32c5..7cfa5546c840 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2904,6 +2904,28 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) */ extern struct mm_struct * mm_alloc(void); +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + /* mmdrop drops the mm and the page tables */ extern void __mmdrop(struct mm_struct *); static inline void mmdrop(struct mm_struct *mm) diff --git a/kernel/exit.c b/kernel/exit.c index 90b09ca35c84..8a768a3672a5 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -539,7 +539,7 @@ static void exit_mm(void) __set_current_state(TASK_RUNNING); down_read(&mm->mmap_sem); } - atomic_inc(&mm->mm_count); + mmgrab(mm); BUG_ON(mm != current->active_mm); /* more a memory barrier than a real lock */ task_lock(current); diff --git a/kernel/futex.c b/kernel/futex.c index cdf365036141..b687cb22301c 100644 --- a/kernel/futex.c +++ b/kernel/futex.c @@ -338,7 +338,7 @@ static inline bool should_fail_futex(bool fshared) static inline void futex_get_mm(union futex_key *key) { - atomic_inc(&key->private.mm->mm_count); + mmgrab(key->private.mm); /* * Ensure futex_get_mm() implies a full barrier such that * get_futex_key() implies a full barrier. This is relied upon diff --git a/kernel/sched/core.c b/kernel/sched/core.c index e1ae6ac15eac..6ea1925ac5c0 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -2847,7 +2847,7 @@ context_switch(struct rq *rq, struct task_struct *prev, if (!mm) { next->active_mm = oldmm; - atomic_inc(&oldmm->mm_count); + mmgrab(oldmm); enter_lazy_tlb(oldmm, next); } else switch_mm_irqs_off(oldmm, mm, next); @@ -6098,7 +6098,7 @@ void __init sched_init(void) /* * The boot idle thread does lazy MMU switching as well: */ - atomic_inc(&init_mm.mm_count); + mmgrab(&init_mm); enter_lazy_tlb(&init_mm, current); /* diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 77ae3239c3de..34bce5c308e3 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -420,7 +420,7 @@ int __khugepaged_enter(struct mm_struct *mm) list_add_tail(&mm_slot->mm_node, &khugepaged_scan.mm_head); spin_unlock(&khugepaged_mm_lock); - atomic_inc(&mm->mm_count); + mmgrab(mm); if (wakeup) wake_up_interruptible(&khugepaged_wait); diff --git a/mm/ksm.c b/mm/ksm.c index cf211c01ceac..520e4c37fec7 100644 --- a/mm/ksm.c +++ b/mm/ksm.c @@ -1854,7 +1854,7 @@ int __ksm_enter(struct mm_struct *mm) spin_unlock(&ksm_mmlist_lock); set_bit(MMF_VM_MERGEABLE, &mm->flags); - atomic_inc(&mm->mm_count); + mmgrab(mm); if (needs_wakeup) wake_up_interruptible(&ksm_thread_wait); diff --git a/mm/mmu_context.c b/mm/mmu_context.c index 6f4d27c5bb32..daf67bb02b4a 100644 --- a/mm/mmu_context.c +++ b/mm/mmu_context.c @@ -25,7 +25,7 @@ void use_mm(struct mm_struct *mm) task_lock(tsk); active_mm = tsk->active_mm; if (active_mm != mm) { - atomic_inc(&mm->mm_count); + mmgrab(mm); tsk->active_mm = mm; } tsk->mm = mm; diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index f4259e496f83..32bc9f2ff7eb 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -275,7 +275,7 @@ static int do_mmu_notifier_register(struct mmu_notifier *mn, mm->mmu_notifier_mm = mmu_notifier_mm; mmu_notifier_mm = NULL; } - atomic_inc(&mm->mm_count); + mmgrab(mm); /* * Serialize the update against mmu_notifier_unregister. A diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 578321f1c070..51c091849dcb 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -653,7 +653,7 @@ static void mark_oom_victim(struct task_struct *tsk) /* oom_mm is bound to the signal struct life time. */ if (!cmpxchg(&tsk->signal->oom_mm, NULL, mm)) - atomic_inc(&tsk->signal->oom_mm->mm_count); + mmgrab(tsk->signal->oom_mm); /* * Make sure that the task is woken up from uninterruptible sleep @@ -870,7 +870,7 @@ static void oom_kill_process(struct oom_control *oc, const char *message) /* Get a reference to safely compare mm after task_unlock(victim) */ mm = victim->mm; - atomic_inc(&mm->mm_count); + mmgrab(mm); /* * We should send SIGKILL before setting TIF_MEMDIE in order to prevent * the OOM victim from depleting the memory reserves from the user diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index 5b0dd4a9b2cb..35f71409d9ee 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -611,7 +611,7 @@ static struct kvm *kvm_create_vm(unsigned long type) return ERR_PTR(-ENOMEM); spin_lock_init(&kvm->mmu_lock); - atomic_inc(¤t->mm->mm_count); + mmgrab(current->mm); kvm->mm = current->mm; kvm_eventfd_init(kvm); mutex_init(&kvm->lock); -- cgit v1.2.3 From 2959a5f726f6510d6dd7c958f8877e08d0cf589c Mon Sep 17 00:00:00 2001 From: Jinbum Park Date: Mon, 27 Feb 2017 14:30:22 -0800 Subject: mm: add arch-independent testcases for RODATA This patch makes arch-independent testcases for RODATA. Both x86 and x86_64 already have testcases for RODATA, But they are arch-specific because using inline assembly directly. And cacheflush.h is not a suitable location for rodata-test related things. Since they were in cacheflush.h, If someone change the state of CONFIG_DEBUG_RODATA_TEST, It cause overhead of kernel build. To solve the above issues, write arch-independent testcases and move it to shared location. [jinb.park7@gmail.com: fix config dependency] Link: http://lkml.kernel.org/r/20170209131625.GA16954@pjb1027-Latitude-E5410 Link: http://lkml.kernel.org/r/20170129105436.GA9303@pjb1027-Latitude-E5410 Signed-off-by: Jinbum Park Acked-by: Kees Cook Cc: Ingo Molnar Cc: H. Peter Anvin Cc: Arjan van de Ven Cc: Laura Abbott Cc: Russell King Cc: Valentin Rothberg Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/Kconfig.debug | 8 ----- arch/x86/include/asm/cacheflush.h | 10 ------ arch/x86/kernel/Makefile | 1 - arch/x86/kernel/test_rodata.c | 75 --------------------------------------- arch/x86/mm/init_32.c | 4 --- arch/x86/mm/init_64.c | 5 --- include/linux/rodata_test.h | 23 ++++++++++++ init/main.c | 6 ++-- mm/Kconfig.debug | 6 ++++ mm/Makefile | 1 + mm/rodata_test.c | 56 +++++++++++++++++++++++++++++ 11 files changed, 90 insertions(+), 105 deletions(-) delete mode 100644 arch/x86/kernel/test_rodata.c create mode 100644 include/linux/rodata_test.h create mode 100644 mm/rodata_test.c (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index c4cba00dbdee..63c1d13aaf9f 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -74,14 +74,6 @@ config EFI_PGT_DUMP issues with the mapping of the EFI runtime regions into that table. -config DEBUG_RODATA_TEST - bool "Testcase for the marking rodata read-only" - default y - ---help--- - This option enables a testcase for the setting rodata read-only - as well as for the change_page_attr() infrastructure. - If in doubt, say "N" - config DEBUG_WX bool "Warn on W+X mappings at boot" select X86_PTDUMP_CORE diff --git a/arch/x86/include/asm/cacheflush.h b/arch/x86/include/asm/cacheflush.h index 872877d930de..e7e1942edff7 100644 --- a/arch/x86/include/asm/cacheflush.h +++ b/arch/x86/include/asm/cacheflush.h @@ -90,18 +90,8 @@ void clflush_cache_range(void *addr, unsigned int size); #define mmio_flush_range(addr, size) clflush_cache_range(addr, size) -extern const int rodata_test_data; extern int kernel_set_to_readonly; void set_kernel_text_rw(void); void set_kernel_text_ro(void); -#ifdef CONFIG_DEBUG_RODATA_TEST -int rodata_test(void); -#else -static inline int rodata_test(void) -{ - return 0; -} -#endif - #endif /* _ASM_X86_CACHEFLUSH_H */ diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index bdcdb3b3a219..84c00592d359 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -100,7 +100,6 @@ obj-$(CONFIG_HPET_TIMER) += hpet.o obj-$(CONFIG_APB_TIMER) += apb_timer.o obj-$(CONFIG_AMD_NB) += amd_nb.o -obj-$(CONFIG_DEBUG_RODATA_TEST) += test_rodata.o obj-$(CONFIG_DEBUG_NMI_SELFTEST) += nmi_selftest.o obj-$(CONFIG_KVM_GUEST) += kvm.o kvmclock.o diff --git a/arch/x86/kernel/test_rodata.c b/arch/x86/kernel/test_rodata.c deleted file mode 100644 index 222e84e2432e..000000000000 --- a/arch/x86/kernel/test_rodata.c +++ /dev/null @@ -1,75 +0,0 @@ -/* - * test_rodata.c: functional test for mark_rodata_ro function - * - * (C) Copyright 2008 Intel Corporation - * Author: Arjan van de Ven - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ -#include -#include -#include - -int rodata_test(void) -{ - unsigned long result; - unsigned long start, end; - - /* test 1: read the value */ - /* If this test fails, some previous testrun has clobbered the state */ - if (!rodata_test_data) { - printk(KERN_ERR "rodata_test: test 1 fails (start data)\n"); - return -ENODEV; - } - - /* test 2: write to the variable; this should fault */ - /* - * If this test fails, we managed to overwrite the data - * - * This is written in assembly to be able to catch the - * exception that is supposed to happen in the correct - * case - */ - - result = 1; - asm volatile( - "0: mov %[zero],(%[rodata_test])\n" - " mov %[zero], %[rslt]\n" - "1:\n" - ".section .fixup,\"ax\"\n" - "2: jmp 1b\n" - ".previous\n" - _ASM_EXTABLE(0b,2b) - : [rslt] "=r" (result) - : [rodata_test] "r" (&rodata_test_data), [zero] "r" (0UL) - ); - - - if (!result) { - printk(KERN_ERR "rodata_test: test data was not read only\n"); - return -ENODEV; - } - - /* test 3: check the value hasn't changed */ - /* If this test fails, we managed to overwrite the data */ - if (!rodata_test_data) { - printk(KERN_ERR "rodata_test: Test 3 fails (end data)\n"); - return -ENODEV; - } - /* test 4: check if the rodata section is 4Kb aligned */ - start = (unsigned long)__start_rodata; - end = (unsigned long)__end_rodata; - if (start & (PAGE_SIZE - 1)) { - printk(KERN_ERR "rodata_test: .rodata is not 4k aligned\n"); - return -ENODEV; - } - if (end & (PAGE_SIZE - 1)) { - printk(KERN_ERR "rodata_test: .rodata end is not 4k aligned\n"); - return -ENODEV; - } - - return 0; -} diff --git a/arch/x86/mm/init_32.c b/arch/x86/mm/init_32.c index 928d657de829..2b4b53e6793f 100644 --- a/arch/x86/mm/init_32.c +++ b/arch/x86/mm/init_32.c @@ -864,9 +864,6 @@ static noinline int do_test_wp_bit(void) return flag; } -const int rodata_test_data = 0xC3; -EXPORT_SYMBOL_GPL(rodata_test_data); - int kernel_set_to_readonly __read_mostly; void set_kernel_text_rw(void) @@ -939,7 +936,6 @@ void mark_rodata_ro(void) set_pages_ro(virt_to_page(start), size >> PAGE_SHIFT); printk(KERN_INFO "Write protecting the kernel read-only data: %luk\n", size >> 10); - rodata_test(); #ifdef CONFIG_CPA_DEBUG printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, start + size); diff --git a/arch/x86/mm/init_64.c b/arch/x86/mm/init_64.c index 97346f987ef2..15173d37f399 100644 --- a/arch/x86/mm/init_64.c +++ b/arch/x86/mm/init_64.c @@ -1000,9 +1000,6 @@ void __init mem_init(void) mem_init_print_info(NULL); } -const int rodata_test_data = 0xC3; -EXPORT_SYMBOL_GPL(rodata_test_data); - int kernel_set_to_readonly; void set_kernel_text_rw(void) @@ -1071,8 +1068,6 @@ void mark_rodata_ro(void) all_end = roundup((unsigned long)_brk_end, PMD_SIZE); set_memory_nx(text_end, (all_end - text_end) >> PAGE_SHIFT); - rodata_test(); - #ifdef CONFIG_CPA_DEBUG printk(KERN_INFO "Testing CPA: undo %lx-%lx\n", start, end); set_memory_rw(start, (end-start) >> PAGE_SHIFT); diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h new file mode 100644 index 000000000000..ea05f6c51413 --- /dev/null +++ b/include/linux/rodata_test.h @@ -0,0 +1,23 @@ +/* + * rodata_test.h: functional test for mark_rodata_ro function + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#ifndef _RODATA_TEST_H +#define _RODATA_TEST_H + +#ifdef CONFIG_DEBUG_RODATA_TEST +extern const int rodata_test_data; +void rodata_test(void); +#else +static inline void rodata_test(void) {} +#endif + +#endif /* _RODATA_TEST_H */ diff --git a/init/main.c b/init/main.c index 6eb10ce472b9..47ea22d181ef 100644 --- a/init/main.c +++ b/init/main.c @@ -82,6 +82,7 @@ #include #include #include +#include #include #include @@ -935,9 +936,10 @@ __setup("rodata=", set_debug_rodata); #ifdef CONFIG_STRICT_KERNEL_RWX static void mark_readonly(void) { - if (rodata_enabled) + if (rodata_enabled) { mark_rodata_ro(); - else + rodata_test(); + } else pr_info("Kernel memory protection disabled.\n"); } #else diff --git a/mm/Kconfig.debug b/mm/Kconfig.debug index afcc550877ff..79d0fd13b5b3 100644 --- a/mm/Kconfig.debug +++ b/mm/Kconfig.debug @@ -90,3 +90,9 @@ config DEBUG_PAGE_REF careful when enabling this feature because it adds about 30 KB to the kernel code. However the runtime performance overhead is virtually nil until the tracepoints are actually enabled. + +config DEBUG_RODATA_TEST + bool "Testcase for the marking rodata read-only" + depends on STRICT_KERNEL_RWX + ---help--- + This option enables a testcase for the setting rodata read-only. diff --git a/mm/Makefile b/mm/Makefile index aa0aa17cb413..026f6a828a50 100644 --- a/mm/Makefile +++ b/mm/Makefile @@ -85,6 +85,7 @@ obj-$(CONFIG_MEMORY_FAILURE) += memory-failure.o obj-$(CONFIG_HWPOISON_INJECT) += hwpoison-inject.o obj-$(CONFIG_DEBUG_KMEMLEAK) += kmemleak.o obj-$(CONFIG_DEBUG_KMEMLEAK_TEST) += kmemleak-test.o +obj-$(CONFIG_DEBUG_RODATA_TEST) += rodata_test.o obj-$(CONFIG_PAGE_OWNER) += page_owner.o obj-$(CONFIG_CLEANCACHE) += cleancache.o obj-$(CONFIG_MEMORY_ISOLATION) += page_isolation.o diff --git a/mm/rodata_test.c b/mm/rodata_test.c new file mode 100644 index 000000000000..0fd21670b513 --- /dev/null +++ b/mm/rodata_test.c @@ -0,0 +1,56 @@ +/* + * rodata_test.c: functional test for mark_rodata_ro function + * + * (C) Copyright 2008 Intel Corporation + * Author: Arjan van de Ven + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ +#include +#include + +const int rodata_test_data = 0xC3; +EXPORT_SYMBOL_GPL(rodata_test_data); + +void rodata_test(void) +{ + unsigned long start, end; + int zero = 0; + + /* test 1: read the value */ + /* If this test fails, some previous testrun has clobbered the state */ + if (!rodata_test_data) { + pr_err("rodata_test: test 1 fails (start data)\n"); + return; + } + + /* test 2: write to the variable; this should fault */ + if (!probe_kernel_write((void *)&rodata_test_data, + (void *)&zero, sizeof(zero))) { + pr_err("rodata_test: test data was not read only\n"); + return; + } + + /* test 3: check the value hasn't changed */ + if (rodata_test_data == zero) { + pr_err("rodata_test: test data was changed\n"); + return; + } + + /* test 4: check if the rodata section is PAGE_SIZE aligned */ + start = (unsigned long)__start_rodata; + end = (unsigned long)__end_rodata; + if (start & (PAGE_SIZE - 1)) { + pr_err("rodata_test: start of .rodata is not page size aligned\n"); + return; + } + if (end & (PAGE_SIZE - 1)) { + pr_err("rodata_test: end of .rodata is not page size aligned\n"); + return; + } + + pr_info("rodata_test: all tests were successful\n"); +} -- cgit v1.2.3 From 3e761a42e19c63b624ebac94d918d8a15e07e2a7 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Mon, 27 Feb 2017 14:30:25 -0800 Subject: mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear() Looks like I also missed the build config that includes CONFIG_HIGHMEM64G && CONFIG_PARAVIRT to export the native_pud_clear() dummy function. Fixes: a00cc7d9dd93d ("mm, x86: add support for PUD-sized transparent hugepages") Link: http://lkml.kernel.org/r/148823188084.56076.17451228917824355200.stgit@djiang5-desk3.ch.intel.com Signed-off-by: Dave Jiang Reported-by: Laura Abbott Reported-by: Boris Ostrovsky Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable-3level.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 8f50fb3f04e1..72277b1028a5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,7 +121,8 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#ifndef CONFIG_SMP +#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ + defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -- cgit v1.2.3 From 40496c8ee73a5ca4fa581badf2247418980586b1 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:21:18 -0500 Subject: x86: msr-index.h: Define MSR_PKG_CST_CONFIG_CONTROL define MSR_PKG_CST_CONFIG_CONTROL (0xE2), which is the string used by Intel Documentation. We use this MSR in intel_idle and turbostat by a previous name, to be updated in the next patch. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 710273c617b8..975f23eefe14 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -47,6 +47,7 @@ #define MSR_PLATFORM_INFO 0x000000ce #define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 +#define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) #define ATM_LNC_C6_AUTO_DEMOTE (1UL << 25) -- cgit v1.2.3 From 419c9e986ea453d07140bffb9708d730b6317e8e Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 7 Jan 2017 23:26:22 -0500 Subject: x86: msr-index.h: Remove unused MSR_NHM_SNB_PKG_CST_CFG_CTL The two users, intel_idle driver and turbostat utility are using the new name, MSR_PKG_CST_CONFIG_CONTROL Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 975f23eefe14..12ecc6c1e7f8 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -46,7 +46,6 @@ #define MSR_FSB_FREQ 0x000000cd #define MSR_PLATFORM_INFO 0x000000ce -#define MSR_NHM_SNB_PKG_CST_CFG_CTL 0x000000e2 #define MSR_PKG_CST_CONFIG_CONTROL 0x000000e2 #define NHM_C3_AUTO_DEMOTE (1UL << 25) #define NHM_C1_AUTO_DEMOTE (1UL << 26) -- cgit v1.2.3 From 0539ba118fe241b0d03202fda0cd19cb758b7fbd Mon Sep 17 00:00:00 2001 From: Len Brown Date: Fri, 10 Feb 2017 00:27:20 -0500 Subject: tools/power turbostat: Baytrail c-state support The Baytrail SOC, with its Silvermont core, has some unique properties: 1. a hardware CC1 residency counter 2. a module-c6 residency counter 3. a package-c6 counter at traditional package-c7 counter address. The SOC does not support c3, pc3, c7 or pc7 counters. Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 2 ++ tools/power/x86/turbostat/turbostat.c | 46 ++++++++++++++++++++++++++++------- 2 files changed, 39 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 12ecc6c1e7f8..079db99f6560 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -147,6 +147,7 @@ /* C-state Residency Counters */ #define MSR_PKG_C3_RESIDENCY 0x000003f8 #define MSR_PKG_C6_RESIDENCY 0x000003f9 +#define MSR_ATOM_PKG_C6_RESIDENCY 0x000003fa #define MSR_PKG_C7_RESIDENCY 0x000003fa #define MSR_CORE_C3_RESIDENCY 0x000003fc #define MSR_CORE_C6_RESIDENCY 0x000003fd @@ -203,6 +204,7 @@ #define MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B #define MSR_CORE_C1_RES 0x00000660 +#define MSR_MODULE_C6_RES_MS 0x00000664 #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index d56f2b75dc58..19802be6dc02 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -167,6 +167,7 @@ struct core_data { unsigned long long c3; unsigned long long c6; unsigned long long c7; + unsigned long long mc6_us; /* duplicate as per-core for now, even though per module */ unsigned int core_temp_c; unsigned int core_id; unsigned long long counter[MAX_ADDED_COUNTERS]; @@ -375,6 +376,7 @@ struct msr_counter bic[] = { { 0x0, "RAM_J" }, { 0x0, "Core" }, { 0x0, "CPU" }, + { 0x0, "Mod%c6" }, }; #define MAX_BIC (sizeof(bic) / sizeof(struct msr_counter)) @@ -412,6 +414,7 @@ struct msr_counter bic[] = { #define BIC_RAM_J (1ULL << 31) #define BIC_Core (1ULL << 32) #define BIC_CPU (1ULL << 33) +#define BIC_Mod_c6 (1ULL << 34) unsigned long long bic_enabled = 0xFFFFFFFFFFFFFFFFULL; unsigned long long bic_present; @@ -504,6 +507,8 @@ void print_header(void) if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\tCPU%%c7"); + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "\tMod%%c6"); if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\tCoreTmp"); @@ -629,6 +634,7 @@ int dump_counters(struct thread_data *t, struct core_data *c, outp += sprintf(outp, "cADDED [%d] msr0x%x: %08llX\n", i, mp->msr_num, c->counter[i]); } + outp += sprintf(outp, "mc6_us: %016llX\n", c->mc6_us); } if (p) { @@ -774,6 +780,10 @@ int format_counters(struct thread_data *t, struct core_data *c, if (DO_BIC(BIC_CPU_c7)) outp += sprintf(outp, "\t%.2f", 100.0 * c->c7/t->tsc); + /* Mod%c6 */ + if (DO_BIC(BIC_Mod_c6)) + outp += sprintf(outp, "\t%.2f", 100.0 * c->mc6_us / t->tsc); + if (DO_BIC(BIC_CoreTmp)) outp += sprintf(outp, "\t%d", c->core_temp_c); @@ -988,6 +998,7 @@ delta_core(struct core_data *new, struct core_data *old) old->c6 = new->c6 - old->c6; old->c7 = new->c7 - old->c7; old->core_temp_c = new->core_temp_c; + old->mc6_us = new->mc6_us - old->mc6_us; for (i = 0, mp = sys.cp; mp; i++, mp = mp->next) { if (mp->format == FORMAT_RAW) @@ -1109,6 +1120,7 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c3 = 0; c->c6 = 0; c->c7 = 0; + c->mc6_us = 0; c->core_temp_c = 0; p->pkg_wtd_core_c0 = 0; @@ -1173,6 +1185,7 @@ int sum_counters(struct thread_data *t, struct core_data *c, average.cores.c3 += c->c3; average.cores.c6 += c->c6; average.cores.c7 += c->c7; + average.cores.mc6_us += c->mc6_us; average.cores.core_temp_c = MAX(average.cores.core_temp_c, c->core_temp_c); @@ -1246,6 +1259,7 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c3 /= topo.num_cores; average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + average.cores.mc6_us /= topo.num_cores; if (do_skl_residency) { average.packages.pkg_wtd_core_c0 /= topo.num_packages; @@ -1376,8 +1390,7 @@ retry: return -5; t->smi_count = msr & 0xFFFFFFFF; } - - if (use_c1_residency_msr) { + if (DO_BIC(BIC_CPU_c1) && use_c1_residency_msr) { if (get_msr(cpu, MSR_CORE_C1_RES, &t->c1)) return -6; } @@ -1409,6 +1422,10 @@ retry: if (get_msr(cpu, MSR_CORE_C7_RESIDENCY, &c->c7)) return -8; + if (DO_BIC(BIC_Mod_c6)) + if (get_msr(cpu, MSR_MODULE_C6_RES_MS, &c->mc6_us)) + return -8; + if (DO_BIC(BIC_CoreTmp)) { if (get_msr(cpu, MSR_IA32_THERM_STATUS, &msr)) return -9; @@ -1437,9 +1454,16 @@ retry: if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; - if (do_pc6) - if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) - return -10; + if (do_pc6) { + if (do_slm_cstates) { + if (get_msr(cpu, MSR_ATOM_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } else { + if (get_msr(cpu, MSR_PKG_C6_RESIDENCY, &p->pc6)) + return -10; + } + } + if (do_pc2) if (get_msr(cpu, MSR_PKG_C2_RESIDENCY, &p->pc2)) return -11; @@ -1533,7 +1557,7 @@ char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; int amt_pkg_cstate_limits[16] = {PCLUNL, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; int bxt_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; @@ -3336,8 +3360,6 @@ int has_skl_msrs(unsigned int family, unsigned int model) return 0; } - - int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -3731,6 +3753,12 @@ void process_cpuid() do_pc3 = (pkg_cstate_limit >= PCL__3); do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); + if (has_slv_msrs(family, model)) { + do_pc2 = do_pc3 = do_pc7 = 0; + do_pc6 = 1; + BIC_PRESENT(BIC_Mod_c6); + use_c1_residency_msr = 1; + } do_c8_c9_c10 = has_hsw_msrs(family, model); do_irtl_hsw = has_hsw_msrs(family, model); do_skl_residency = has_skl_msrs(family, model); @@ -4112,7 +4140,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(outf, "turbostat version 4.17 1 Jan 2017" + fprintf(outf, "turbostat version 4.17 10 Jan 2017" " - Len Brown \n"); } -- cgit v1.2.3 From 8a34fd0226eaae64d61ff9a113d276e28acb6b5c Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 12 Jan 2017 23:22:28 -0500 Subject: x86 msr-index.h: Define Atom specific core ratio MSR locations These MSRs are currently used by the intel_pstate driver, using a local definition. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 079db99f6560..83bc672c225c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -209,6 +209,12 @@ #define MSR_CC6_DEMOTION_POLICY_CONFIG 0x00000668 #define MSR_MC6_DEMOTION_POLICY_CONFIG 0x00000669 +#define MSR_ATOM_CORE_RATIOS 0x0000066a +#define MSR_ATOM_CORE_VIDS 0x0000066b +#define MSR_ATOM_CORE_TURBO_RATIOS 0x0000066c +#define MSR_ATOM_CORE_TURBO_VIDS 0x0000066d + + #define MSR_CORE_PERF_LIMIT_REASONS 0x00000690 #define MSR_GFX_PERF_LIMIT_REASONS 0x000006B0 #define MSR_RING_PERF_LIMIT_REASONS 0x000006B1 -- cgit v1.2.3 From 98af74599ea0757098a5776ea29e581b661dcf6f Mon Sep 17 00:00:00 2001 From: Len Brown Date: Sat, 21 Jan 2017 01:15:09 -0500 Subject: x86 msr_index.h: Define MSR_MISC_FEATURE_CONTROL This non-architectural MSR has disable bits for various prefetchers on modern processors. While these bits are generally touched only by the BIOS, say, via BIOS SETUP, it is useful to dump them when examining options that can alter performance. Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/msr-index.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 83bc672c225c..312fb7e14cdd 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -467,6 +467,7 @@ #define MSR_IA32_TEMPERATURE_TARGET 0x000001a2 +#define MSR_MISC_FEATURE_CONTROL 0x000001a4 #define MSR_MISC_PWR_MGMT 0x000001aa #define MSR_IA32_ENERGY_PERF_BIAS 0x000001b0 -- cgit v1.2.3 From 311f77708f362c1af0a999ba5ebbbc062fdc5601 Mon Sep 17 00:00:00 2001 From: Len Brown Date: Thu, 16 Feb 2017 23:48:50 -0500 Subject: x86: intel-family.h: Add GEMINI_LAKE SOC Cc: x86@kernel.org Signed-off-by: Len Brown --- arch/x86/include/asm/intel-family.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/intel-family.h b/arch/x86/include/asm/intel-family.h index 8167fdb67ae8..9814db42b790 100644 --- a/arch/x86/include/asm/intel-family.h +++ b/arch/x86/include/asm/intel-family.h @@ -59,6 +59,7 @@ #define INTEL_FAM6_ATOM_MERRIFIELD 0x4A /* Tangier */ #define INTEL_FAM6_ATOM_MOOREFIELD 0x5A /* Anniedale */ #define INTEL_FAM6_ATOM_GOLDMONT 0x5C +#define INTEL_FAM6_ATOM_GEMINI_LAKE 0x7A #define INTEL_FAM6_ATOM_DENVERTON 0x5F /* Goldmont Microserver */ /* Xeon Phi */ -- cgit v1.2.3 From 75013fb16f8484898eaa8d0b08fed942d790f029 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Wed, 1 Mar 2017 01:23:24 +0900 Subject: kprobes/x86: Fix kernel panic when certain exception-handling addresses are probed Fix to the exception table entry check by using probed address instead of the address of copied instruction. This bug may cause unexpected kernel panic if user probe an address where an exception can happen which should be fixup by __ex_table (e.g. copy_from_user.) Unless user puts a kprobe on such address, this doesn't cause any problem. This bug has been introduced years ago, by commit: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently"). Signed-off-by: Masami Hiramatsu Cc: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 464846888d9a ("x86/kprobes: Fix a bug which can modify kernel code permanently") Link: http://lkml.kernel.org/r/148829899399.28855.12581062400757221722.stgit@devbox Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes/common.h | 2 +- arch/x86/kernel/kprobes/core.c | 6 +++--- arch/x86/kernel/kprobes/opt.c | 2 +- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes/common.h b/arch/x86/kernel/kprobes/common.h index c6ee63f927ab..d688826e5736 100644 --- a/arch/x86/kernel/kprobes/common.h +++ b/arch/x86/kernel/kprobes/common.h @@ -67,7 +67,7 @@ #endif /* Ensure if the instruction can be boostable */ -extern int can_boost(kprobe_opcode_t *instruction); +extern int can_boost(kprobe_opcode_t *instruction, void *addr); /* Recover instruction if given address is probed */ extern unsigned long recover_probed_instruction(kprobe_opcode_t *buf, unsigned long addr); diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe1640..88b3c942473d 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -166,12 +166,12 @@ NOKPROBE_SYMBOL(skip_prefixes); * Returns non-zero if opcode is boostable. * RIP relative instructions are adjusted at copying time in 64 bits mode */ -int can_boost(kprobe_opcode_t *opcodes) +int can_boost(kprobe_opcode_t *opcodes, void *addr) { kprobe_opcode_t opcode; kprobe_opcode_t *orig_opcodes = opcodes; - if (search_exception_tables((unsigned long)opcodes)) + if (search_exception_tables((unsigned long)addr)) return 0; /* Page fault may occur on this address. */ retry: @@ -416,7 +416,7 @@ static int arch_copy_kprobe(struct kprobe *p) * __copy_instruction can modify the displacement of the instruction, * but it doesn't affect boostable check. */ - if (can_boost(p->ainsn.insn)) + if (can_boost(p->ainsn.insn, p->addr)) p->ainsn.boostable = 0; else p->ainsn.boostable = -1; diff --git a/arch/x86/kernel/kprobes/opt.c b/arch/x86/kernel/kprobes/opt.c index 3d1bee9d6a72..3e7c6e5a08ff 100644 --- a/arch/x86/kernel/kprobes/opt.c +++ b/arch/x86/kernel/kprobes/opt.c @@ -178,7 +178,7 @@ static int copy_optimized_instructions(u8 *dest, u8 *src) while (len < RELATIVEJUMP_SIZE) { ret = __copy_instruction(dest + len, src + len); - if (!ret || !can_boost(dest + len)) + if (!ret || !can_boost(dest + len, src + len)) return -EINVAL; len += ret; } -- cgit v1.2.3 From 10bce8410607a18eb3adf5d2739db8c8593e110d Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 27 Feb 2017 23:50:58 +0100 Subject: x86/kdebugfs: Move boot params hierarchy under (debugfs)/x86/ ... since this is all x86-specific data and it makes sense to have it under x86/ logically instead in the toplevel debugfs dir. Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170227225058.27289-1-bp@alien8.de Signed-off-by: Ingo Molnar --- arch/x86/kernel/kdebugfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kdebugfs.c b/arch/x86/kernel/kdebugfs.c index bdb83e431d89..38b64587b31b 100644 --- a/arch/x86/kernel/kdebugfs.c +++ b/arch/x86/kernel/kdebugfs.c @@ -167,7 +167,7 @@ static int __init boot_params_kdebugfs_init(void) struct dentry *dbp, *version, *data; int error = -ENOMEM; - dbp = debugfs_create_dir("boot_params", NULL); + dbp = debugfs_create_dir("boot_params", arch_debugfs_dir); if (!dbp) return -ENOMEM; -- cgit v1.2.3 From bb3f0a52630c84807fca9bdd76ac2f5dcec82689 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Tue, 28 Feb 2017 13:50:52 +0800 Subject: x86/apic: Fix a warning message in logical CPU IDs allocation The current warning message in allocate_logical_cpuid() is somewhat confusing: Only 1 processors supported.Processor 2/0x2 and the rest are ignored. As it might imply that there's only one CPU in the system - while what we ran into here is a kernel limitation. Fix the warning message to clarify all that: APIC: NR_CPUS/possible_cpus limit of 2 reached. Processor 2/0x2 and the rest are ignored. ( Also update the error return from -1 to -EINVAL, which is the more canonical return value. ) Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1488261052-25753-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 4261b3282ad9..11088b86e5c7 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2062,10 +2062,10 @@ static int allocate_logical_cpuid(int apicid) /* Allocate a new cpuid. */ if (nr_logical_cpuids >= nr_cpu_ids) { - WARN_ONCE(1, "Only %d processors supported." + WARN_ONCE(1, "APIC: NR_CPUS/possible_cpus limit of %i reached. " "Processor %d/0x%x and the rest are ignored.\n", - nr_cpu_ids - 1, nr_logical_cpuids, apicid); - return -1; + nr_cpu_ids, nr_logical_cpuids, apicid); + return -EINVAL; } cpuid_to_apicid[nr_logical_cpuids] = apicid; -- cgit v1.2.3 From 11277aabcbbe13916151af897d29a5e9f71ca73f Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Thu, 23 Feb 2017 17:16:41 +0800 Subject: x86/apic: Simplify enable_IR_x2apic(), remove try_to_enable_IR() The following commit: 2e63ad4bd5dd ("x86/apic: Do not init irq remapping if ioapic is disabled") ... added a check for skipped IO-APIC setup to enable_IR_x2apic(), but this check is also duplicated in try_to_enable_IR() - and it will never succeed in calling irq_remapping_enable(). Remove the whole irq_remapping_enable() complication: if the IO-APIC is disabled we cannot enable IRQ remapping. Signed-off-by: Dou Liyang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bp@alien8.de Cc: nicstange@gmail.com Cc: wanpeng.li@hotmail.com Link: http://lkml.kernel.org/r/1487841401-1543-1-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/apic/apic.c | 17 ++++------------- 1 file changed, 4 insertions(+), 13 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 11088b86e5c7..aee7deddabd0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1610,24 +1610,15 @@ static inline void try_to_enable_x2apic(int remap_mode) { } static inline void __x2apic_enable(void) { } #endif /* !CONFIG_X86_X2APIC */ -static int __init try_to_enable_IR(void) -{ -#ifdef CONFIG_X86_IO_APIC - if (!x2apic_enabled() && skip_ioapic_setup) { - pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); - return -1; - } -#endif - return irq_remapping_enable(); -} - void __init enable_IR_x2apic(void) { unsigned long flags; int ret, ir_stat; - if (skip_ioapic_setup) + if (skip_ioapic_setup) { + pr_info("Not enabling interrupt remapping due to skipped IO-APIC setup\n"); return; + } ir_stat = irq_remapping_prepare(); if (ir_stat < 0 && !x2apic_supported()) @@ -1645,7 +1636,7 @@ void __init enable_IR_x2apic(void) /* If irq_remapping_prepare() succeeded, try to enable it */ if (ir_stat >= 0) - ir_stat = try_to_enable_IR(); + ir_stat = irq_remapping_enable(); /* ir_stat contains the remap mode or an error code */ try_to_enable_x2apic(ir_stat); -- cgit v1.2.3 From 1b17c6df852851b40c3c27c66b8fa2fd99cf25d8 Mon Sep 17 00:00:00 2001 From: Andrew Banman Date: Fri, 17 Feb 2017 11:07:49 -0600 Subject: x86/platform/uv/BAU: Fix HUB errors by remove initial write to sw-ack register Writing to the software acknowledge clear register when there are no pending messages causes a HUB error to assert. The original intent of this write was to clear the pending bits before start of operation, but this is an incorrect method and has been determined to be unnecessary. Signed-off-by: Andrew Banman Acked-by: Mike Travis Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: akpm@linux-foundation.org Cc: rja@hpe.com Cc: sivanich@hpe.com Link: http://lkml.kernel.org/r/1487351269-181133-1-git-send-email-abanman@hpe.com Signed-off-by: Ingo Molnar --- arch/x86/platform/uv/tlb_uv.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/uv/tlb_uv.c b/arch/x86/platform/uv/tlb_uv.c index 766d4d3529a1..f25982cdff90 100644 --- a/arch/x86/platform/uv/tlb_uv.c +++ b/arch/x86/platform/uv/tlb_uv.c @@ -1847,7 +1847,6 @@ static void pq_init(int node, int pnode) ops.write_payload_first(pnode, first); ops.write_payload_last(pnode, last); - ops.write_g_sw_ack(pnode, 0xffffUL); /* in effect, all msg_type's are set to MSG_NOOP */ memset(pqp, 0, sizeof(struct bau_pq_entry) * DEST_Q_SIZE); -- cgit v1.2.3 From 90b28ded88dda8bea82b4a86923e73ba0746d884 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 19 Feb 2017 19:32:48 +0100 Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index e244c19a2451..01c9cda3e1b7 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -223,6 +223,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_BOARD_NAME, "P4S800"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TA */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TA", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ -- cgit v1.2.3 From 73667e31a153a66da97feb1584726222504924f8 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Feb 2017 22:17:17 +0100 Subject: x86/hyperv: Hide unused label This new 32-bit warning just showed up: arch/x86/hyperv/hv_init.c: In function 'hyperv_init': arch/x86/hyperv/hv_init.c:167:1: error: label 'register_msr_cs' defined but not used [-Werror=unused-label] The easiest solution is to move the label up into the existing #ifdef that has the goto. Fixes: dee863b571b0 ("hv: export current Hyper-V clocksource") Signed-off-by: Arnd Bergmann Acked-by: Stephen Hemminger Cc: Greg Kroah-Hartman Cc: Haiyang Zhang Cc: devel@linuxdriverproject.org Cc: Vitaly Kuznetsov Cc: "K. Y. Srinivasan" Link: http://lkml.kernel.org/r/20170214211736.2641241-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/hyperv/hv_init.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/hyperv/hv_init.c b/arch/x86/hyperv/hv_init.c index db64baf0e500..8bef70e7f3cc 100644 --- a/arch/x86/hyperv/hv_init.c +++ b/arch/x86/hyperv/hv_init.c @@ -158,13 +158,13 @@ void hyperv_init(void) clocksource_register_hz(&hyperv_cs_tsc, NSEC_PER_SEC/100); return; } +register_msr_cs: #endif /* * For 32 bit guests just use the MSR based mechanism for reading * the partition counter. */ -register_msr_cs: hyperv_cs = &hyperv_cs_msr; if (ms_hyperv.features & HV_X64_MSR_TIME_REF_COUNT_AVAILABLE) clocksource_register_hz(&hyperv_cs_msr, NSEC_PER_SEC/100); -- cgit v1.2.3 From aa5ec3f715d576353c7d8f4f8085634bd845b73f Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 21:29:22 +0900 Subject: x86/vmware: Remove duplicate inclusion of asm/timer.h Signed-off-by: Masanari Iida Cc: akataria@vmware.com Cc: virtualization@lists.linux-foundation.org Link: http://lkml.kernel.org/r/20170227122922.26230-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/vmware.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/vmware.c b/arch/x86/kernel/cpu/vmware.c index 891f4dad7b2c..22403a28caf5 100644 --- a/arch/x86/kernel/cpu/vmware.c +++ b/arch/x86/kernel/cpu/vmware.c @@ -30,7 +30,6 @@ #include #include #include -#include #undef pr_fmt #define pr_fmt(fmt) "vmware: " fmt -- cgit v1.2.3 From e86a2d2d34e20289ae7d46692e16d43c3a785db9 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Mon, 27 Feb 2017 22:07:03 +0900 Subject: x86/intel_rdt: Remove duplicate inclusion of linux/cpu.h Signed-off-by: Masanari Iida Cc: fenghua.yu@intel.com Link: http://lkml.kernel.org/r/20170227130703.26968-1-standby24x7@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..759577d9d166 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -27,7 +27,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From 153654dbe595a68845ba14d5b0bfe299fa6a7e99 Mon Sep 17 00:00:00 2001 From: Rui Wang Date: Tue, 28 Feb 2017 21:34:28 +0800 Subject: x86/PCI: Implement pcibios_release_device to release IRQ from IOAPIC The revert of 991de2e59090 ("PCI, x86: Implement pcibios_alloc_irq() and pcibios_free_irq()") causes a problem for IOAPIC hotplug. The problem is that IRQs are allocated and freed in pci_enable_device() and pci_disable_device(). But there are some drivers which don't call pci_disable_device(), and they have good reasons not calling it, so if they're using IOAPIC their IRQs won't have a chance to be released from the IOAPIC. When this happens IOAPIC hot-removal fails with a kernel stack dump and an error message like this: [149335.697989] pin16 on IOAPIC2 is still in use. It turns out that we can fix it in a different way without moving IRQ allocation into pcibios_alloc_irq(), thus avoiding the regression of 991de2e59090. We can keep the allocation and freeing of IRQs as is within pci_enable_device()/pci_disable_device(), without breaking any previous assumption of the rest of the system, keeping compatibility with both the legacy and the modern drivers. We can accomplish this by implementing the existing __weak hook of pcibios_release_device() thus when a pci device is about to be deleted we get notified in the hook and take the chance to release its IRQ, if any, from the IOAPIC. Implement pcibios_release_device() for x86 to release any IRQ not released by the driver. Signed-off-by: Rui Wang Cc: tony.luck@intel.com Cc: linux-pci@vger.kernel.org Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: fengguang.wu@intel.com Cc: helgaas@kernel.org Cc: kbuild-all@01.org Cc: bhelgaas@google.com Link: http://lkml.kernel.org/r/1488288869-31290-2-git-send-email-rui.y.wang@intel.com Signed-off-by: Thomas Gleixner --- arch/x86/pci/common.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/pci/common.c b/arch/x86/pci/common.c index 0cb52ae0a8f0..190e718694b1 100644 --- a/arch/x86/pci/common.c +++ b/arch/x86/pci/common.c @@ -735,6 +735,15 @@ void pcibios_disable_device (struct pci_dev *dev) pcibios_disable_irq(dev); } +#ifdef CONFIG_ACPI_HOTPLUG_IOAPIC +void pcibios_release_device(struct pci_dev *dev) +{ + if (atomic_dec_return(&dev->enable_cnt) >= 0) + pcibios_disable_device(dev); + +} +#endif + int pci_ext_cfg_avail(void) { if (raw_pci_ext_ops) -- cgit v1.2.3 From 58ab9a088ddac4efe823471275859d64f735577e Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Thu, 23 Feb 2017 14:26:03 -0800 Subject: x86/pkeys: Check against max pkey to avoid overflows Kirill reported a warning from UBSAN about undefined behavior when using protection keys. He is running on hardware that actually has support for it, which is not widely available. The warning triggers because of very large shifts of integers when doing a pkey_free() of a large, invalid value. This happens because we never check that the pkey "fits" into the mm_pkey_allocation_map(). I do not believe there is any danger here of anything bad happening other than some aliasing issues where somebody could do: pkey_free(35); and the kernel would effectively execute: pkey_free(8); While this might be confusing to an app that was doing something stupid, it has to do something stupid and the effects are limited to the app shooting itself in the foot. Signed-off-by: Dave Hansen Cc: stable@vger.kernel.org Cc: linux-kselftest@vger.kernel.org Cc: shuah@kernel.org Cc: kirill.shutemov@linux.intel.com Link: http://lkml.kernel.org/r/20170223222603.A022ED65@viggo.jf.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pkeys.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pkeys.h b/arch/x86/include/asm/pkeys.h index 34684adb6899..b3b09b98896d 100644 --- a/arch/x86/include/asm/pkeys.h +++ b/arch/x86/include/asm/pkeys.h @@ -46,6 +46,15 @@ extern int __arch_set_user_pkey_access(struct task_struct *tsk, int pkey, static inline bool mm_pkey_is_allocated(struct mm_struct *mm, int pkey) { + /* + * "Allocated" pkeys are those that have been returned + * from pkey_alloc(). pkey 0 is special, and never + * returned from pkey_alloc(). + */ + if (pkey <= 0) + return false; + if (pkey >= arch_max_pkey()) + return false; return mm_pkey_allocation_map(mm) & (1U << pkey); } @@ -82,12 +91,6 @@ int mm_pkey_alloc(struct mm_struct *mm) static inline int mm_pkey_free(struct mm_struct *mm, int pkey) { - /* - * pkey 0 is special, always allocated and can never - * be freed. - */ - if (!pkey) - return -EINVAL; if (!mm_pkey_is_allocated(mm, pkey)) return -EINVAL; -- cgit v1.2.3 From 940b2f2fd963c043418ce8af64605783b2b19140 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 18 Feb 2017 12:31:40 +0100 Subject: x86/events: Remove last remnants of old filenames Update to the new file paths, remove them from introductory comments. Signed-off-by: Borislav Petkov Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170218113140.8051-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/events/amd/core.c | 2 +- arch/x86/events/intel/cstate.c | 2 +- arch/x86/events/intel/rapl.c | 2 +- arch/x86/events/intel/uncore.h | 6 +++--- tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c | 2 +- 5 files changed, 7 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index afb222b63cae..c84584bb9402 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -604,7 +604,7 @@ amd_get_event_constraints_f15h(struct cpu_hw_events *cpuc, int idx, return &amd_f15_PMC20; } case AMD_EVENT_NB: - /* moved to perf_event_amd_uncore.c */ + /* moved to uncore.c */ return &emptyconstraint; default: return &emptyconstraint; diff --git a/arch/x86/events/intel/cstate.c b/arch/x86/events/intel/cstate.c index aff4b5b69d40..238ae3248ba5 100644 --- a/arch/x86/events/intel/cstate.c +++ b/arch/x86/events/intel/cstate.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_cstate.c: support cstate residency counters + * Support cstate residency counters * * Copyright (C) 2015, Intel Corp. * Author: Kan Liang (kan.liang@intel.com) diff --git a/arch/x86/events/intel/rapl.c b/arch/x86/events/intel/rapl.c index 22054ca49026..9d05c7e67f60 100644 --- a/arch/x86/events/intel/rapl.c +++ b/arch/x86/events/intel/rapl.c @@ -1,5 +1,5 @@ /* - * perf_event_intel_rapl.c: support Intel RAPL energy consumption counters + * Support Intel RAPL energy consumption counters * Copyright (C) 2013 Google, Inc., Stephane Eranian * * Intel RAPL interface is specified in the IA-32 Manual Vol3b diff --git a/arch/x86/events/intel/uncore.h b/arch/x86/events/intel/uncore.h index ad986c1e29bc..df5989f27b1b 100644 --- a/arch/x86/events/intel/uncore.h +++ b/arch/x86/events/intel/uncore.h @@ -360,7 +360,7 @@ extern struct list_head pci2phy_map_head; extern struct pci_extra_dev *uncore_extra_pci_dev; extern struct event_constraint uncore_constraint_empty; -/* perf_event_intel_uncore_snb.c */ +/* uncore_snb.c */ int snb_uncore_pci_init(void); int ivb_uncore_pci_init(void); int hsw_uncore_pci_init(void); @@ -371,7 +371,7 @@ void nhm_uncore_cpu_init(void); void skl_uncore_cpu_init(void); int snb_pci2phy_map_init(int devid); -/* perf_event_intel_uncore_snbep.c */ +/* uncore_snbep.c */ int snbep_uncore_pci_init(void); void snbep_uncore_cpu_init(void); int ivbep_uncore_pci_init(void); @@ -385,5 +385,5 @@ void knl_uncore_cpu_init(void); int skx_uncore_pci_init(void); void skx_uncore_cpu_init(void); -/* perf_event_intel_uncore_nhmex.c */ +/* uncore_nhmex.c */ void nhmex_uncore_cpu_init(void); diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 7913363bde5c..4f3c758d875d 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -31,7 +31,7 @@ #error Instruction buffer size too small #endif -/* Based on branch_type() from perf_event_intel_lbr.c */ +/* Based on branch_type() from arch/x86/events/intel/lbr.c */ static void intel_pt_insn_decoder(struct insn *insn, struct intel_pt_insn *intel_pt_insn) { -- cgit v1.2.3 From 72042a8c7b01048a36ece216aaf206b7d60ca661 Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:35 +1100 Subject: x86/purgatory: Make functions and variables static Sparse emits several 'symbol not declared' warnings for various functions and variables. Add static keyword to functions and variables which have file scope only. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-2-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 25e068ba3382..2a5f4373c797 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -18,11 +18,11 @@ struct sha_region { unsigned long len; }; -unsigned long backup_dest = 0; -unsigned long backup_src = 0; -unsigned long backup_sz = 0; +static unsigned long backup_dest; +static unsigned long backup_src; +static unsigned long backup_sz; -u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; struct sha_region sha_regions[16] = {}; @@ -39,7 +39,7 @@ static int copy_backup_region(void) return 0; } -int verify_sha256_digest(void) +static int verify_sha256_digest(void) { struct sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; -- cgit v1.2.3 From e98fe5127b9cc8ab33d1094b81c19deb1f9082bf Mon Sep 17 00:00:00 2001 From: "Tobin C. Harding" Date: Mon, 20 Feb 2017 10:12:36 +1100 Subject: x86/purgatory: Fix sparse warning, symbol not declared Sparse emits warning, 'symbol not declared' for a function that has neither file scope nor a forward declaration. The functions only call site is an ASM file. Add a header file with the function declaration. Include the header file in the C source file defining the function in order to fix the sparse warning. Include the header file in ASM file containing the call site to document the usage. Signed-off-by: Tobin C. Harding Link: http://lkml.kernel.org/r/1487545956-2547-3-git-send-email-me@tobin.cc Signed-off-by: Thomas Gleixner --- arch/x86/purgatory/purgatory.c | 1 + arch/x86/purgatory/purgatory.h | 8 ++++++++ arch/x86/purgatory/setup-x86_64.S | 1 + 3 files changed, 10 insertions(+) create mode 100644 arch/x86/purgatory/purgatory.h (limited to 'arch/x86') diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index 2a5f4373c797..b6d5c8946e66 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -11,6 +11,7 @@ */ #include "sha256.h" +#include "purgatory.h" #include "../boot/string.h" struct sha_region { diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h new file mode 100644 index 000000000000..e2e365a6c192 --- /dev/null +++ b/arch/x86/purgatory/purgatory.h @@ -0,0 +1,8 @@ +#ifndef PURGATORY_H +#define PURGATORY_H + +#ifndef __ASSEMBLY__ +extern void purgatory(void); +#endif /* __ASSEMBLY__ */ + +#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index fe3c91ba1bd0..f90e9dfa90bb 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,6 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ +#include "purgatory.h" .text .globl purgatory_start -- cgit v1.2.3 From 8392f16d38bb5222c03073a3906b7fd272386faf Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 21 Feb 2017 19:36:39 +0100 Subject: x86/boot: Correct setup_header.start_sys name It is called start_sys_seg elsewhere so rename it to that. It is an obsolete field so we could just as well directly call it __u16 __pad... No functional change. Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/20170221183639.16554-1-bp@alien8.de Signed-off-by: Thomas Gleixner --- arch/x86/include/uapi/asm/bootparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/uapi/asm/bootparam.h b/arch/x86/include/uapi/asm/bootparam.h index 5138dacf8bb8..07244ea16765 100644 --- a/arch/x86/include/uapi/asm/bootparam.h +++ b/arch/x86/include/uapi/asm/bootparam.h @@ -58,7 +58,7 @@ struct setup_header { __u32 header; __u16 version; __u32 realmode_swtch; - __u16 start_sys; + __u16 start_sys_seg; __u16 kernel_version; __u8 type_of_loader; __u8 loadflags; -- cgit v1.2.3 From bba82fd75694832b59433bf4e9d7ede8de1dfd42 Mon Sep 17 00:00:00 2001 From: Robert O'Callahan Date: Wed, 1 Feb 2017 17:06:11 +1300 Subject: KVM: x86: never specify a sample period for virtualized in_tx_cp counters MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit pmc_reprogram_counter() always sets a sample period based on the value of pmc->counter. However, hsw_hw_config() rejects sample periods less than 2^31 - 1. So for example, if a KVM guest does struct perf_event_attr attr; memset(&attr, 0, sizeof(attr)); attr.type = PERF_TYPE_RAW; attr.size = sizeof(attr); attr.config = 0x2005101c4; // conditional branches retired IN_TXCP attr.sample_period = 0; int fd = syscall(__NR_perf_event_open, &attr, 0, -1, -1, 0); ioctl(fd, PERF_EVENT_IOC_DISABLE, 0); ioctl(fd, PERF_EVENT_IOC_ENABLE, 0); the guest kernel counts some conditional branch events, then updates the virtual PMU register with a nonzero count. The host reaches pmc_reprogram_counter() with nonzero pmc->counter, triggers EOPNOTSUPP in hsw_hw_config(), prints "kvm_pmu: event creation failed" in pmc_reprogram_counter(), and silently (from the guest's point of view) stops counting events. We fix event counting by forcing attr.sample_period to always be zero for in_tx_cp counters. Sampling doesn't work, but it already didn't work and can't be fixed without major changes to the approach in hsw_hw_config(). Signed-off-by: Robert O'Callahan Signed-off-by: Radim Krčmář --- arch/x86/kvm/pmu.c | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/pmu.c b/arch/x86/kvm/pmu.c index 06ce377dcbc9..026db42a86c3 100644 --- a/arch/x86/kvm/pmu.c +++ b/arch/x86/kvm/pmu.c @@ -113,12 +113,19 @@ static void pmc_reprogram_counter(struct kvm_pmc *pmc, u32 type, .config = config, }; + attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + if (in_tx) attr.config |= HSW_IN_TX; - if (in_tx_cp) + if (in_tx_cp) { + /* + * HSW_IN_TX_CHECKPOINTED is not supported with nonzero + * period. Just clear the sample period so at least + * allocating the counter doesn't fail. + */ + attr.sample_period = 0; attr.config |= HSW_IN_TX_CHECKPOINTED; - - attr.sample_period = (-pmc->counter) & pmc_bitmask(pmc); + } event = perf_event_create_kernel_counter(&attr, -1, current, intr ? kvm_perf_overflow_intr : -- cgit v1.2.3 From b7ceaec112aa35aa287325754d8c52b8304892cd Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Wed, 22 Feb 2017 07:36:16 -0800 Subject: x86/asm: Tidy up TSS limit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In an earlier version of the patch ("x86/kvm/vmx: Defer TR reload after VM exit") that introduced TSS limit validity tracking, I confused which helper was which. On reflection, the names I chose sucked. Rename the helpers to make it more obvious what's going on and add some comments. While I'm at it, clear __tss_limit_invalid when force-reloading as well as when contitionally reloading, since any TR reload fixes the limit. Signed-off-by: Andy Lutomirski Signed-off-by: Radim Krčmář --- arch/x86/include/asm/desc.h | 18 +++++++++++------- arch/x86/kernel/ioport.c | 8 +++++++- arch/x86/kernel/process.c | 6 +++--- 3 files changed, 21 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/desc.h b/arch/x86/include/asm/desc.h index cb8f9149f6c8..1548ca92ad3f 100644 --- a/arch/x86/include/asm/desc.h +++ b/arch/x86/include/asm/desc.h @@ -205,6 +205,8 @@ static inline void native_load_tr_desc(void) asm volatile("ltr %w0"::"q" (GDT_ENTRY_TSS*8)); } +DECLARE_PER_CPU(bool, __tss_limit_invalid); + static inline void force_reload_TR(void) { struct desc_struct *d = get_cpu_gdt_table(smp_processor_id()); @@ -220,18 +222,20 @@ static inline void force_reload_TR(void) write_gdt_entry(d, GDT_ENTRY_TSS, &tss, DESC_TSS); load_TR_desc(); + this_cpu_write(__tss_limit_invalid, false); } -DECLARE_PER_CPU(bool, need_tr_refresh); - -static inline void refresh_TR(void) +/* + * Call this if you need the TSS limit to be correct, which should be the case + * if and only if you have TIF_IO_BITMAP set or you're switching to a task + * with TIF_IO_BITMAP set. + */ +static inline void refresh_tss_limit(void) { DEBUG_LOCKS_WARN_ON(preemptible()); - if (unlikely(this_cpu_read(need_tr_refresh))) { + if (unlikely(this_cpu_read(__tss_limit_invalid))) force_reload_TR(); - this_cpu_write(need_tr_refresh, false); - } } /* @@ -250,7 +254,7 @@ static inline void invalidate_tss_limit(void) if (unlikely(test_thread_flag(TIF_IO_BITMAP))) force_reload_TR(); else - this_cpu_write(need_tr_refresh, true); + this_cpu_write(__tss_limit_invalid, true); } static inline void native_load_gdt(const struct desc_ptr *dtr) diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b01bc8517450..875d3d25dd6a 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -47,8 +47,14 @@ asmlinkage long sys_ioperm(unsigned long from, unsigned long num, int turn_on) t->io_bitmap_ptr = bitmap; set_thread_flag(TIF_IO_BITMAP); + /* + * Now that we have an IO bitmap, we need our TSS limit to be + * correct. It's fine if we are preempted after doing this: + * with TIF_IO_BITMAP set, context switches will keep our TSS + * limit correct. + */ preempt_disable(); - refresh_TR(); + refresh_tss_limit(); preempt_enable(); } diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7780efa635b9..0b302591b51f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -65,8 +65,8 @@ __visible DEFINE_PER_CPU_SHARED_ALIGNED(struct tss_struct, cpu_tss) = { }; EXPORT_PER_CPU_SYMBOL(cpu_tss); -DEFINE_PER_CPU(bool, need_tr_refresh); -EXPORT_PER_CPU_SYMBOL_GPL(need_tr_refresh); +DEFINE_PER_CPU(bool, __tss_limit_invalid); +EXPORT_PER_CPU_SYMBOL_GPL(__tss_limit_invalid); /* * this gets called so that we can store lazy state into memory and copy the @@ -218,7 +218,7 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, * Make sure that the TSS limit is correct for the CPU * to notice the IO bitmap. */ - refresh_TR(); + refresh_tss_limit(); } else if (test_tsk_thread_flag(prev_p, TIF_IO_BITMAP)) { /* * Clear any possible leftover bits: -- cgit v1.2.3 From 0fce546f9f07b94ccc9de09cf48d35e18946d2fa Mon Sep 17 00:00:00 2001 From: Jérémy Lefaure Date: Sat, 25 Feb 2017 17:46:53 -0500 Subject: x86/kvm/vmx: remove unused variable in segment_base() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The pointer 'struct desc_struct *d' is unused since commit 8c2e41f7ae12 ("x86/kvm/vmx: Simplify segment_base()") so let's remove it. Signed-off-by: Jérémy Lefaure Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ef4ba71dbb66..764f1f897847 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2053,7 +2053,6 @@ static bool update_transition_efer(struct vcpu_vmx *vmx, int efer_offset) static unsigned long segment_base(u16 selector) { struct desc_ptr *gdt = this_cpu_ptr(&host_gdt); - struct desc_struct *d; struct desc_struct *table; unsigned long v; -- cgit v1.2.3 From acc9ab60132739e0c5ab40644444cd7b22d12886 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 27 Feb 2017 04:24:39 -0800 Subject: KVM: nVMX: Fix pending events injection MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit L2 fails to boot on a non-APICv box dues to 'commit 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running")' KVM internal error. Suberror: 3 extra data[0]: 800000ef extra data[1]: 1 RAX=0000000000000000 RBX=ffffffff81f36140 RCX=0000000000000000 RDX=0000000000000000 RSI=0000000000000000 RDI=0000000000000000 RBP=ffff88007c92fe90 RSP=ffff88007c92fe90 R8 =ffff88007fccdca0 R9 =0000000000000000 R10=00000000fffedb3d R11=0000000000000000 R12=0000000000000003 R13=0000000000000000 R14=0000000000000000 R15=ffff88007c92c000 RIP=ffffffff810645e6 RFL=00000246 [---Z-P-] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 0000000000000000 ffffffff 00c00000 CS =0010 0000000000000000 ffffffff 00a09b00 DPL=0 CS64 [-RA] SS =0000 0000000000000000 ffffffff 00c00000 DS =0000 0000000000000000 ffffffff 00c00000 FS =0000 0000000000000000 ffffffff 00c00000 GS =0000 ffff88007fcc0000 ffffffff 00c00000 LDT=0000 0000000000000000 ffffffff 00c00000 TR =0040 ffff88007fcd4200 00002087 00008b00 DPL=0 TSS64-busy GDT= ffff88007fcc9000 0000007f IDT= ffffffffff578000 00000fff CR0=80050033 CR2=00000000ffffffff CR3=0000000001e0a000 CR4=003406e0 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000fffe0ff0 DR7=0000000000000400 EFER=0000000000000d01 We should try to reinject previous events if any before trying to inject new event if pending. If vmexit is triggered by L2 guest and L0 interested in, we should reinject IDT-vectoring info to L2 through vmcs02 if any, otherwise, we can consider new IRQs/NMIs which can be injected and call nested events callback to switch from L2 to L1 if needed and inject the proper vmexit events. However, 'commit 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running")' results in the handle events order reversely on non-APICv box. This patch fixes it by bailing out for pending events and not consider new events in this scenario. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Jan Kiszka Signed-off-by: Wanpeng Li Fixes: 0ad3bed6c5ec ("kvm: nVMX: move nested events check to kvm_vcpu_running") Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 764f1f897847..283aa8601833 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10641,6 +10641,11 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) { struct vcpu_vmx *vmx = to_vmx(vcpu); + if (vcpu->arch.exception.pending || + vcpu->arch.nmi_injected || + vcpu->arch.interrupt.pending) + return -EBUSY; + if (nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) && vmx->nested.preemption_timer_expired) { if (vmx->nested.nested_run_pending) @@ -10650,8 +10655,7 @@ static int vmx_check_nested_events(struct kvm_vcpu *vcpu, bool external_intr) } if (vcpu->arch.nmi_pending && nested_exit_on_nmi(vcpu)) { - if (vmx->nested.nested_run_pending || - vcpu->arch.interrupt.pending) + if (vmx->nested.nested_run_pending) return -EBUSY; nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, NMI_VECTOR | INTR_TYPE_NMI_INTR | -- cgit v1.2.3 From e390f9a9689a42f477a6073e2e7df530a4c1b740 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Wed, 1 Mar 2017 12:04:44 -0600 Subject: objtool, modules: Discard objtool annotation sections for modules The '__unreachable' and '__func_stack_frame_non_standard' sections are only used at compile time. They're discarded for vmlinux but they should also be discarded for modules. Since this is a recurring pattern, prefix the section names with ".discard.". It's a nice convention and vmlinux.lds.h already discards such sections. Also remove the 'a' (allocatable) flag from the __unreachable section since it doesn't make sense for a discarded section. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Jessica Yu Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: d1091c7fa3d5 ("objtool: Improve detection of BUG() and other dead ends") Link: http://lkml.kernel.org/r/20170301180444.lhd53c5tibc4ns77@treble Signed-off-by: Ingo Molnar --- arch/x86/kernel/vmlinux.lds.S | 2 -- include/linux/compiler-gcc.h | 2 +- include/linux/frame.h | 2 +- scripts/mod/modpost.c | 1 + scripts/module-common.lds | 5 ++++- tools/objtool/builtin-check.c | 6 +++--- 6 files changed, 10 insertions(+), 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/vmlinux.lds.S b/arch/x86/kernel/vmlinux.lds.S index ad0118fbce90..c74ae9ce8dc4 100644 --- a/arch/x86/kernel/vmlinux.lds.S +++ b/arch/x86/kernel/vmlinux.lds.S @@ -345,8 +345,6 @@ SECTIONS DISCARDS /DISCARD/ : { *(.eh_frame) - *(__func_stack_frame_non_standard) - *(__unreachable) } } diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index b6bb9019d87f..0efef9cf014f 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -200,7 +200,7 @@ #ifdef CONFIG_STACK_VALIDATION #define annotate_unreachable() ({ \ asm("%c0:\t\n" \ - ".pushsection __unreachable, \"a\"\t\n" \ + ".pushsection .discard.unreachable\t\n" \ ".long %c0b - .\t\n" \ ".popsection\t\n" : : "i" (__LINE__)); \ }) diff --git a/include/linux/frame.h b/include/linux/frame.h index e6baaba3f1ae..d772c61c31da 100644 --- a/include/linux/frame.h +++ b/include/linux/frame.h @@ -11,7 +11,7 @@ * For more information, see tools/objtool/Documentation/stack-validation.txt. */ #define STACK_FRAME_NON_STANDARD(func) \ - static void __used __section(__func_stack_frame_non_standard) \ + static void __used __section(.discard.func_stack_frame_non_standard) \ *__func_stack_frame_non_standard_##func = func #else /* !CONFIG_STACK_VALIDATION */ diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c index 4dedd0d3d3a7..30d752a4a6a6 100644 --- a/scripts/mod/modpost.c +++ b/scripts/mod/modpost.c @@ -854,6 +854,7 @@ static const char *const section_white_list[] = ".cmem*", /* EZchip */ ".fmt_slot*", /* EZchip */ ".gnu.lto*", + ".discard.*", NULL }; diff --git a/scripts/module-common.lds b/scripts/module-common.lds index 73a2c7da0e55..cf7e52e4781b 100644 --- a/scripts/module-common.lds +++ b/scripts/module-common.lds @@ -4,7 +4,10 @@ * combine them automatically. */ SECTIONS { - /DISCARD/ : { *(.discard) } + /DISCARD/ : { + *(.discard) + *(.discard.*) + } __ksymtab 0 : { *(SORT(___ksymtab+*)) } __ksymtab_gpl 0 : { *(SORT(___ksymtab_gpl+*)) } diff --git a/tools/objtool/builtin-check.c b/tools/objtool/builtin-check.c index 5fc52ee3264c..4cfdbb5b6967 100644 --- a/tools/objtool/builtin-check.c +++ b/tools/objtool/builtin-check.c @@ -339,13 +339,13 @@ static int add_dead_ends(struct objtool_file *file) struct instruction *insn; bool found; - sec = find_section_by_name(file->elf, ".rela__unreachable"); + sec = find_section_by_name(file->elf, ".rela.discard.unreachable"); if (!sec) return 0; list_for_each_entry(rela, &sec->rela_list, list) { if (rela->sym->type != STT_SECTION) { - WARN("unexpected relocation symbol type in .rela__unreachable"); + WARN("unexpected relocation symbol type in %s", sec->name); return -1; } insn = find_insn(file, rela->sym->sec, rela->addend); @@ -1272,7 +1272,7 @@ int cmd_check(int argc, const char **argv) INIT_LIST_HEAD(&file.insn_list); hash_init(file.insn_hash); - file.whitelist = find_section_by_name(file.elf, "__func_stack_frame_non_standard"); + file.whitelist = find_section_by_name(file.elf, ".discard.func_stack_frame_non_standard"); file.rodata = find_section_by_name(file.elf, ".rodata"); file.ignore_unreachables = false; file.c_file = find_section_by_name(file.elf, ".comment"); -- cgit v1.2.3 From 105ab3d8ce7269887d24d224054677125e18037c Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/topology.c | 1 + arch/arm64/kernel/topology.c | 1 + arch/powerpc/kernel/smp.c | 1 + arch/s390/kernel/topology.c | 1 + arch/x86/kernel/smpboot.c | 1 + block/blk-mq.c | 1 + block/blk-softirq.c | 1 + include/linux/cpuset.h | 1 + include/linux/sched/topology.h | 6 ++++++ kernel/sched/fair.c | 2 ++ kernel/sched/sched.h | 1 + 11 files changed, 17 insertions(+) create mode 100644 include/linux/sched/topology.h (limited to 'arch/x86') diff --git a/arch/arm/kernel/topology.c b/arch/arm/kernel/topology.c index ebf47d91b804..f8a3ab82e77f 100644 --- a/arch/arm/kernel/topology.c +++ b/arch/arm/kernel/topology.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/topology.c b/arch/arm64/kernel/topology.c index 565dd69888cc..08243533e5ee 100644 --- a/arch/arm64/kernel/topology.c +++ b/arch/arm64/kernel/topology.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index 21fdf02583fe..fce17789c675 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 2cd5f4f1013c..17660e800e74 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index a0d38685f7df..fe7a66e6b5a0 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 9e6b064e5339..746c14e0d157 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 06cf9807f49a..87b7df4851bf 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -9,6 +9,7 @@ #include #include #include +#include #include "blk.h" diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index bfc204e70338..c608c39cb161 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h new file mode 100644 index 000000000000..4f1159f76f92 --- /dev/null +++ b/include/linux/sched/topology.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TOPOLOGY_H +#define _LINUX_SCHED_TOPOLOGY_H + +#include + +#endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 3b60d73ab290..11e0ab57748a 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -21,6 +21,8 @@ */ #include +#include + #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 71b10a9b73cf..9b9cb260d9cf 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -1,6 +1,7 @@ #include #include +#include #include #include #include -- cgit v1.2.3 From 4c822698cba8bdd93724117eded12bf34eb80252 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/include/asm/mwait.h | 1 + arch/x86/kernel/process.c | 1 + drivers/cpuidle/driver.c | 1 + include/linux/sched/idle.h | 6 ++++++ include/linux/sched/topology.h | 2 ++ kernel/sched/idle.c | 1 + kernel/smp.c | 1 + 7 files changed, 13 insertions(+) create mode 100644 include/linux/sched/idle.h (limited to 'arch/x86') diff --git a/arch/x86/include/asm/mwait.h b/arch/x86/include/asm/mwait.h index f37f2d8a2989..bda3c27f0da0 100644 --- a/arch/x86/include/asm/mwait.h +++ b/arch/x86/include/asm/mwait.h @@ -2,6 +2,7 @@ #define _ASM_X86_MWAIT_H #include +#include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 7780efa635b9..6395e0cd7dd6 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/driver.c b/drivers/cpuidle/driver.c index ab264d393233..e53fb861beb0 100644 --- a/drivers/cpuidle/driver.c +++ b/drivers/cpuidle/driver.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/idle.h b/include/linux/sched/idle.h new file mode 100644 index 000000000000..a3cf79b86986 --- /dev/null +++ b/include/linux/sched/idle.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_IDLE_H +#define _LINUX_SCHED_IDLE_H + +#include + +#endif /* _LINUX_SCHED_IDLE_H */ diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h index 4f1159f76f92..184b56b8aa64 100644 --- a/include/linux/sched/topology.h +++ b/include/linux/sched/topology.h @@ -3,4 +3,6 @@ #include +#include + #endif /* _LINUX_SCHED_TOPOLOGY_H */ diff --git a/kernel/sched/idle.c b/kernel/sched/idle.c index 6a4bae0a649d..ac6d5176463d 100644 --- a/kernel/sched/idle.c +++ b/kernel/sched/idle.c @@ -2,6 +2,7 @@ * Generic entry point for the idle threads */ #include +#include #include #include #include diff --git a/kernel/smp.c b/kernel/smp.c index 77fcdb9f2775..a817769b53c0 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include "smpboot.h" -- cgit v1.2.3 From e601757102cfd3eeae068f53b3bc1234f3a2b2e9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 16:36:40 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/mach-bcm/platsmp.c | 1 + arch/arm/mach-omap2/pm-debug.c | 1 + arch/arm/probes/kprobes/test-core.c | 1 + arch/cris/kernel/time.c | 2 +- arch/ia64/kernel/setup.c | 1 + arch/microblaze/kernel/timer.c | 1 + arch/mn10300/kernel/time.c | 1 + arch/parisc/kernel/setup.c | 1 + arch/parisc/kernel/time.c | 1 + arch/powerpc/kernel/time.c | 1 + arch/s390/kernel/time.c | 1 + arch/sparc/kernel/ds.c | 1 + arch/sparc/kernel/viohs.c | 1 + arch/tile/kernel/time.c | 1 + arch/x86/events/amd/ibs.c | 1 + arch/x86/events/core.c | 1 + arch/x86/kernel/cpu/amd.c | 1 + arch/x86/kernel/cpu/centaur.c | 1 + arch/x86/kernel/cpu/common.c | 1 + arch/x86/kernel/cpu/cyrix.c | 1 + arch/x86/kernel/cpu/intel.c | 1 + arch/x86/kernel/cpu/transmeta.c | 1 + arch/x86/kernel/kvmclock.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/tsc.c | 1 + block/cfq-iosched.c | 1 + drivers/acpi/apei/ghes.c | 1 + drivers/clocksource/arm_arch_timer.c | 1 + drivers/clocksource/pxa_timer.c | 1 + drivers/clocksource/timer-digicolor.c | 1 + drivers/cpuidle/cpuidle.c | 1 + drivers/firmware/tegra/bpmp.c | 1 + drivers/gpu/drm/i915/i915_gem_request.c | 2 ++ drivers/gpu/drm/i915/intel_drv.h | 1 + drivers/md/bcache/bset.c | 1 + drivers/md/bcache/btree.c | 1 + drivers/md/bcache/sysfs.c | 1 + drivers/md/bcache/util.c | 1 + drivers/md/bcache/util.h | 1 + drivers/md/bcache/writeback.c | 1 + drivers/misc/cxl/native.c | 1 + drivers/net/irda/pxaficp_ir.c | 1 + drivers/perf/arm_pmu.c | 1 + drivers/vhost/net.c | 1 + include/linux/blkdev.h | 1 + include/linux/sched/clock.h | 6 ++++++ include/linux/skbuff.h | 1 + include/net/busy_poll.h | 1 + kernel/events/core.c | 1 + kernel/locking/lockdep.c | 1 + kernel/locking/qspinlock_stat.h | 1 + kernel/printk/printk.c | 1 + kernel/sched/clock.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 1 + kernel/time/sched_clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/torture.c | 1 + kernel/trace/ring_buffer.c | 1 + kernel/trace/trace_clock.c | 1 + kernel/trace/trace_hwlat.c | 1 + kernel/trace/trace_output.c | 1 + kernel/watchdog.c | 1 + lib/plist.c | 1 + net/ipv4/tcp_cdg.c | 2 ++ 65 files changed, 72 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/clock.h (limited to 'arch/x86') diff --git a/arch/arm/mach-bcm/platsmp.c b/arch/arm/mach-bcm/platsmp.c index 582886d0d02f..9e3f275934eb 100644 --- a/arch/arm/mach-bcm/platsmp.c +++ b/arch/arm/mach-bcm/platsmp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mach-omap2/pm-debug.c b/arch/arm/mach-omap2/pm-debug.c index 003a6cb248be..5c46ea6756d7 100644 --- a/arch/arm/mach-omap2/pm-debug.c +++ b/arch/arm/mach-omap2/pm-debug.c @@ -21,6 +21,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/probes/kprobes/test-core.c b/arch/arm/probes/kprobes/test-core.c index 9775de22e2ff..c893726aa52d 100644 --- a/arch/arm/probes/kprobes/test-core.c +++ b/arch/arm/probes/kprobes/test-core.c @@ -203,6 +203,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/kernel/time.c b/arch/cris/kernel/time.c index 2dda6da71521..bc562cf511a6 100644 --- a/arch/cris/kernel/time.c +++ b/arch/cris/kernel/time.c @@ -29,7 +29,7 @@ #include #include #include -#include /* just for sched_clock() - funny that */ +#include #define D(x) diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index d68322966f33..32a6cc36296f 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/timer.c b/arch/microblaze/kernel/timer.c index 1d6fad50fa76..999066192715 100644 --- a/arch/microblaze/kernel/timer.c +++ b/arch/microblaze/kernel/timer.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/time.c b/arch/mn10300/kernel/time.c index 67c6416a58f8..06b83b17c5f1 100644 --- a/arch/mn10300/kernel/time.c +++ b/arch/mn10300/kernel/time.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/setup.c b/arch/parisc/kernel/setup.c index 068ed3607bac..dee6f9d6a153 100644 --- a/arch/parisc/kernel/setup.c +++ b/arch/parisc/kernel/setup.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include diff --git a/arch/parisc/kernel/time.c b/arch/parisc/kernel/time.c index 1e22f981cd81..89421df70160 100644 --- a/arch/parisc/kernel/time.c +++ b/arch/parisc/kernel/time.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index bc84a8d47b9e..37833c5dc274 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/time.c b/arch/s390/kernel/time.c index de66abb479c9..c31da46bc037 100644 --- a/arch/s390/kernel/time.c +++ b/arch/s390/kernel/time.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ds.c b/arch/sparc/kernel/ds.c index f87a55d77094..b542cc7c8d94 100644 --- a/arch/sparc/kernel/ds.c +++ b/arch/sparc/kernel/ds.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index 526fcb5d8ce9..b30b30ab3ddd 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/tile/kernel/time.c b/arch/tile/kernel/time.c index c9357012b1c8..5bd4e88c7c60 100644 --- a/arch/tile/kernel/time.c +++ b/arch/tile/kernel/time.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/events/amd/ibs.c b/arch/x86/events/amd/ibs.c index 496e60391fac..786fd875de92 100644 --- a/arch/x86/events/amd/ibs.c +++ b/arch/x86/events/amd/ibs.c @@ -12,6 +12,7 @@ #include #include #include +#include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23..2ecc0e97772b 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..35a5d5dca2fa 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..adc0ebd8bed0 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -1,5 +1,6 @@ #include +#include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..c7e2ca966386 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..0a3bc19de017 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -10,6 +10,7 @@ #include #include #include +#include #include "cpu.h" diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..fe0a615a051b 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..8457b4978668 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/kvmclock.c b/arch/x86/kernel/kvmclock.c index bae6ea6cfb94..d88967659098 100644 --- a/arch/x86/kernel/kvmclock.c +++ b/arch/x86/kernel/kvmclock.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index bfe4d6c96fbd..d17b1a940add 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -20,6 +20,7 @@ #include #include #include +#include #if defined(CONFIG_EDAC) #include diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..46bcda4cb1c2 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -2,6 +2,7 @@ #include #include +#include #include #include #include diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 137944777859..440b95ee593c 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -8,6 +8,7 @@ */ #include #include +#include #include #include #include diff --git a/drivers/acpi/apei/ghes.c b/drivers/acpi/apei/ghes.c index e53bef6cf53c..b192b42a8351 100644 --- a/drivers/acpi/apei/ghes.c +++ b/drivers/acpi/apei/ghes.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/arm_arch_timer.c b/drivers/clocksource/arm_arch_timer.c index 93aa1364376a..7a8a4117f123 100644 --- a/drivers/clocksource/arm_arch_timer.c +++ b/drivers/clocksource/arm_arch_timer.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/pxa_timer.c b/drivers/clocksource/pxa_timer.c index 9cae38eebec2..1c24de215c14 100644 --- a/drivers/clocksource/pxa_timer.c +++ b/drivers/clocksource/pxa_timer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/clocksource/timer-digicolor.c b/drivers/clocksource/timer-digicolor.c index 10318cc99c0e..e9f50d289362 100644 --- a/drivers/clocksource/timer-digicolor.c +++ b/drivers/clocksource/timer-digicolor.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 62810ff3b00f..548b90be7685 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/firmware/tegra/bpmp.c b/drivers/firmware/tegra/bpmp.c index 4ff02d310868..84e4c9a58a0c 100644 --- a/drivers/firmware/tegra/bpmp.c +++ b/drivers/firmware/tegra/bpmp.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/gpu/drm/i915/i915_gem_request.c b/drivers/gpu/drm/i915/i915_gem_request.c index f31deeb72703..df3fef393dbe 100644 --- a/drivers/gpu/drm/i915/i915_gem_request.c +++ b/drivers/gpu/drm/i915/i915_gem_request.c @@ -24,6 +24,8 @@ #include #include +#include +#include #include "i915_drv.h" diff --git a/drivers/gpu/drm/i915/intel_drv.h b/drivers/gpu/drm/i915/intel_drv.h index b9cde116dab3..344f238b283f 100644 --- a/drivers/gpu/drm/i915/intel_drv.h +++ b/drivers/gpu/drm/i915/intel_drv.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "i915_drv.h" #include diff --git a/drivers/md/bcache/bset.c b/drivers/md/bcache/bset.c index 646fe85261c1..18526d44688d 100644 --- a/drivers/md/bcache/bset.c +++ b/drivers/md/bcache/bset.c @@ -11,6 +11,7 @@ #include "bset.h" #include +#include #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index a43eedd5804d..384559af310e 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -32,6 +32,7 @@ #include #include #include +#include #include /* diff --git a/drivers/md/bcache/sysfs.c b/drivers/md/bcache/sysfs.c index b3ff57d61dde..f90f13616980 100644 --- a/drivers/md/bcache/sysfs.c +++ b/drivers/md/bcache/sysfs.c @@ -13,6 +13,7 @@ #include #include +#include static const char * const cache_replacement_policies[] = { "lru", diff --git a/drivers/md/bcache/util.c b/drivers/md/bcache/util.c index dde6172f3f10..8c3a938f4bf0 100644 --- a/drivers/md/bcache/util.c +++ b/drivers/md/bcache/util.c @@ -12,6 +12,7 @@ #include #include #include +#include #include "util.h" diff --git a/drivers/md/bcache/util.h b/drivers/md/bcache/util.h index cf2cbc211d83..a126919ed102 100644 --- a/drivers/md/bcache/util.h +++ b/drivers/md/bcache/util.h @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index 69e1ae59cab8..6ac2e48b9235 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -13,6 +13,7 @@ #include #include +#include #include /* Rate limiting */ diff --git a/drivers/misc/cxl/native.c b/drivers/misc/cxl/native.c index 09505f432eda..7ae710585267 100644 --- a/drivers/misc/cxl/native.c +++ b/drivers/misc/cxl/native.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/irda/pxaficp_ir.c b/drivers/net/irda/pxaficp_ir.c index 6e8f616be48e..1dba16bc7f8d 100644 --- a/drivers/net/irda/pxaficp_ir.c +++ b/drivers/net/irda/pxaficp_ir.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/perf/arm_pmu.c b/drivers/perf/arm_pmu.c index 6d9335865880..9612b84bc3e0 100644 --- a/drivers/perf/arm_pmu.c +++ b/drivers/perf/arm_pmu.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 2fe35354f20e..5c98ad4d2f4c 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aecca0e7d9ca..796016e63c1d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -2,6 +2,7 @@ #define _LINUX_BLKDEV_H #include +#include #ifdef CONFIG_BLOCK diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h new file mode 100644 index 000000000000..7cb7d79b2cf9 --- /dev/null +++ b/include/linux/sched/clock.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_CLOCK_H +#define _LINUX_SCHED_CLOCK_H + +#include + +#endif /* _LINUX_SCHED_CLOCK_H */ diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 69ccd2636911..c776abd86937 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b8d637225a07..b96832df239e 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -25,6 +25,7 @@ #define _LINUX_NET_BUSY_POLL_H #include +#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/events/core.c b/kernel/events/core.c index 1031bdf9f012..42fe16a84f97 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -46,6 +46,7 @@ #include #include #include +#include #include "internal.h" diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index 9812e5dd409e..cdafaff926ca 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -28,6 +28,7 @@ #define DISABLE_BRANCH_PROFILING #include #include +#include #include #include #include diff --git a/kernel/locking/qspinlock_stat.h b/kernel/locking/qspinlock_stat.h index e852be4851fc..4a30ef63c607 100644 --- a/kernel/locking/qspinlock_stat.h +++ b/kernel/locking/qspinlock_stat.h @@ -63,6 +63,7 @@ enum qlock_stats { */ #include #include +#include #include static const char * const qstat_names[qstat_num + 1] = { diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 34da86e73d00..47050eedc206 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index ad64efe41722..dd7817cdbf58 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1bd317db9810..1a7fd3d21e5a 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6,6 +6,7 @@ * Copyright (C) 1991-2002 Linus Torvalds */ #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 683570739f46..4d386461f13a 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c index a26036d37a38..ea6b610c4c57 100644 --- a/kernel/time/sched_clock.c +++ b/kernel/time/sched_clock.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 2c115fdab397..4fee1c3abd0b 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/torture.c b/kernel/torture.c index 01a99976f072..55de96529287 100644 --- a/kernel/torture.c +++ b/kernel/torture.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ring_buffer.c b/kernel/trace/ring_buffer.c index a85739efcc30..96fc3c043ad6 100644 --- a/kernel/trace/ring_buffer.c +++ b/kernel/trace/ring_buffer.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_clock.c b/kernel/trace/trace_clock.c index 0f06532a755b..5fdc779f411d 100644 --- a/kernel/trace/trace_clock.c +++ b/kernel/trace/trace_clock.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/kernel/trace/trace_hwlat.c b/kernel/trace/trace_hwlat.c index edfacd954e1b..21ea6ae77d93 100644 --- a/kernel/trace/trace_hwlat.c +++ b/kernel/trace/trace_hwlat.c @@ -44,6 +44,7 @@ #include #include #include +#include #include "trace.h" static struct trace_array *hwlat_trace; diff --git a/kernel/trace/trace_output.c b/kernel/trace/trace_output.c index 070866c32eb9..107afca97649 100644 --- a/kernel/trace/trace_output.c +++ b/kernel/trace/trace_output.c @@ -8,6 +8,7 @@ #include #include #include +#include #include "trace_output.h" diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 63177be0159e..144d7b1b0364 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/lib/plist.c b/lib/plist.c index 3a30c53db061..199408f91057 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -175,6 +175,7 @@ void plist_requeue(struct plist_node *node, struct plist_head *head) #ifdef CONFIG_DEBUG_PI_LIST #include +#include #include #include diff --git a/net/ipv4/tcp_cdg.c b/net/ipv4/tcp_cdg.c index 35b280361cb2..50a0f3e51d5b 100644 --- a/net/ipv4/tcp_cdg.c +++ b/net/ipv4/tcp_cdg.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #define HYSTART_ACK_TRAIN 1 -- cgit v1.2.3 From 3f07c0144132e4f59d88055ac8ff3e691a5fa2b8 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:30 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 2 +- arch/alpha/kernel/signal.c | 2 +- arch/alpha/kernel/traps.c | 2 +- arch/alpha/mm/fault.c | 2 +- arch/arc/kernel/traps.c | 2 +- arch/arc/mm/fault.c | 2 +- arch/arm/kernel/ptrace.c | 2 +- arch/arm/kernel/traps.c | 2 +- arch/arm/mm/alignment.c | 2 +- arch/arm/mm/fault.c | 2 +- arch/arm/mm/init.c | 1 + arch/arm/mm/mmap.c | 2 +- arch/arm/vfp/vfpmodule.c | 2 +- arch/arm64/kernel/fpsimd.c | 2 +- arch/arm64/kernel/ptrace.c | 2 +- arch/arm64/kernel/traps.c | 2 +- arch/arm64/mm/fault.c | 2 +- arch/arm64/mm/mmap.c | 2 +- arch/avr32/kernel/traps.c | 2 +- arch/blackfin/kernel/trace.c | 2 +- arch/blackfin/kernel/traps.c | 1 + arch/cris/mm/fault.c | 1 + arch/frv/kernel/traps.c | 2 +- arch/h8300/kernel/ptrace_s.c | 2 +- arch/hexagon/kernel/traps.c | 2 +- arch/hexagon/mm/vm_fault.c | 1 + arch/ia64/kernel/asm-offsets.c | 2 +- arch/ia64/kernel/brl_emu.c | 2 +- arch/ia64/kernel/mca.c | 2 +- arch/ia64/kernel/traps.c | 2 +- arch/ia64/kernel/unaligned.c | 2 +- arch/ia64/mm/fault.c | 2 +- arch/ia64/mm/init.c | 1 + arch/mips/kernel/branch.c | 2 +- arch/mips/kernel/signal_o32.c | 1 + arch/mips/mm/mmap.c | 2 +- arch/mips/sgi-ip22/ip22-berr.c | 2 +- arch/mips/sgi-ip22/ip22-reset.c | 2 +- arch/mn10300/kernel/fpu.c | 2 ++ arch/openrisc/mm/fault.c | 2 +- arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/kernel/unaligned.c | 2 +- arch/parisc/math-emu/driver.c | 3 ++- arch/powerpc/kvm/book3s_64_vio.c | 1 + arch/powerpc/mm/mmap.c | 2 +- arch/powerpc/mm/mmu_context_iommu.c | 2 +- arch/powerpc/platforms/cell/spufs/fault.c | 2 +- arch/powerpc/xmon/xmon.c | 2 +- arch/s390/kernel/nmi.c | 3 +++ arch/s390/mm/mmap.c | 1 + arch/score/kernel/traps.c | 2 +- arch/sh/kernel/cpu/sh2a/fpu.c | 2 +- arch/sh/kernel/hw_breakpoint.c | 1 + arch/sh/kernel/traps.c | 2 ++ arch/sh/math-emu/math.c | 2 +- arch/sh/mm/asids-debugfs.c | 2 ++ arch/sh/mm/fault.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 2 +- arch/sparc/kernel/sys_sparc_64.c | 2 +- arch/sparc/kernel/unaligned_32.c | 2 +- arch/tile/mm/mmap.c | 2 +- arch/um/drivers/line.c | 3 ++- arch/um/kernel/reboot.c | 2 +- arch/um/kernel/skas/mmu.c | 3 ++- arch/um/kernel/tlb.c | 3 ++- arch/um/kernel/trap.c | 2 +- arch/unicore32/kernel/fpu-ucf64.c | 2 +- arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/fault.c | 2 +- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- arch/x86/kvm/mmu.c | 1 + arch/x86/mm/mmap.c | 2 +- arch/xtensa/kernel/traps.c | 2 +- drivers/android/binder.c | 2 +- drivers/block/drbd/drbd_int.h | 2 +- drivers/char/snsc_event.c | 2 +- drivers/gpu/drm/amd/amdkfd/kfd_events.c | 2 +- drivers/gpu/drm/drm_lock.c | 2 ++ drivers/gpu/drm/ttm/ttm_lock.c | 2 +- drivers/infiniband/core/umem.c | 2 +- drivers/infiniband/hw/hfi1/user_pages.c | 2 +- drivers/infiniband/hw/qib/qib_user_pages.c | 1 + drivers/infiniband/hw/usnic/usnic_uiom.c | 2 +- drivers/isdn/i4l/isdn_tty.c | 1 + drivers/isdn/mISDN/l1oip_core.c | 2 ++ drivers/md/md.c | 1 + drivers/md/raid1.c | 3 +++ drivers/md/raid5.c | 2 ++ drivers/misc/genwqe/card_dev.c | 2 +- drivers/misc/mic/cosm/cosm_scif_server.c | 2 ++ drivers/misc/mic/cosm_client/cosm_scif_client.c | 2 ++ drivers/misc/mic/scif/scif_rma.c | 1 + drivers/net/slip/slip.c | 2 +- drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 2 +- drivers/parisc/power.c | 2 +- drivers/ps3/ps3-sys-manager.c | 1 + drivers/s390/char/fs3270.c | 1 + drivers/s390/char/keyboard.c | 2 +- drivers/scsi/bnx2fc/bnx2fc.h | 2 +- drivers/scsi/bnx2i/bnx2i.h | 2 +- drivers/scsi/libiscsi.c | 1 + drivers/staging/android/lowmemorykiller.c | 2 +- drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c | 2 +- drivers/staging/rtl8188eu/include/osdep_service.h | 2 +- drivers/staging/rtl8712/osdep_service.h | 2 +- drivers/staging/rtl8712/rtl8712_cmd.c | 1 + drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c | 1 + drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h | 2 +- drivers/target/iscsi/iscsi_target.c | 1 + drivers/target/iscsi/iscsi_target_erl0.c | 2 ++ drivers/target/iscsi/iscsi_target_login.c | 1 + drivers/target/iscsi/iscsi_target_nego.c | 1 + drivers/tty/pty.c | 2 +- drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 2 +- drivers/tty/vt/keyboard.c | 2 +- drivers/tty/vt/vt.c | 2 +- drivers/tty/vt/vt_ioctl.c | 2 +- drivers/usb/atm/usbatm.c | 2 +- drivers/usb/core/devio.c | 1 + drivers/usb/gadget/function/f_mass_storage.c | 1 + drivers/vfio/vfio_iommu_spapr_tce.c | 1 + drivers/vfio/vfio_iommu_type1.c | 2 +- drivers/w1/w1_family.c | 2 +- drivers/w1/w1_int.c | 1 + fs/attr.c | 1 + fs/autofs4/waitq.c | 1 + fs/cifs/connect.c | 1 + fs/coda/upcall.c | 2 +- fs/coredump.c | 2 +- fs/exec.c | 1 + fs/file.c | 2 +- fs/fs_struct.c | 2 +- fs/jffs2/background.c | 2 +- fs/lockd/svc.c | 2 +- fs/ncpfs/inode.c | 1 + fs/ncpfs/sock.c | 1 + fs/nfs/callback.c | 1 + fs/nfsd/nfssvc.c | 2 +- fs/proc/fd.c | 2 +- fs/select.c | 4 ++-- include/linux/oom.h | 2 +- include/linux/ptrace.h | 1 + include/linux/sched/signal.h | 6 ++++++ include/linux/signalfd.h | 2 +- include/linux/taskstats_kern.h | 2 +- ipc/mqueue.c | 1 + kernel/bpf/syscall.c | 1 + kernel/cpu.c | 2 +- kernel/debug/gdbstub.c | 1 + kernel/debug/kdb/kdb_bt.c | 2 +- kernel/hung_task.c | 2 ++ kernel/rcu/update.c | 2 +- kernel/sched/sched.h | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 2 +- kernel/time/tick-sched.c | 2 +- kernel/tracepoint.c | 2 +- kernel/tsacct.c | 2 +- kernel/user_namespace.c | 1 + lib/is_single_threaded.c | 3 +-- mm/filemap.c | 1 + mm/kmemleak.c | 2 +- mm/memory-failure.c | 2 +- mm/vmacache.c | 2 +- net/9p/client.c | 2 +- net/atm/common.c | 2 +- net/ax25/af_ax25.c | 2 +- net/caif/caif_socket.c | 2 +- net/core/stream.c | 1 + net/decnet/af_decnet.c | 2 +- net/irda/af_irda.c | 1 + net/netrom/af_netrom.c | 2 +- net/rose/af_rose.c | 2 +- net/sctp/socket.c | 1 + net/sunrpc/svc.c | 2 +- net/unix/af_unix.c | 2 +- net/x25/af_x25.c | 2 +- security/selinux/hooks.c | 2 +- 180 files changed, 204 insertions(+), 121 deletions(-) create mode 100644 include/linux/sched/signal.h (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 9d27a7d333dc..568ca29f2ad9 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index 17308f925306..b8221f112eee 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -6,7 +6,7 @@ * 1997-11-02 Modified for POSIX.1b signals by Richard Henderson */ -#include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index af2994206b4b..6448ab00043d 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/alpha/mm/fault.c b/arch/alpha/mm/fault.c index 47948b4dd157..c25e8827e7cd 100644 --- a/arch/alpha/mm/fault.c +++ b/arch/alpha/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1995 Linus Torvalds */ -#include +#include #include #include #include diff --git a/arch/arc/kernel/traps.c b/arch/arc/kernel/traps.c index c927aa84e652..ff83e78d0cfb 100644 --- a/arch/arc/kernel/traps.c +++ b/arch/arc/kernel/traps.c @@ -13,7 +13,7 @@ * Rahul Trivedi: Codito Technologies 2004 */ -#include +#include #include #include #include diff --git a/arch/arc/mm/fault.c b/arch/arc/mm/fault.c index e94e5aa33985..162c97528872 100644 --- a/arch/arc/mm/fault.c +++ b/arch/arc/mm/fault.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index ae738a6319f6..46f7bab81c40 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -10,7 +10,7 @@ * published by the Free Software Foundation. */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index 9688ec0c6ef4..dc5f1a22b3c9 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index 7d5f4c736a16..d7fe2de9cc9e 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index c2b5b9892fd1..520c7778d330 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index bf4d3bc41a7a..2a9040dcf47e 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index 66353caa35b9..d448f9cd7715 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -5,7 +5,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm/vfp/vfpmodule.c b/arch/arm/vfp/vfpmodule.c index 569d5a650a4a..a71a48e71fff 100644 --- a/arch/arm/vfp/vfpmodule.c +++ b/arch/arm/vfp/vfpmodule.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index b883f1f75216..06da8ea16bbe 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index a22161ccf447..64fc32ea3422 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 7d47c2cdfd93..5b8779a849a2 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 81283851c9af..30dd60ab8a65 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 01c171723bb3..1e0a2650c88b 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/avr32/kernel/traps.c b/arch/avr32/kernel/traps.c index eb4a3fcfbaff..50b541325025 100644 --- a/arch/avr32/kernel/traps.c +++ b/arch/avr32/kernel/traps.c @@ -14,7 +14,7 @@ #include #include /* print_modules */ #include -#include +#include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 719dd796c12c..01e546ad2f7a 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 1ed85ddadc0d..32676d7721b1 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/mm/fault.c b/arch/cris/mm/fault.c index 94183d3639ef..1fca464f1b9e 100644 --- a/arch/cris/mm/fault.c +++ b/arch/cris/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 31221fb4348e..43c134d6081c 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -9,7 +9,7 @@ * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include #include diff --git a/arch/h8300/kernel/ptrace_s.c b/arch/h8300/kernel/ptrace_s.c index ef5a9c13e76d..c0af930052c0 100644 --- a/arch/h8300/kernel/ptrace_s.c +++ b/arch/h8300/kernel/ptrace_s.c @@ -10,7 +10,7 @@ */ #include -#include +#include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 110dab152f82..4496bcf605ef 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -19,7 +19,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/hexagon/mm/vm_fault.c b/arch/hexagon/mm/vm_fault.c index 489875fd2be4..3eec33c5cfd7 100644 --- a/arch/hexagon/mm/vm_fault.c +++ b/arch/hexagon/mm/vm_fault.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/asm-offsets.c b/arch/ia64/kernel/asm-offsets.c index 60ef83e6db71..8786c8b4f187 100644 --- a/arch/ia64/kernel/asm-offsets.c +++ b/arch/ia64/kernel/asm-offsets.c @@ -6,7 +6,7 @@ #define ASM_OFFSETS_C 1 -#include +#include #include #include #include diff --git a/arch/ia64/kernel/brl_emu.c b/arch/ia64/kernel/brl_emu.c index 8682df6263d6..987b11be0021 100644 --- a/arch/ia64/kernel/brl_emu.c +++ b/arch/ia64/kernel/brl_emu.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 9509cc73b9c6..5ac51069e453 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -72,7 +72,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 8981ce98afb3..48ba46b025e1 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include /* For unblank_screen() */ #include diff --git a/arch/ia64/kernel/unaligned.c b/arch/ia64/kernel/unaligned.c index 99348d7f2255..a13680ca1e61 100644 --- a/arch/ia64/kernel/unaligned.c +++ b/arch/ia64/kernel/unaligned.c @@ -15,7 +15,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/arch/ia64/mm/fault.c b/arch/ia64/mm/fault.c index 7f2feb21753c..15f09cfff335 100644 --- a/arch/ia64/mm/fault.c +++ b/arch/ia64/mm/fault.c @@ -4,7 +4,7 @@ * Copyright (C) 1998-2002 Hewlett-Packard Co * David Mosberger-Tang */ -#include +#include #include #include #include diff --git a/arch/ia64/mm/init.c b/arch/ia64/mm/init.c index 06cdaef54b2e..8f3efa682ee8 100644 --- a/arch/ia64/mm/init.c +++ b/arch/ia64/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/branch.c b/arch/mips/kernel/branch.c index ae037a304ee4..b11facd11c9d 100644 --- a/arch/mips/kernel/branch.c +++ b/arch/mips/kernel/branch.c @@ -7,7 +7,7 @@ * Copyright (C) 2001 MIPS Technologies, Inc. */ #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/signal_o32.c b/arch/mips/kernel/signal_o32.c index 5e169fc5ca5c..2b3572fb5f1b 100644 --- a/arch/mips/kernel/signal_o32.c +++ b/arch/mips/kernel/signal_o32.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index d6d92c02308d..374d71e61ef6 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/mips/sgi-ip22/ip22-berr.c b/arch/mips/sgi-ip22/ip22-berr.c index 3f6ccd53c15d..ff8e1935c873 100644 --- a/arch/mips/sgi-ip22/ip22-berr.c +++ b/arch/mips/sgi-ip22/ip22-berr.c @@ -6,7 +6,7 @@ #include #include -#include +#include #include #include diff --git a/arch/mips/sgi-ip22/ip22-reset.c b/arch/mips/sgi-ip22/ip22-reset.c index a36f6b87548a..03a39ac5ead9 100644 --- a/arch/mips/sgi-ip22/ip22-reset.c +++ b/arch/mips/sgi-ip22/ip22-reset.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/kernel/fpu.c b/arch/mn10300/kernel/fpu.c index 2578b7ae7dd5..50ce7b447fed 100644 --- a/arch/mn10300/kernel/fpu.c +++ b/arch/mn10300/kernel/fpu.c @@ -9,6 +9,8 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include + #include #include #include diff --git a/arch/openrisc/mm/fault.c b/arch/openrisc/mm/fault.c index 53592a639744..e310ab499385 100644 --- a/arch/openrisc/mm/fault.c +++ b/arch/openrisc/mm/fault.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index bf3294171230..ce07cd3f2507 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index 0a21067ac0a3..a08ab481e556 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/math-emu/driver.c b/arch/parisc/math-emu/driver.c index 09ef4136c693..2fb59d2e2b29 100644 --- a/arch/parisc/math-emu/driver.c +++ b/arch/parisc/math-emu/driver.c @@ -27,7 +27,8 @@ * Copyright (C) 2001 Hewlett-Packard */ -#include +#include + #include "float.h" #include "math-emu.h" diff --git a/arch/powerpc/kvm/book3s_64_vio.c b/arch/powerpc/kvm/book3s_64_vio.c index ab9d14c0e460..3e26cd4979f9 100644 --- a/arch/powerpc/kvm/book3s_64_vio.c +++ b/arch/powerpc/kvm/book3s_64_vio.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 2f1e44362198..8013861aeaa7 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 7de7124ac91b..497130c5c742 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -10,7 +10,7 @@ * */ -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/fault.c b/arch/powerpc/platforms/cell/spufs/fault.c index e29e4d5afa2d..870c0a82d560 100644 --- a/arch/powerpc/platforms/cell/spufs/fault.c +++ b/arch/powerpc/platforms/cell/spufs/fault.c @@ -19,7 +19,7 @@ * along with this program; if not, write to the Free Software * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#include +#include #include #include diff --git a/arch/powerpc/xmon/xmon.c b/arch/powerpc/xmon/xmon.c index 26fa03fc9f3c..16321ad9e70c 100644 --- a/arch/powerpc/xmon/xmon.c +++ b/arch/powerpc/xmon/xmon.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/nmi.c b/arch/s390/kernel/nmi.c index 80c093e0c6f1..9bf8327154ee 100644 --- a/arch/s390/kernel/nmi.c +++ b/arch/s390/kernel/nmi.c @@ -13,6 +13,9 @@ #include #include #include +#include +#include + #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 7ae1282d5be9..5ea09403bb87 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 569ac02f68df..fa624f30f783 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include diff --git a/arch/sh/kernel/cpu/sh2a/fpu.c b/arch/sh/kernel/cpu/sh2a/fpu.c index 98bbaa447c93..352f894bece1 100644 --- a/arch/sh/kernel/cpu/sh2a/fpu.c +++ b/arch/sh/kernel/cpu/sh2a/fpu.c @@ -9,7 +9,7 @@ * * FIXME! These routines can be optimized in big endian case. */ -#include +#include #include #include #include diff --git a/arch/sh/kernel/hw_breakpoint.c b/arch/sh/kernel/hw_breakpoint.c index 2197fc584186..afe965712a69 100644 --- a/arch/sh/kernel/hw_breakpoint.c +++ b/arch/sh/kernel/hw_breakpoint.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 9513fa7840aa..3036dee854d1 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include /* print_modules */ #include diff --git a/arch/sh/math-emu/math.c b/arch/sh/math-emu/math.c index 5078cb809750..c86f4360c6ce 100644 --- a/arch/sh/math-emu/math.c +++ b/arch/sh/math-emu/math.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index bf95fdaedd0c..110bd35165bf 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/mm/fault.c b/arch/sh/mm/fault.c index 9bf876780cef..6fd1bf7481c7 100644 --- a/arch/sh/mm/fault.c +++ b/arch/sh/mm/fault.c @@ -13,6 +13,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index fb7b185ee941..ae49639a484e 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 884c70331345..54d3999d8119 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -7,7 +7,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/unaligned_32.c b/arch/sparc/kernel/unaligned_32.c index d20d4e3fd129..8367dce5f41b 100644 --- a/arch/sparc/kernel/unaligned_32.c +++ b/arch/sparc/kernel/unaligned_32.c @@ -8,7 +8,7 @@ #include -#include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index ef61c597898b..377e312dc27e 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/um/drivers/line.c b/arch/um/drivers/line.c index 62087028a9ce..366e57f5e8d6 100644 --- a/arch/um/drivers/line.c +++ b/arch/um/drivers/line.c @@ -5,8 +5,9 @@ #include #include -#include +#include #include + #include "chan.h" #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index b60a9f8cda75..79218106a033 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -3,7 +3,7 @@ * Licensed under the GPL */ -#include +#include #include #include #include diff --git a/arch/um/kernel/skas/mmu.c b/arch/um/kernel/skas/mmu.c index 3943e9d7d13d..7a1f2a936fd1 100644 --- a/arch/um/kernel/skas/mmu.c +++ b/arch/um/kernel/skas/mmu.c @@ -5,8 +5,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/um/kernel/tlb.c b/arch/um/kernel/tlb.c index 3777b82759bd..37508b190106 100644 --- a/arch/um/kernel/tlb.c +++ b/arch/um/kernel/tlb.c @@ -5,7 +5,8 @@ #include #include -#include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index ad8f206ab5e8..9711ae4aaa6a 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -4,7 +4,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/unicore32/kernel/fpu-ucf64.c b/arch/unicore32/kernel/fpu-ucf64.c index a53343a90ca2..12c8c9527b8e 100644 --- a/arch/unicore32/kernel/fpu-ucf64.c +++ b/arch/unicore32/kernel/fpu-ucf64.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index c54e32410ead..7f5e06f9a202 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -14,6 +14,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/fault.c b/arch/unicore32/mm/fault.c index b656d216a8a8..bbefcc46a45e 100644 --- a/arch/unicore32/mm/fault.c +++ b/arch/unicore32/mm/fault.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index 636c4b341f36..df91fb393a01 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index 8af04afdfcb9..d48af18e7baf 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kvm/mmu.c b/arch/x86/kvm/mmu.c index 1cda35277278..ac7810513d0e 100644 --- a/arch/x86/kvm/mmu.c +++ b/arch/x86/kvm/mmu.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index d2dc0438d654..5eabf34008f1 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 282bf721a4d6..84abd66e680d 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -24,7 +24,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/android/binder.c b/drivers/android/binder.c index e33e7fbe870b..aae4d8d4be36 100644 --- a/drivers/android/binder.c +++ b/drivers/android/binder.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index 4cb8f21ff4ef..724d1c50fc52 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -30,7 +30,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc_event.c b/drivers/char/snsc_event.c index 59bcefd6ec7c..e452673dff66 100644 --- a/drivers/char/snsc_event.c +++ b/drivers/char/snsc_event.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_events.c b/drivers/gpu/drm/amd/amdkfd/kfd_events.c index 6a3470f84998..d1ce83d73a87 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_events.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_events.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/drm_lock.c b/drivers/gpu/drm/drm_lock.c index 32d43f86a8f2..96bb6badb818 100644 --- a/drivers/gpu/drm/drm_lock.c +++ b/drivers/gpu/drm/drm_lock.c @@ -34,6 +34,8 @@ */ #include +#include + #include #include "drm_legacy.h" #include "drm_internal.h" diff --git a/drivers/gpu/drm/ttm/ttm_lock.c b/drivers/gpu/drm/ttm/ttm_lock.c index f154fb1929bd..913f4318cdc0 100644 --- a/drivers/gpu/drm/ttm/ttm_lock.c +++ b/drivers/gpu/drm/ttm/ttm_lock.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #define TTM_WRITE_LOCK_PENDING (1 << 0) diff --git a/drivers/infiniband/core/umem.c b/drivers/infiniband/core/umem.c index d525b1a2986a..27f155d2df8d 100644 --- a/drivers/infiniband/core/umem.c +++ b/drivers/infiniband/core/umem.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/hfi1/user_pages.c b/drivers/infiniband/hw/hfi1/user_pages.c index 20f4ddcac3b0..68295a12b771 100644 --- a/drivers/infiniband/hw/hfi1/user_pages.c +++ b/drivers/infiniband/hw/hfi1/user_pages.c @@ -46,7 +46,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/infiniband/hw/qib/qib_user_pages.c b/drivers/infiniband/hw/qib/qib_user_pages.c index 75f08624ac05..ce83ba9a12ef 100644 --- a/drivers/infiniband/hw/qib/qib_user_pages.c +++ b/drivers/infiniband/hw/qib/qib_user_pages.c @@ -32,6 +32,7 @@ */ #include +#include #include #include "qib.h" diff --git a/drivers/infiniband/hw/usnic/usnic_uiom.c b/drivers/infiniband/hw/usnic/usnic_uiom.c index ba868be5af2e..c49db7c33979 100644 --- a/drivers/infiniband/hw/usnic/usnic_uiom.c +++ b/drivers/infiniband/hw/usnic/usnic_uiom.c @@ -34,7 +34,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/i4l/isdn_tty.c b/drivers/isdn/i4l/isdn_tty.c index 63eaa0a9f8a1..1b169559a240 100644 --- a/drivers/isdn/i4l/isdn_tty.c +++ b/drivers/isdn/i4l/isdn_tty.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "isdn_common.h" #include "isdn_tty.h" #ifdef CONFIG_ISDN_AUDIO diff --git a/drivers/isdn/mISDN/l1oip_core.c b/drivers/isdn/mISDN/l1oip_core.c index 67c21876c35f..6ceca7db62ad 100644 --- a/drivers/isdn/mISDN/l1oip_core.c +++ b/drivers/isdn/mISDN/l1oip_core.c @@ -234,6 +234,8 @@ #include #include #include +#include + #include #include "core.h" #include "l1oip.h" diff --git a/drivers/md/md.c b/drivers/md/md.c index 985374f20e2e..548d1b8014f8 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -44,6 +44,7 @@ */ +#include #include #include #include diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 7453d94eeed7..fbc2d7851b49 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -37,7 +37,10 @@ #include #include #include +#include + #include + #include "md.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 2ce23b01dbb2..4fb09b3fcb41 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -55,6 +55,8 @@ #include #include #include +#include + #include #include "md.h" diff --git a/drivers/misc/genwqe/card_dev.c b/drivers/misc/genwqe/card_dev.c index cb290b8ca0c8..dd4617764f14 100644 --- a/drivers/misc/genwqe/card_dev.c +++ b/drivers/misc/genwqe/card_dev.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/cosm/cosm_scif_server.c b/drivers/misc/mic/cosm/cosm_scif_server.c index 5696df4326b5..85f7d09cc65f 100644 --- a/drivers/misc/mic/cosm/cosm_scif_server.c +++ b/drivers/misc/mic/cosm/cosm_scif_server.c @@ -19,6 +19,8 @@ * */ #include +#include + #include "cosm_main.h" /* diff --git a/drivers/misc/mic/cosm_client/cosm_scif_client.c b/drivers/misc/mic/cosm_client/cosm_scif_client.c index 03e98bf1ac15..aa530fcceaa9 100644 --- a/drivers/misc/mic/cosm_client/cosm_scif_client.c +++ b/drivers/misc/mic/cosm_client/cosm_scif_client.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include "../cosm/cosm_main.h" #define COSM_SCIF_MAX_RETRIES 10 diff --git a/drivers/misc/mic/scif/scif_rma.c b/drivers/misc/mic/scif/scif_rma.c index d0e9c60f944e..329727e00e97 100644 --- a/drivers/misc/mic/scif/scif_rma.c +++ b/drivers/misc/mic/scif/scif_rma.c @@ -18,6 +18,7 @@ #include #include #include +#include #include "scif_main.h" #include "scif_map.h" diff --git a/drivers/net/slip/slip.c b/drivers/net/slip/slip.c index 08db4d687533..1da31dc47f86 100644 --- a/drivers/net/slip/slip.c +++ b/drivers/net/slip/slip.c @@ -66,7 +66,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c index c5744b45ec8f..65689469c5a1 100644 --- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c +++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parisc/power.c b/drivers/parisc/power.c index ef31b77404ef..e2a3112f1c98 100644 --- a/drivers/parisc/power.c +++ b/drivers/parisc/power.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/ps3/ps3-sys-manager.c b/drivers/ps3/ps3-sys-manager.c index f2ab435954f6..73e496a72113 100644 --- a/drivers/ps3/ps3-sys-manager.c +++ b/drivers/ps3/ps3-sys-manager.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/drivers/s390/char/fs3270.c b/drivers/s390/char/fs3270.c index 85eca1cef063..c4518168fd02 100644 --- a/drivers/s390/char/fs3270.c +++ b/drivers/s390/char/fs3270.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/s390/char/keyboard.c b/drivers/s390/char/keyboard.c index 82c913318b73..ba0e4f93503d 100644 --- a/drivers/s390/char/keyboard.c +++ b/drivers/s390/char/keyboard.c @@ -7,7 +7,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2fc/bnx2fc.h b/drivers/scsi/bnx2fc/bnx2fc.h index fdd4eb4e41b2..4fc8ed5fe067 100644 --- a/drivers/scsi/bnx2fc/bnx2fc.h +++ b/drivers/scsi/bnx2fc/bnx2fc.h @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/scsi/bnx2i/bnx2i.h b/drivers/scsi/bnx2i/bnx2i.h index ed7f3228e234..89ef1a1678d1 100644 --- a/drivers/scsi/bnx2i/bnx2i.h +++ b/drivers/scsi/bnx2i/bnx2i.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/libiscsi.c b/drivers/scsi/libiscsi.c index 834d1212b6d5..07c08ce68d70 100644 --- a/drivers/scsi/libiscsi.c +++ b/drivers/scsi/libiscsi.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/android/lowmemorykiller.c b/drivers/staging/android/lowmemorykiller.c index ec3b66561412..054660049395 100644 --- a/drivers/staging/android/lowmemorykiller.c +++ b/drivers/staging/android/lowmemorykiller.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c index cf902154f0aa..bcf9f3dd0310 100644 --- a/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c +++ b/drivers/staging/lustre/lnet/libcfs/linux/linux-prim.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include "../../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/rtl8188eu/include/osdep_service.h b/drivers/staging/rtl8188eu/include/osdep_service.h index ee3f5ee06529..9e390648d93e 100644 --- a/drivers/staging/rtl8188eu/include/osdep_service.h +++ b/drivers/staging/rtl8188eu/include/osdep_service.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/osdep_service.h b/drivers/staging/rtl8712/osdep_service.h index b8a170978434..5d33020554cd 100644 --- a/drivers/staging/rtl8712/osdep_service.h +++ b/drivers/staging/rtl8712/osdep_service.h @@ -33,7 +33,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/staging/rtl8712/rtl8712_cmd.c b/drivers/staging/rtl8712/rtl8712_cmd.c index f19b6b27aa71..5346c657485d 100644 --- a/drivers/staging/rtl8712/rtl8712_cmd.c +++ b/drivers/staging/rtl8712/rtl8712_cmd.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c index cb0b7ca36b1e..8a0d214f6e9b 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_arm.c @@ -34,6 +34,7 @@ #include #include +#include #include #include #include diff --git a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h index 4055d4bf9f74..e63964f5a18a 100644 --- a/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h +++ b/drivers/staging/vc04_services/interface/vchiq_arm/vchiq_util.h @@ -47,7 +47,7 @@ #include #include #include -#include +#include #include #include #include /* for time_t */ diff --git a/drivers/target/iscsi/iscsi_target.c b/drivers/target/iscsi/iscsi_target.c index da2c73a255de..fa1d578d56bd 100644 --- a/drivers/target/iscsi/iscsi_target.c +++ b/drivers/target/iscsi/iscsi_target.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_erl0.c b/drivers/target/iscsi/iscsi_target_erl0.c index b54e72c7ab0f..a4d5e6749932 100644 --- a/drivers/target/iscsi/iscsi_target_erl0.c +++ b/drivers/target/iscsi/iscsi_target_erl0.c @@ -17,6 +17,8 @@ * GNU General Public License for more details. ******************************************************************************/ +#include + #include #include #include diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c index eab274d17b5c..b03cc03423c1 100644 --- a/drivers/target/iscsi/iscsi_target_login.c +++ b/drivers/target/iscsi/iscsi_target_login.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include /* TCP_NODELAY */ #include /* ipv6_addr_v4mapped() */ diff --git a/drivers/target/iscsi/iscsi_target_nego.c b/drivers/target/iscsi/iscsi_target_nego.c index 46388c9e08da..29eb09e0cd15 100644 --- a/drivers/target/iscsi/iscsi_target_nego.c +++ b/drivers/target/iscsi/iscsi_target_nego.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/pty.c b/drivers/tty/pty.c index a23fa5ed1d67..66b59a15780d 100644 --- a/drivers/tty/pty.c +++ b/drivers/tty/pty.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 71136742e606..65db0aeb3d80 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -14,7 +14,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index a1fd3f7d487a..985d33f0f315 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -69,7 +69,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 397e1509fe51..6b3a2c00974d 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -26,7 +26,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt.c b/drivers/tty/vt/vt.c index 9d3ce505e7ab..5c4933bb4b53 100644 --- a/drivers/tty/vt/vt.c +++ b/drivers/tty/vt/vt.c @@ -72,7 +72,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/tty/vt/vt_ioctl.c b/drivers/tty/vt/vt_ioctl.c index a56edf2d58eb..0cbfe1ff6f6c 100644 --- a/drivers/tty/vt/vt_ioctl.c +++ b/drivers/tty/vt/vt_ioctl.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/atm/usbatm.c b/drivers/usb/atm/usbatm.c index 5a59da0dc98a..3e80aa3b917a 100644 --- a/drivers/usb/atm/usbatm.c +++ b/drivers/usb/atm/usbatm.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/core/devio.c b/drivers/usb/core/devio.c index ca425e8099ea..cfc3cff6e8d5 100644 --- a/drivers/usb/core/devio.c +++ b/drivers/usb/core/devio.c @@ -36,6 +36,7 @@ #include #include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_mass_storage.c b/drivers/usb/gadget/function/f_mass_storage.c index 8f3659b65f53..4c8aacc232c0 100644 --- a/drivers/usb/gadget/function/f_mass_storage.c +++ b/drivers/usb/gadget/function/f_mass_storage.c @@ -207,6 +207,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 185d50ee1b12..cf3de91fbfe7 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vfio/vfio_iommu_type1.c b/drivers/vfio/vfio_iommu_type1.c index 7e94c7fc90ae..c26fa1f3ed86 100644 --- a/drivers/vfio/vfio_iommu_type1.c +++ b/drivers/vfio/vfio_iommu_type1.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/w1/w1_family.c b/drivers/w1/w1_family.c index df1c9bb90eb5..2096f460498f 100644 --- a/drivers/w1/w1_family.c +++ b/drivers/w1/w1_family.c @@ -14,7 +14,7 @@ #include #include -#include /* schedule_timeout() */ +#include #include #include diff --git a/drivers/w1/w1_int.c b/drivers/w1/w1_int.c index 4ce1b66d5092..2cae7b29bb5f 100644 --- a/drivers/w1/w1_int.c +++ b/drivers/w1/w1_int.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/fs/attr.c b/fs/attr.c index c902b3d53508..135304146120 100644 --- a/fs/attr.c +++ b/fs/attr.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/waitq.c b/fs/autofs4/waitq.c index 79fbd85db4ba..24a58bf9ca72 100644 --- a/fs/autofs4/waitq.c +++ b/fs/autofs4/waitq.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "autofs_i.h" diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index 777ad9f4fc3c..9bf25be05636 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/coda/upcall.c b/fs/coda/upcall.c index f6c6c8adbc01..e82357c89979 100644 --- a/fs/coda/upcall.c +++ b/fs/coda/upcall.c @@ -15,7 +15,7 @@ */ #include -#include +#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index 23a539b29225..c74ab43b8383 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -34,7 +35,6 @@ #include #include #include -#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index 9c80d011594e..aa228b98ad5b 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -34,6 +34,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/file.c b/fs/file.c index 69d6990e3021..ad6f094f2eff 100644 --- a/fs/file.c +++ b/fs/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 7dca743b2ce1..543ed50f0387 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include #include diff --git a/fs/jffs2/background.c b/fs/jffs2/background.c index e5c1783ab64a..453a6a1fff34 100644 --- a/fs/jffs2/background.c +++ b/fs/jffs2/background.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include "nodelist.h" diff --git a/fs/lockd/svc.c b/fs/lockd/svc.c index 7e4ea3b9f472..e7c8b9c76e48 100644 --- a/fs/lockd/svc.c +++ b/fs/lockd/svc.c @@ -17,7 +17,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ncpfs/inode.c b/fs/ncpfs/inode.c index 7eb89c23c847..d5606099712a 100644 --- a/fs/ncpfs/inode.c +++ b/fs/ncpfs/inode.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/fs/ncpfs/sock.c b/fs/ncpfs/sock.c index 97b111d79489..bdea177aa405 100644 --- a/fs/ncpfs/sock.c +++ b/fs/ncpfs/sock.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfs/callback.c b/fs/nfs/callback.c index 484bebc20bca..bb79972dc638 100644 --- a/fs/nfs/callback.c +++ b/fs/nfs/callback.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/nfsd/nfssvc.c b/fs/nfsd/nfssvc.c index efd66da99201..786a4a2cb2d7 100644 --- a/fs/nfsd/nfssvc.c +++ b/fs/nfsd/nfssvc.c @@ -6,7 +6,7 @@ * Copyright (C) 1995, 1996, 1997 Olaf Kirch */ -#include +#include #include #include #include diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 00ce1531b2f5..c330495c3115 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/fs/select.c b/fs/select.c index 305c0daf5d67..e2112270d75a 100644 --- a/fs/select.c +++ b/fs/select.c @@ -15,7 +15,8 @@ */ #include -#include +#include +#include #include #include #include @@ -26,7 +27,6 @@ #include #include #include -#include #include #include #include diff --git a/include/linux/oom.h b/include/linux/oom.h index b4e36e92bc87..8a266e2be5a6 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -2,7 +2,7 @@ #define __INCLUDE_LINUX_OOM_H -#include +#include #include #include #include diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index e0e539321ab9..422bc2e4cb6a 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -3,6 +3,7 @@ #include /* For unlikely. */ #include /* For struct task_struct. */ +#include /* For send_sig(), same_thread_group(), etc. */ #include /* for IS_ERR_VALUE */ #include /* For BUG_ON. */ #include /* For task_active_pid_ns. */ diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h new file mode 100644 index 000000000000..da69cd05cd68 --- /dev/null +++ b/include/linux/sched/signal.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_SIGNAL_H +#define _LINUX_SCHED_SIGNAL_H + +#include + +#endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/include/linux/signalfd.h b/include/linux/signalfd.h index eadbe227c256..4985048640a7 100644 --- a/include/linux/signalfd.h +++ b/include/linux/signalfd.h @@ -8,7 +8,7 @@ #define _LINUX_SIGNALFD_H #include - +#include #ifdef CONFIG_SIGNALFD diff --git a/include/linux/taskstats_kern.h b/include/linux/taskstats_kern.h index 58de6edf751f..e2a5daf8d14f 100644 --- a/include/linux/taskstats_kern.h +++ b/include/linux/taskstats_kern.h @@ -8,7 +8,7 @@ #define _LINUX_TASKSTATS_KERN_H #include -#include +#include #include #ifdef CONFIG_TASKSTATS diff --git a/ipc/mqueue.c b/ipc/mqueue.c index 40e448de8a7e..4f7241fbeff3 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include "util.h" diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 461eb1e66a0f..7af0dcc5d755 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 0a5f630f5c54..0aae3183b029 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/debug/gdbstub.c b/kernel/debug/gdbstub.c index 19d9a578c753..7510dc687c0d 100644 --- a/kernel/debug/gdbstub.c +++ b/kernel/debug/gdbstub.c @@ -29,6 +29,7 @@ */ #include +#include #include #include #include diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index fe15fff5df53..9b976a42376d 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "kdb_private.h" diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 40c07e4fa116..129247e56902 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include /* diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index a0e90e0afc75..da128deb10ec 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e2307a6c29f1..641249471952 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index a95f13c31464..f6b961c5e58c 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index b4377a5e4269..a2475a9f57d8 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -2,7 +2,7 @@ * Implement CPU time clocks for the POSIX clock interface. */ -#include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 4fee1c3abd0b..0f411a4690a0 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 1f9a31f934a4..9f6984d52fec 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/kernel/tsacct.c b/kernel/tsacct.c index 571a2d3821d8..d9a03b80b75d 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 86b7854fec8e..2f735cbe05e8 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 391fd23976a2..9745cfffcb69 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -9,8 +9,7 @@ * as published by the Free Software Foundation; either version * 2 of the Licence, or (at your option) any later version. */ - -#include +#include /* * Returns true if the task does not share ->mm with another thread/process. diff --git a/mm/filemap.c b/mm/filemap.c index 1944c631e3e6..1694623a6289 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index da3436953022..2df6d3687b2a 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -73,7 +73,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index 3d0f2fd4bf73..b74984db386e 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -40,7 +40,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/vmacache.c b/mm/vmacache.c index 7c233f8e20ee..4355d34c68a6 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -1,7 +1,7 @@ /* * Copyright (C) 2014 Davidlohr Bueso. */ -#include +#include #include #include diff --git a/net/9p/client.c b/net/9p/client.c index 3fc94a49ccd5..25cfd8a4bc36 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/atm/common.c b/net/atm/common.c index a3ca922d307b..9613381f5db0 100644 --- a/net/atm/common.c +++ b/net/atm/common.c @@ -13,7 +13,7 @@ #include /* error codes */ #include #include -#include +#include #include /* struct timeval */ #include #include diff --git a/net/ax25/af_ax25.c b/net/ax25/af_ax25.c index 90fcf5fc2e0a..a8e42cedf1db 100644 --- a/net/ax25/af_ax25.c +++ b/net/ax25/af_ax25.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/caif/caif_socket.c b/net/caif/caif_socket.c index 92cbbd2afddb..adcad344c843 100644 --- a/net/caif/caif_socket.c +++ b/net/caif/caif_socket.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/core/stream.c b/net/core/stream.c index f575bcf64af2..20231dbb1da0 100644 --- a/net/core/stream.c +++ b/net/core/stream.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/net/decnet/af_decnet.c b/net/decnet/af_decnet.c index a90ed67027b0..e6e79eda9763 100644 --- a/net/decnet/af_decnet.c +++ b/net/decnet/af_decnet.c @@ -106,7 +106,7 @@ Version 0.0.6 2.1.110 07-aug-98 Eduardo Marcelo Serrat #include #include #include -#include +#include #include #include #include diff --git a/net/irda/af_irda.c b/net/irda/af_irda.c index ab254041dab7..81adc29a448d 100644 --- a/net/irda/af_irda.c +++ b/net/irda/af_irda.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c index ed212ffc1d9d..4bbf4526b885 100644 --- a/net/netrom/af_netrom.c +++ b/net/netrom/af_netrom.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c index 9ad301c46b88..b8a1df2c9785 100644 --- a/net/rose/af_rose.c +++ b/net/rose/af_rose.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/sctp/socket.c b/net/sctp/socket.c index 465a9c8464f9..6f0a9be50f50 100644 --- a/net/sctp/socket.c +++ b/net/sctp/socket.c @@ -57,6 +57,7 @@ #include #include #include +#include #include #include #include diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index b94efd93d3e4..a08aeb56b8e4 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index e2d18b9f910f..ee37b390260a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -85,7 +85,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 079c883aa96e..fd28a49dbe8f 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 9a8f12f8d5b7..b12f873f92ba 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From 38b8d208a4544c9a26b10baec89b8a21042e5305 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to move softlockup APIs out of , which will have to be picked up from other headers and a couple of .c files. already includes . Include the header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/ia64/kernel/time.c | 1 + arch/ia64/kernel/uncached.c | 1 + arch/powerpc/kernel/swsusp_64.c | 1 + arch/x86/kernel/pvclock.c | 2 ++ arch/x86/xen/smp.c | 1 + drivers/ide/ide-taskfile.c | 1 + drivers/mtd/nand/nand_base.c | 1 + drivers/video/fbdev/nvidia/nv_accel.c | 2 ++ init/main.c | 1 + kernel/debug/debug_core.c | 1 + kernel/power/hibernate.c | 1 + kernel/power/snapshot.c | 1 + kernel/sched/clock.c | 1 + kernel/time/tick-sched.c | 1 + kernel/time/timekeeping.c | 1 + 15 files changed, 17 insertions(+) (limited to 'arch/x86') diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index faa116822c4c..144f9db7a876 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/uncached.c b/arch/ia64/kernel/uncached.c index f3976da36721..583f7ff6b589 100644 --- a/arch/ia64/kernel/uncached.c +++ b/arch/ia64/kernel/uncached.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/swsusp_64.c b/arch/powerpc/kernel/swsusp_64.c index 0e899e47c325..51db012808f5 100644 --- a/arch/powerpc/kernel/swsusp_64.c +++ b/arch/powerpc/kernel/swsusp_64.c @@ -10,6 +10,7 @@ #include #include #include +#include void do_after_copyback(void) { diff --git a/arch/x86/kernel/pvclock.c b/arch/x86/kernel/pvclock.c index 9e93fe5803b4..5c3f6d6a5078 100644 --- a/arch/x86/kernel/pvclock.c +++ b/arch/x86/kernel/pvclock.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include diff --git a/arch/x86/xen/smp.c b/arch/x86/xen/smp.c index 0dee6f59ea82..7ff2f1bfb7ec 100644 --- a/arch/x86/xen/smp.c +++ b/arch/x86/xen/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/ide/ide-taskfile.c b/drivers/ide/ide-taskfile.c index 247b9faccce1..4c0007cb74e3 100644 --- a/drivers/ide/ide-taskfile.c +++ b/drivers/ide/ide-taskfile.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/drivers/mtd/nand/nand_base.c b/drivers/mtd/nand/nand_base.c index 1492c12906f6..b0524f8accb6 100644 --- a/drivers/mtd/nand/nand_base.c +++ b/drivers/mtd/nand/nand_base.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/video/fbdev/nvidia/nv_accel.c b/drivers/video/fbdev/nvidia/nv_accel.c index ad6472a894ea..7341fed63e35 100644 --- a/drivers/video/fbdev/nvidia/nv_accel.c +++ b/drivers/video/fbdev/nvidia/nv_accel.c @@ -48,6 +48,8 @@ */ #include +#include + #include "nv_type.h" #include "nv_proto.h" #include "nv_dma.h" diff --git a/init/main.c b/init/main.c index ae9f2008fb86..c891cfb8928f 100644 --- a/init/main.c +++ b/init/main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index a603ef28f70c..65c0f1363788 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -49,6 +49,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/power/hibernate.c b/kernel/power/hibernate.c index 86385af1080f..b8be5c803cdd 100644 --- a/kernel/power/hibernate.c +++ b/kernel/power/hibernate.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/power/snapshot.c b/kernel/power/snapshot.c index 905d5bbd595f..d79a38de425a 100644 --- a/kernel/power/snapshot.c +++ b/kernel/power/snapshot.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index dd7817cdbf58..a08795e21628 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -58,6 +58,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index 0f411a4690a0..5e9e123b4321 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index fb564acee0f3..5b63a2102c29 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 010426079ec1228a7f980d2eef766a84c0f9241a Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:31 +0100 Subject: sched/headers: Prepare for new header dependencies before moving more code to We are going to split more MM APIs out of , which will have to be picked up from a couple of .c files. The APIs that we are going to move are: arch_pick_mmap_layout() arch_get_unmapped_area() arch_get_unmapped_area_topdown() mm_update_next_owner() Include the header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/arc/mm/mmap.c | 3 ++- arch/arm/mm/mmap.c | 1 + arch/arm64/mm/mmap.c | 1 + arch/frv/mm/elf-fdpic.c | 1 + arch/ia64/kernel/sys_ia64.c | 1 + arch/mips/mm/mmap.c | 1 + arch/parisc/kernel/cache.c | 1 + arch/parisc/kernel/sys_parisc.c | 1 + arch/parisc/mm/hugetlbpage.c | 1 + arch/powerpc/mm/mmap.c | 1 + arch/s390/mm/mmap.c | 1 + arch/sh/mm/mmap.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 1 + arch/sparc/kernel/sys_sparc_64.c | 1 + arch/sparc/mm/hugetlbpage.c | 1 + arch/tile/mm/hugetlbpage.c | 1 + arch/tile/mm/mmap.c | 1 + arch/x86/kernel/sys_x86_64.c | 1 + arch/x86/mm/hugetlbpage.c | 1 + arch/x86/mm/mmap.c | 1 + arch/xtensa/kernel/syscall.c | 1 + 22 files changed, 23 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 568ca29f2ad9..3b9b2a382ba2 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/arc/mm/mmap.c b/arch/arc/mm/mmap.c index 2e06d56e987b..3e25e8d6486b 100644 --- a/arch/arc/mm/mmap.c +++ b/arch/arc/mm/mmap.c @@ -13,7 +13,8 @@ #include #include #include -#include +#include + #include #define COLOUR_ALIGN(addr, pgoff) \ diff --git a/arch/arm/mm/mmap.c b/arch/arm/mm/mmap.c index d448f9cd7715..2239fde10b80 100644 --- a/arch/arm/mm/mmap.c +++ b/arch/arm/mm/mmap.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/mm/mmap.c b/arch/arm64/mm/mmap.c index 1e0a2650c88b..7b0d55756eb1 100644 --- a/arch/arm64/mm/mmap.c +++ b/arch/arm64/mm/mmap.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/elf-fdpic.c b/arch/frv/mm/elf-fdpic.c index 836f14707a62..da82c25301e7 100644 --- a/arch/frv/mm/elf-fdpic.c +++ b/arch/frv/mm/elf-fdpic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index a09c12230bc5..ce4cc60d519b 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* doh, must come after sched.h... */ #include diff --git a/arch/mips/mm/mmap.c b/arch/mips/mm/mmap.c index 374d71e61ef6..64dd8bdd92c3 100644 --- a/arch/mips/mm/mmap.c +++ b/arch/mips/mm/mmap.c @@ -14,6 +14,7 @@ #include #include #include +#include unsigned long shm_align_mask = PAGE_SIZE - 1; /* Sane caches */ EXPORT_SYMBOL(shm_align_mask); diff --git a/arch/parisc/kernel/cache.c b/arch/parisc/kernel/cache.c index 977f0a4f5ecf..7820b864de1a 100644 --- a/arch/parisc/kernel/cache.c +++ b/arch/parisc/kernel/cache.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/sys_parisc.c b/arch/parisc/kernel/sys_parisc.c index ce07cd3f2507..e5288638a1d9 100644 --- a/arch/parisc/kernel/sys_parisc.c +++ b/arch/parisc/kernel/sys_parisc.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/mm/hugetlbpage.c b/arch/parisc/mm/hugetlbpage.c index 5d6eea925cf4..aa50ac090e9b 100644 --- a/arch/parisc/mm/hugetlbpage.c +++ b/arch/parisc/mm/hugetlbpage.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/mmap.c b/arch/powerpc/mm/mmap.c index 8013861aeaa7..a5d9ef59debe 100644 --- a/arch/powerpc/mm/mmap.c +++ b/arch/powerpc/mm/mmap.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/mmap.c b/arch/s390/mm/mmap.c index 5ea09403bb87..50618614881f 100644 --- a/arch/s390/mm/mmap.c +++ b/arch/s390/mm/mmap.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/mm/mmap.c b/arch/sh/mm/mmap.c index 6777177807c2..08e7af0be4a7 100644 --- a/arch/sh/mm/mmap.c +++ b/arch/sh/mm/mmap.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ae49639a484e..ff3573059936 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index 54d3999d8119..c521ee277083 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/mm/hugetlbpage.c b/arch/sparc/mm/hugetlbpage.c index e98a3f2e8f0f..323bc6b6e3ad 100644 --- a/arch/sparc/mm/hugetlbpage.c +++ b/arch/sparc/mm/hugetlbpage.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/mm/hugetlbpage.c b/arch/tile/mm/hugetlbpage.c index 77ceaa343fce..cb10153b5c9f 100644 --- a/arch/tile/mm/hugetlbpage.c +++ b/arch/tile/mm/hugetlbpage.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/mmap.c b/arch/tile/mm/mmap.c index 377e312dc27e..8ab28167c44b 100644 --- a/arch/tile/mm/mmap.c +++ b/arch/tile/mm/mmap.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/sys_x86_64.c b/arch/x86/kernel/sys_x86_64.c index a55ed63b9f91..50215a4b9347 100644 --- a/arch/x86/kernel/sys_x86_64.c +++ b/arch/x86/kernel/sys_x86_64.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/mm/hugetlbpage.c b/arch/x86/mm/hugetlbpage.c index 2ae8584b44c7..c5066a260803 100644 --- a/arch/x86/mm/hugetlbpage.c +++ b/arch/x86/mm/hugetlbpage.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/mmap.c b/arch/x86/mm/mmap.c index 5eabf34008f1..7940166c799b 100644 --- a/arch/x86/mm/mmap.c +++ b/arch/x86/mm/mmap.c @@ -29,6 +29,7 @@ #include #include #include +#include #include struct va_alignment __read_mostly va_align = { diff --git a/arch/xtensa/kernel/syscall.c b/arch/xtensa/kernel/syscall.c index d3fd100dffc9..06937928cb72 100644 --- a/arch/xtensa/kernel/syscall.c +++ b/arch/xtensa/kernel/syscall.c @@ -25,6 +25,7 @@ #include #include #include +#include #include typedef void (*syscall_t)(void); -- cgit v1.2.3 From 5b825c3af1d8a0af4deb4a5eb349d0d0050c62e5 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 17:54:15 +0100 Subject: sched/headers: Prepare to remove inclusion from Add #include dependencies to all .c files rely on sched.h doing that for them. Note that even if the count where we need to add extra headers seems high, it's still a net win, because is included in over 2,200 files ... Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/sys_oabi-compat.c | 1 + arch/mips/kernel/mips-mt-fpaff.c | 1 + arch/x86/include/asm/intel_rdt.h | 1 + arch/x86/kernel/cpu/intel_cacheinfo.c | 1 + block/ioprio.c | 1 + drivers/misc/eeprom/eeprom.c | 1 + drivers/misc/lkdtm_heap.c | 1 + drivers/misc/vmw_vmci/vmci_context.c | 1 + drivers/misc/vmw_vmci/vmci_host.c | 1 + drivers/staging/lustre/lustre/include/lustre_compat.h | 1 + drivers/staging/lustre/lustre/ptlrpc/sec.c | 1 + drivers/xen/balloon.c | 1 + fs/9p/v9fs.c | 1 + fs/affs/inode.c | 1 + fs/affs/super.c | 1 + fs/autofs4/dev-ioctl.c | 1 + fs/befs/linuxvfs.c | 1 + fs/binfmt_elf.c | 1 + fs/cachefiles/internal.h | 1 + fs/compat.c | 1 + fs/exportfs/expfs.c | 1 + fs/ext2/balloc.c | 1 + fs/ext4/ialloc.c | 2 ++ fs/file_table.c | 1 + fs/gfs2/inode.c | 1 + fs/gfs2/sys.c | 1 + fs/hfs/inode.c | 1 + fs/hfsplus/inode.c | 1 + fs/isofs/inode.c | 1 + fs/jffs2/fs.c | 1 + fs/libfs.c | 1 + fs/namespace.c | 1 + fs/ncpfs/ioctl.c | 1 + fs/notify/fanotify/fanotify.c | 1 + fs/notify/inotify/inotify_fsnotify.c | 1 + fs/omfs/inode.c | 1 + fs/overlayfs/copy_up.c | 1 + fs/overlayfs/inode.c | 1 + fs/overlayfs/namei.c | 1 + fs/overlayfs/super.c | 1 + fs/overlayfs/util.c | 1 + fs/posix_acl.c | 1 + fs/proc/proc_sysctl.c | 1 + fs/proc/root.c | 1 + fs/quota/dquot.c | 1 + fs/stat.c | 1 + fs/xfs/xfs_ioctl.c | 1 + include/linux/cred.h | 1 + include/linux/sched/signal.h | 1 + include/linux/wait.h | 1 + include/net/scm.h | 1 + include/rdma/ib.h | 1 + ipc/namespace.c | 1 + kernel/pid_namespace.c | 1 + kernel/ucount.c | 1 + kernel/uid16.c | 1 + kernel/utsname.c | 1 + mm/usercopy.c | 1 + net/dns_resolver/dns_query.c | 2 ++ net/netfilter/xt_owner.c | 2 ++ net/sunrpc/auth.c | 1 + security/apparmor/policy.c | 1 + security/keys/internal.h | 1 + security/keys/keyctl.c | 1 + security/keys/persistent.c | 2 ++ 65 files changed, 69 insertions(+) (limited to 'arch/x86') diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 5f221acd21ae..b9786f491873 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index 1a0a3b4ecc3e..ffc6bd3464d9 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/include/asm/intel_rdt.h b/arch/x86/include/asm/intel_rdt.h index 95ce5c85b009..0d64397cee58 100644 --- a/arch/x86/include/asm/intel_rdt.h +++ b/arch/x86/include/asm/intel_rdt.h @@ -3,6 +3,7 @@ #ifdef CONFIG_INTEL_RDT_A +#include #include #include diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 0282b0df004a..c55fb2cb2acc 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/block/ioprio.c b/block/ioprio.c index 89c43e07787d..4b9ab8367dda 100644 --- a/block/ioprio.c +++ b/block/ioprio.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/eeprom/eeprom.c b/drivers/misc/eeprom/eeprom.c index 3d1d55157e5f..2fad790db3bf 100644 --- a/drivers/misc/eeprom/eeprom.c +++ b/drivers/misc/eeprom/eeprom.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/lkdtm_heap.c b/drivers/misc/lkdtm_heap.c index 0f1581664c1c..ffb6aeac07b3 100644 --- a/drivers/misc/lkdtm_heap.c +++ b/drivers/misc/lkdtm_heap.c @@ -4,6 +4,7 @@ */ #include "lkdtm.h" #include +#include /* * This tries to stay within the next largest power-of-2 kmalloc cache diff --git a/drivers/misc/vmw_vmci/vmci_context.c b/drivers/misc/vmw_vmci/vmci_context.c index f35f0c8606b9..21d0fa592145 100644 --- a/drivers/misc/vmw_vmci/vmci_context.c +++ b/drivers/misc/vmw_vmci/vmci_context.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "vmci_queue_pair.h" diff --git a/drivers/misc/vmw_vmci/vmci_host.c b/drivers/misc/vmw_vmci/vmci_host.c index ec090105eb4b..8a16a26e9658 100644 --- a/drivers/misc/vmw_vmci/vmci_host.c +++ b/drivers/misc/vmw_vmci/vmci_host.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/lustre/lustre/include/lustre_compat.h b/drivers/staging/lustre/lustre/include/lustre_compat.h index 300e96fb032a..da9ce195c52e 100644 --- a/drivers/staging/lustre/lustre/include/lustre_compat.h +++ b/drivers/staging/lustre/lustre/include/lustre_compat.h @@ -35,6 +35,7 @@ #include #include +#include #include "lustre_patchless_compat.h" diff --git a/drivers/staging/lustre/lustre/ptlrpc/sec.c b/drivers/staging/lustre/lustre/ptlrpc/sec.c index e860df7c45a2..49f34fd655c3 100644 --- a/drivers/staging/lustre/lustre/ptlrpc/sec.c +++ b/drivers/staging/lustre/lustre/ptlrpc/sec.c @@ -38,6 +38,7 @@ #include "../../include/linux/libcfs/libcfs.h" #include +#include #include #include "../include/obd.h" diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index db107fa50ca1..a6d4378eb8d9 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index 072e7599583a..a89f3cfe3c7d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/affs/inode.c b/fs/affs/inode.c index a5e6097eb5a9..abcc59899229 100644 --- a/fs/affs/inode.c +++ b/fs/affs/inode.c @@ -10,6 +10,7 @@ * (C) 1991 Linus Torvalds - minix filesystem */ #include +#include #include #include "affs.h" diff --git a/fs/affs/super.c b/fs/affs/super.c index 37532538e8ab..c2c27a8f128e 100644 --- a/fs/affs/super.c +++ b/fs/affs/super.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/autofs4/dev-ioctl.c b/fs/autofs4/dev-ioctl.c index 806df746f1a9..734cbf8d9676 100644 --- a/fs/autofs4/dev-ioctl.c +++ b/fs/autofs4/dev-ioctl.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/befs/linuxvfs.c b/fs/befs/linuxvfs.c index 19407165f4aa..c500e954debb 100644 --- a/fs/befs/linuxvfs.c +++ b/fs/befs/linuxvfs.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include "befs.h" diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index fbbe52e1250e..2c95257fa4da 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/cachefiles/internal.h b/fs/cachefiles/internal.h index cd1effee8a49..9bf90bcc56ac 100644 --- a/fs/cachefiles/internal.h +++ b/fs/cachefiles/internal.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/fs/compat.c b/fs/compat.c index e50a2114f474..c61b506f5bc9 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a4b531be9168..9ec1038f937e 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #define dprintk(fmt, args...) do{}while(0) diff --git a/fs/ext2/balloc.c b/fs/ext2/balloc.c index 4c40c0786e16..d0bdb74f0e15 100644 --- a/fs/ext2/balloc.c +++ b/fs/ext2/balloc.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c index b14bae2598bc..17bc043308f3 100644 --- a/fs/ext4/ialloc.c +++ b/fs/ext4/ialloc.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include "ext4.h" diff --git a/fs/file_table.c b/fs/file_table.c index 6d982b57de92..954d510b765a 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb7724b8578a..9d28f55fbd1d 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/sys.c b/fs/gfs2/sys.c index f8d30e41d1d3..7a515345610c 100644 --- a/fs/gfs2/sys.c +++ b/fs/gfs2/sys.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/fs/hfs/inode.c b/fs/hfs/inode.c index f776acf2378a..bfbba799430f 100644 --- a/fs/hfs/inode.c +++ b/fs/hfs/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/fs/hfsplus/inode.c b/fs/hfsplus/inode.c index 2e796f8302ff..e8638d528195 100644 --- a/fs/hfsplus/inode.c +++ b/fs/hfsplus/inode.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include "hfsplus_fs.h" diff --git a/fs/isofs/inode.c b/fs/isofs/inode.c index 871c8b392099..020ba0936146 100644 --- a/fs/isofs/inode.c +++ b/fs/isofs/inode.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/fs/jffs2/fs.c b/fs/jffs2/fs.c index 567653f7c0ce..76fa814df3d1 100644 --- a/fs/jffs2/fs.c +++ b/fs/jffs2/fs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35feed6..217896ca4fae 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index 8bfad42c1ccf..131cd7b94f47 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include /* init_rootfs */ #include /* get_fs_root et.al. */ diff --git a/fs/ncpfs/ioctl.c b/fs/ncpfs/ioctl.c index 4434e4977cf3..12550c2320cc 100644 --- a/fs/ncpfs/ioctl.c +++ b/fs/ncpfs/ioctl.c @@ -19,6 +19,7 @@ #include #include #include +#include #include diff --git a/fs/notify/fanotify/fanotify.c b/fs/notify/fanotify/fanotify.c index a4c46221755e..e5f7e47de68e 100644 --- a/fs/notify/fanotify/fanotify.c +++ b/fs/notify/fanotify/fanotify.c @@ -6,6 +6,7 @@ #include /* UINT_MAX */ #include #include +#include #include #include diff --git a/fs/notify/inotify/inotify_fsnotify.c b/fs/notify/inotify/inotify_fsnotify.c index f36c29398de3..1aeb837ae414 100644 --- a/fs/notify/inotify/inotify_fsnotify.c +++ b/fs/notify/inotify/inotify_fsnotify.c @@ -30,6 +30,7 @@ #include /* kmem_* */ #include #include +#include #include "inotify.h" diff --git a/fs/omfs/inode.c b/fs/omfs/inode.c index df7ea8543a2e..8c9034ee7383 100644 --- a/fs/omfs/inode.c +++ b/fs/omfs/inode.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..53d3f830358f 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a02..6639f487f835 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include "overlayfs.h" diff --git a/fs/overlayfs/namei.c b/fs/overlayfs/namei.c index 023bb0b03352..b8b077821fb0 100644 --- a/fs/overlayfs/namei.c +++ b/fs/overlayfs/namei.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/fs/overlayfs/super.c b/fs/overlayfs/super.c index 20f48abbb82f..9aa37c2f7f7d 100644 --- a/fs/overlayfs/super.c +++ b/fs/overlayfs/super.c @@ -7,6 +7,7 @@ * the Free Software Foundation. */ +#include #include #include #include diff --git a/fs/overlayfs/util.c b/fs/overlayfs/util.c index 952286f4826c..9dc1c0af586b 100644 --- a/fs/overlayfs/util.c +++ b/fs/overlayfs/util.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include "overlayfs.h" #include "ovl_entry.h" diff --git a/fs/posix_acl.c b/fs/posix_acl.c index c9d48dc78495..eebf5f6cf6d5 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3e64c6502dc8..3d203b1f5a02 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/root.c b/fs/proc/root.c index b90da888b81a..5d5fed20bfff 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "internal.h" diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index 406fed92362a..74b489e3714d 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -72,6 +72,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/stat.c b/fs/stat.c index 3f14d1ef0868..95bd41762770 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c index cf1363dbf32b..2fd7fdf5438f 100644 --- a/fs/xfs/xfs_ioctl.c +++ b/fs/xfs/xfs_ioctl.c @@ -43,6 +43,7 @@ #include "xfs_acl.h" #include +#include #include #include #include diff --git a/include/linux/cred.h b/include/linux/cred.h index 045d33e48069..b03e7d049a64 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -18,6 +18,7 @@ #include #include #include +#include #include struct cred; diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index ce93c4a02b79..0f4e9f4a43fd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include diff --git a/include/linux/wait.h b/include/linux/wait.h index 1421132e9086..aacb1282d19a 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -6,6 +6,7 @@ #include #include #include + #include #include diff --git a/include/net/scm.h b/include/net/scm.h index 59fa93c01d2a..142ea9e7a6d0 100644 --- a/include/net/scm.h +++ b/include/net/scm.h @@ -3,6 +3,7 @@ #include #include +#include #include #include #include diff --git a/include/rdma/ib.h b/include/rdma/ib.h index a6b93706b0fc..9b4c22a36931 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -35,6 +35,7 @@ #include #include +#include struct ib_addr { union { diff --git a/ipc/namespace.c b/ipc/namespace.c index 0abdea496493..1f1d713ac19c 100644 --- a/ipc/namespace.c +++ b/ipc/namespace.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index eef2ce968636..25314c96fbe6 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ucount.c b/kernel/ucount.c index 8a11fc0cb459..62630a40ab3a 100644 --- a/kernel/ucount.c +++ b/kernel/ucount.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/kernel/uid16.c b/kernel/uid16.c index 71645ae9303a..5c2dc5b2bf4f 100644 --- a/kernel/uid16.c +++ b/kernel/uid16.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/kernel/utsname.c b/kernel/utsname.c index 6976cd47dcf6..06585ad296ff 100644 --- a/kernel/utsname.c +++ b/kernel/utsname.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/mm/usercopy.c b/mm/usercopy.c index 8345299e3e3b..7ccad05c0d5c 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -16,6 +16,7 @@ #include #include +#include #include enum { diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c index ecc28cff08ab..ab9fec00a788 100644 --- a/net/dns_resolver/dns_query.c +++ b/net/dns_resolver/dns_query.c @@ -37,8 +37,10 @@ #include #include +#include #include #include + #include #include diff --git a/net/netfilter/xt_owner.c b/net/netfilter/xt_owner.c index 16477df45b3b..3d705c688a27 100644 --- a/net/netfilter/xt_owner.c +++ b/net/netfilter/xt_owner.c @@ -13,6 +13,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/sunrpc/auth.c b/net/sunrpc/auth.c index a1ee933e3029..d2623b9f23d6 100644 --- a/net/sunrpc/auth.c +++ b/net/sunrpc/auth.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index f44312a19522..462c5d36b871 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include "include/apparmor.h" diff --git a/security/keys/internal.h b/security/keys/internal.h index a705a7d92ad7..a2f4c0abb8d8 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -13,6 +13,7 @@ #define _INTERNAL_H #include +#include #include #include #include diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index 04a764f71ec8..bcb0b597c391 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/security/keys/persistent.c b/security/keys/persistent.c index 1edc1f0a0ce2..d0cb5b32eff7 100644 --- a/security/keys/persistent.c +++ b/security/keys/persistent.c @@ -10,6 +10,8 @@ */ #include +#include + #include "internal.h" unsigned persistent_keyring_expiry = 3 * 24 * 3600; /* Expire after 3 days of non-use */ -- cgit v1.2.3 From 174cd4b1e5fbd0d74c68cf3a74f5bd4923485512 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Thu, 2 Feb 2017 19:15:33 +0100 Subject: sched/headers: Prepare to move signal wakeup & sigpending methods from into Fix up affected files that include this signal functionality via sched.h. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/common/bL_switcher.c | 2 +- arch/cris/arch-v10/drivers/sync_serial.c | 2 +- arch/cris/arch-v32/drivers/sync_serial.c | 2 +- arch/mips/kernel/rtlx.c | 2 ++ arch/mips/kvm/mips.c | 2 ++ arch/powerpc/kvm/book3s_hv.c | 2 +- arch/powerpc/kvm/powerpc.c | 1 + arch/powerpc/platforms/83xx/suspend.c | 1 + arch/powerpc/platforms/cell/spufs/sched.c | 2 +- arch/powerpc/platforms/cell/spufs/spufs.h | 1 + arch/s390/crypto/prng.c | 2 ++ arch/s390/kvm/kvm-s390.c | 2 ++ arch/s390/kvm/vsie.c | 2 ++ arch/sh/kernel/cpu/fpu.c | 2 +- arch/sh/kernel/disassemble.c | 2 ++ arch/sh/kernel/process.c | 2 +- arch/um/drivers/random.c | 2 +- arch/x86/kernel/apm_32.c | 2 +- block/blk-cgroup.c | 1 + block/blk-mq.c | 1 + crypto/algboss.c | 2 +- crypto/algif_aead.c | 1 + crypto/algif_skcipher.c | 1 + crypto/api.c | 2 +- drivers/atm/horizon.c | 1 + drivers/base/core.c | 1 + drivers/base/power/wakeup.c | 2 +- drivers/block/drbd/drbd_main.c | 1 + drivers/block/drbd/drbd_receiver.c | 1 + drivers/block/drbd/drbd_worker.c | 2 +- drivers/block/swim3.c | 2 +- drivers/char/applicom.c | 2 +- drivers/char/hpet.c | 1 + drivers/char/hw_random/core.c | 1 + drivers/char/ipmi/ipmi_watchdog.c | 1 + drivers/char/lp.c | 2 +- drivers/char/ppdev.c | 2 +- drivers/char/rtc.c | 2 +- drivers/char/snsc.c | 2 +- drivers/dma-buf/dma-fence.c | 1 + drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c | 2 +- drivers/gpu/drm/vc4/vc4_gem.c | 1 + drivers/gpu/vga/vgaarb.c | 2 +- drivers/hid/hid-debug.c | 2 +- drivers/hid/hid-roccat.c | 2 +- drivers/hid/hidraw.c | 2 +- drivers/hid/usbhid/hiddev.c | 1 + drivers/hsi/clients/cmt_speech.c | 2 +- drivers/i2c/busses/i2c-ibm_iic.c | 2 ++ drivers/i2c/busses/i2c-mpc.c | 2 +- drivers/iio/industrialio-buffer.c | 2 +- drivers/infiniband/ulp/ipoib/ipoib_cm.c | 1 + drivers/infiniband/ulp/ipoib/ipoib_vlan.c | 1 + drivers/isdn/capi/kcapi.c | 2 +- drivers/isdn/mISDN/timerdev.c | 2 ++ drivers/lguest/core.c | 1 + drivers/macintosh/adb.c | 2 +- drivers/macintosh/smu.c | 1 + drivers/macintosh/via-pmu.c | 2 +- drivers/mailbox/mailbox-test.c | 1 + drivers/md/dm.c | 1 + drivers/media/dvb-core/dvb_ca_en50221.c | 2 +- drivers/media/dvb-core/dvb_demux.c | 2 +- drivers/media/dvb-core/dvb_frontend.c | 2 +- drivers/media/pci/cx18/cx18-driver.h | 2 +- drivers/media/pci/ivtv/ivtv-driver.h | 35 +++++++++++----------- drivers/media/pci/pt1/pt1.c | 1 + drivers/media/pci/pt3/pt3.c | 1 + drivers/media/pci/solo6x10/solo6x10-i2c.c | 1 + drivers/media/pci/zoran/zoran_device.c | 1 + drivers/media/platform/vivid/vivid-radio-rx.c | 2 ++ drivers/media/platform/vivid/vivid-radio-tx.c | 1 + drivers/media/rc/lirc_dev.c | 2 +- drivers/media/usb/cpia2/cpia2_core.c | 1 + drivers/media/usb/gspca/cpia1.c | 2 ++ drivers/misc/cxl/fault.c | 2 +- drivers/misc/cxl/file.c | 2 +- drivers/misc/ibmasm/r_heartbeat.c | 2 +- drivers/misc/lis3lv02d/lis3lv02d.c | 1 + drivers/misc/mei/bus.c | 2 +- drivers/misc/mei/client.c | 2 +- drivers/misc/mei/main.c | 2 +- drivers/misc/mic/scif/scif_main.h | 2 +- drivers/misc/vexpress-syscfg.c | 2 +- drivers/mtd/tests/mtd_test.h | 2 +- drivers/net/bonding/bond_options.c | 2 ++ drivers/net/bonding/bond_sysfs.c | 2 +- drivers/net/can/softing/softing_fw.c | 2 +- drivers/net/ethernet/broadcom/tg3.c | 1 + drivers/net/ethernet/cavium/liquidio/octeon_main.h | 2 ++ drivers/net/ethernet/sfc/falcon/falcon.c | 2 ++ drivers/net/irda/stir4200.c | 1 + drivers/net/macvtap.c | 2 +- drivers/net/ppp/ppp_generic.c | 1 + drivers/net/tun.c | 1 + drivers/net/usb/hso.c | 2 +- drivers/net/usb/qmi_wwan.c | 1 + drivers/net/wan/cosa.c | 2 +- drivers/net/wireless/ath/ath6kl/cfg80211.c | 1 + drivers/net/wireless/broadcom/b43legacy/main.c | 2 +- drivers/net/wireless/intersil/hostap/hostap_hw.c | 2 +- .../net/wireless/intersil/hostap/hostap_ioctl.c | 2 +- drivers/oprofile/event_buffer.c | 2 +- drivers/parport/daisy.c | 2 +- drivers/parport/ieee1284.c | 2 +- drivers/parport/ieee1284_ops.c | 2 +- drivers/parport/parport_ip32.c | 2 +- drivers/parport/parport_pc.c | 2 +- drivers/parport/share.c | 2 +- drivers/pci/access.c | 2 +- drivers/pci/hotplug/cpci_hotplug_core.c | 1 + drivers/pci/hotplug/cpqphp.h | 2 +- drivers/pci/hotplug/pciehp.h | 2 +- drivers/pci/hotplug/shpchp.h | 2 +- drivers/rtc/rtc-dev.c | 2 +- drivers/s390/cio/device.c | 1 + drivers/scsi/lpfc/lpfc_vport.c | 1 + drivers/scsi/osst.c | 2 +- drivers/scsi/st.c | 2 +- drivers/soc/fsl/qbman/dpaa_sys.h | 1 + drivers/staging/comedi/comedi_fops.c | 2 +- drivers/staging/dgnc/dgnc_tty.c | 2 +- drivers/staging/dgnc/dgnc_utils.c | 2 +- drivers/staging/greybus/uart.c | 2 +- .../lustre/lustre/include/lustre/lustre_user.h | 1 + drivers/staging/lustre/lustre/include/lustre_lib.h | 2 +- .../staging/lustre/lustre/include/obd_support.h | 2 ++ drivers/staging/media/lirc/lirc_sir.c | 2 +- drivers/staging/media/lirc/lirc_zilog.c | 2 +- drivers/staging/speakup/speakup_soft.c | 2 +- drivers/target/iscsi/cxgbit/cxgbit_target.c | 2 ++ drivers/tty/n_gsm.c | 2 +- drivers/tty/serial/crisv10.c | 2 +- drivers/tty/serial/serial_core.c | 1 + drivers/tty/tty_ioctl.c | 2 +- drivers/tty/tty_port.c | 2 +- drivers/uio/uio.c | 2 +- drivers/usb/class/cdc-acm.c | 1 + drivers/usb/class/usblp.c | 2 +- drivers/usb/gadget/function/f_fs.c | 1 + drivers/usb/image/mdc800.c | 2 +- drivers/usb/misc/adutux.c | 1 + drivers/usb/misc/idmouse.c | 1 + drivers/usb/misc/rio500.c | 2 +- drivers/usb/misc/uss720.c | 1 + drivers/usb/mon/mon_bin.c | 1 + drivers/usb/mon/mon_text.c | 1 + drivers/usb/serial/digi_acceleport.c | 1 + drivers/usb/serial/generic.c | 1 + drivers/vhost/net.c | 1 + drivers/vhost/vhost.c | 1 + drivers/video/fbdev/cobalt_lcdfb.c | 1 + .../fbdev/omap2/omapfb/displays/panel-dsi-cm.c | 2 +- fs/afs/rxrpc.c | 2 ++ fs/aio.c | 2 +- fs/btrfs/ctree.h | 1 + fs/ceph/caps.c | 2 +- fs/cifs/inode.c | 2 ++ fs/coda/psdev.c | 2 +- fs/dlm/user.c | 1 + fs/ecryptfs/read_write.c | 2 ++ fs/eventfd.c | 2 +- fs/eventpoll.c | 2 +- fs/ext4/ext4.h | 1 + fs/f2fs/data.c | 1 + fs/fuse/dev.c | 1 + fs/gfs2/lock_dlm.c | 1 + fs/gfs2/super.c | 2 +- fs/hpfs/hpfs_fn.h | 2 +- fs/hugetlbfs/inode.c | 2 +- fs/jffs2/nodemgmt.c | 2 +- fs/nfs/inode.c | 2 +- fs/nilfs2/segment.c | 2 ++ fs/notify/fanotify/fanotify_user.c | 1 + fs/notify/inotify/inotify_user.c | 2 +- fs/ntfs/file.c | 2 +- fs/ocfs2/alloc.c | 1 + fs/ocfs2/dlm/dlmdomain.c | 1 + fs/ocfs2/dlmfs/userdlm.c | 1 + fs/ocfs2/dlmglue.c | 1 + fs/orangefs/orangefs-kernel.h | 2 +- fs/overlayfs/copy_up.c | 2 +- fs/splice.c | 2 ++ fs/userfaultfd.c | 2 +- fs/xfs/xfs_linux.h | 2 +- include/drm/drm_os_linux.h | 1 + include/linux/sunrpc/types.h | 1 + include/media/v4l2-ioctl.h | 1 + include/net/busy_poll.h | 1 + kernel/locking/mutex.c | 2 +- kernel/locking/rtmutex.c | 2 +- kernel/locking/rwsem-spinlock.c | 2 +- kernel/locking/rwsem-xadd.c | 2 +- kernel/rcu/rcutorture.c | 2 +- kernel/sched/completion.c | 2 +- kernel/sched/swait.c | 2 +- kernel/sched/wait.c | 2 +- kernel/time/alarmtimer.c | 1 + kernel/time/hrtimer.c | 2 +- kernel/time/timer.c | 2 +- lib/percpu_ida.c | 2 +- mm/compaction.c | 1 + mm/gup.c | 2 +- mm/hugetlb.c | 1 + mm/memory_hotplug.c | 1 + mm/shmem.c | 1 + mm/userfaultfd.c | 1 + net/atm/svc.c | 2 +- net/bluetooth/af_bluetooth.c | 2 ++ net/bluetooth/cmtp/capi.c | 2 +- net/bluetooth/hci_request.c | 2 ++ net/bluetooth/l2cap_sock.c | 1 + net/bluetooth/rfcomm/sock.c | 1 + net/bluetooth/sco.c | 1 + net/bridge/br_sysfs_br.c | 1 + net/bridge/br_sysfs_if.c | 1 + net/core/ethtool.c | 2 +- net/core/net-sysfs.c | 1 + net/dccp/output.c | 1 + net/ipv4/devinet.c | 1 + net/ipv6/addrconf.c | 1 + net/irda/ircomm/ircomm_tty.c | 2 +- net/irda/irnet/irnet_ppp.c | 3 +- net/iucv/af_iucv.c | 2 +- net/kcm/kcmsock.c | 2 ++ net/llc/af_llc.c | 2 ++ net/nfc/llcp_sock.c | 1 + net/phonet/pep.c | 1 + net/phonet/socket.c | 2 ++ net/rxrpc/conn_client.c | 2 ++ net/rxrpc/recvmsg.c | 2 ++ net/rxrpc/sendmsg.c | 2 ++ net/tipc/socket.c | 2 ++ net/vmw_vsock/af_vsock.c | 1 + net/vmw_vsock/virtio_transport_common.c | 1 + sound/core/control.c | 1 + sound/core/hwdep.c | 1 + sound/core/oss/pcm_oss.c | 1 + sound/core/pcm_lib.c | 1 + sound/core/pcm_native.c | 1 + sound/core/rawmidi.c | 2 +- sound/core/seq/oss/seq_oss_device.h | 2 +- sound/core/seq/oss/seq_oss_writeq.c | 1 + sound/core/seq/seq_fifo.c | 2 ++ sound/core/seq/seq_memory.c | 1 + sound/core/timer.c | 1 + sound/firewire/bebob/bebob.h | 1 + sound/firewire/dice/dice.h | 1 + sound/firewire/digi00x/digi00x.h | 1 + sound/firewire/fireworks/fireworks.h | 1 + sound/firewire/oxfw/oxfw.h | 1 + sound/firewire/tascam/tascam.h | 1 + sound/isa/gus/gus_pcm.c | 2 ++ sound/isa/msnd/msnd.c | 1 + sound/isa/sb/emu8000.c | 2 +- sound/isa/sb/emu8000_patch.c | 2 ++ sound/isa/sb/emu8000_pcm.c | 2 ++ sound/isa/wavefront/wavefront_synth.c | 1 + sound/oss/dmabuf.c | 2 ++ sound/oss/dmasound/dmasound_core.c | 1 + sound/oss/midibuf.c | 2 ++ sound/oss/msnd_pinnacle.c | 2 ++ sound/oss/sound_config.h | 1 + sound/oss/swarm_cs4297a.c | 2 +- virt/kvm/kvm_main.c | 2 +- 265 files changed, 319 insertions(+), 139 deletions(-) (limited to 'arch/x86') diff --git a/arch/arm/common/bL_switcher.c b/arch/arm/common/bL_switcher.c index 083c9e517d22..57f3b7512636 100644 --- a/arch/arm/common/bL_switcher.c +++ b/arch/arm/common/bL_switcher.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v10/drivers/sync_serial.c b/arch/cris/arch-v10/drivers/sync_serial.c index 9ac75d68f184..cc62572c1b94 100644 --- a/arch/cris/arch-v10/drivers/sync_serial.c +++ b/arch/cris/arch-v10/drivers/sync_serial.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/cris/arch-v32/drivers/sync_serial.c b/arch/cris/arch-v32/drivers/sync_serial.c index ef515af1a377..8efcc1a899a8 100644 --- a/arch/cris/arch-v32/drivers/sync_serial.c +++ b/arch/cris/arch-v32/drivers/sync_serial.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/rtlx.c b/arch/mips/kernel/rtlx.c index c5c4fd54d797..b80dd8b17a76 100644 --- a/arch/mips/kernel/rtlx.c +++ b/arch/mips/kernel/rtlx.c @@ -12,6 +12,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/mips/kvm/mips.c b/arch/mips/kvm/mips.c index ed81e5ac1426..15a1b1716c2e 100644 --- a/arch/mips/kvm/mips.c +++ b/arch/mips/kvm/mips.c @@ -16,8 +16,10 @@ #include #include #include +#include #include #include + #include #include #include diff --git a/arch/powerpc/kvm/book3s_hv.c b/arch/powerpc/kvm/book3s_hv.c index 1e107ece4e37..fc56a9ce785d 100644 --- a/arch/powerpc/kvm/book3s_hv.c +++ b/arch/powerpc/kvm/book3s_hv.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kvm/powerpc.c b/arch/powerpc/kvm/powerpc.c index 2b38d824e9e5..95c91a9de351 100644 --- a/arch/powerpc/kvm/powerpc.c +++ b/arch/powerpc/kvm/powerpc.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/83xx/suspend.c b/arch/powerpc/platforms/83xx/suspend.c index 08f92f6ed228..978b85bb3233 100644 --- a/arch/powerpc/platforms/83xx/suspend.c +++ b/arch/powerpc/platforms/83xx/suspend.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/sched.c b/arch/powerpc/platforms/cell/spufs/sched.c index d317c84dc794..1fbb5da17dd2 100644 --- a/arch/powerpc/platforms/cell/spufs/sched.c +++ b/arch/powerpc/platforms/cell/spufs/sched.c @@ -23,7 +23,7 @@ #undef DEBUG #include -#include +#include #include #include #include diff --git a/arch/powerpc/platforms/cell/spufs/spufs.h b/arch/powerpc/platforms/cell/spufs/spufs.h index aac733966092..5e59f80e95db 100644 --- a/arch/powerpc/platforms/cell/spufs/spufs.h +++ b/arch/powerpc/platforms/cell/spufs/spufs.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/s390/crypto/prng.c b/arch/s390/crypto/prng.c index 85b7f5efe06a..5a3ec04a7082 100644 --- a/arch/s390/crypto/prng.c +++ b/arch/s390/crypto/prng.c @@ -20,6 +20,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kvm/kvm-s390.c b/arch/s390/kvm/kvm-s390.c index f5694838234d..fd6cd05bb6a7 100644 --- a/arch/s390/kvm/kvm-s390.c +++ b/arch/s390/kvm/kvm-s390.c @@ -29,6 +29,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kvm/vsie.c b/arch/s390/kvm/vsie.c index 38556e395915..5491be39776b 100644 --- a/arch/s390/kvm/vsie.c +++ b/arch/s390/kvm/vsie.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 4d4737d099c6..37e35330aae6 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,4 +1,4 @@ -#include +#include #include #include #include diff --git a/arch/sh/kernel/disassemble.c b/arch/sh/kernel/disassemble.c index 64d5d8dded7c..015fee58014b 100644 --- a/arch/sh/kernel/disassemble.c +++ b/arch/sh/kernel/disassemble.c @@ -12,6 +12,8 @@ #include #include +#include + /* * Format of an instruction in memory. */ diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 30bff33e810e..80a61c5e3015 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/drivers/random.c b/arch/um/drivers/random.c index 57f03050c850..37c51a6be690 100644 --- a/arch/um/drivers/random.c +++ b/arch/um/drivers/random.c @@ -6,7 +6,7 @@ * This software may be used and distributed according to the terms * of the GNU General Public License, incorporated herein by reference. */ -#include +#include #include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index 4a7080c84a5a..dc04b30cbd60 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -218,7 +218,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index 295e98c2c8cc..bbe7ee00bd3d 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-mq.c b/block/blk-mq.c index 746c14e0d157..6f35b6fd4799 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/algboss.c b/crypto/algboss.c index ccb85e1798f2..960d8548171b 100644 --- a/crypto/algboss.c +++ b/crypto/algboss.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include diff --git a/crypto/algif_aead.c b/crypto/algif_aead.c index 533265f110e0..5a8053758657 100644 --- a/crypto/algif_aead.c +++ b/crypto/algif_aead.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/algif_skcipher.c b/crypto/algif_skcipher.c index a9e79d8eff87..43839b00fe6c 100644 --- a/crypto/algif_skcipher.c +++ b/crypto/algif_skcipher.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/crypto/api.c b/crypto/api.c index b16ce1653284..941cd4c6c7ec 100644 --- a/crypto/api.c +++ b/crypto/api.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include "internal.h" diff --git a/drivers/atm/horizon.c b/drivers/atm/horizon.c index 2bf1ef1c3c78..0f18480b33b5 100644 --- a/drivers/atm/horizon.c +++ b/drivers/atm/horizon.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/base/core.c b/drivers/base/core.c index 3050e6f99403..684bda4d14a1 100644 --- a/drivers/base/core.c +++ b/drivers/base/core.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include "base.h" diff --git a/drivers/base/power/wakeup.c b/drivers/base/power/wakeup.c index f546f8f107b0..136854970489 100644 --- a/drivers/base/power/wakeup.c +++ b/drivers/base/power/wakeup.c @@ -8,7 +8,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c index 116509852a34..c7d530a95e53 100644 --- a/drivers/block/drbd/drbd_main.c +++ b/drivers/block/drbd/drbd_main.c @@ -52,6 +52,7 @@ #define __KERNEL_SYSCALLS__ #include #include +#include #include #include "drbd_int.h" diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8b40a5b2f8e6..aa6bf9692eff 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #define __KERNEL_SYSCALLS__ #include diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c index c6755c9a0aea..3bff33f21435 100644 --- a/drivers/block/drbd/drbd_worker.c +++ b/drivers/block/drbd/drbd_worker.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/block/swim3.c b/drivers/block/swim3.c index aabd8e9d3035..61b3ffa4f458 100644 --- a/drivers/block/swim3.c +++ b/drivers/block/swim3.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/applicom.c b/drivers/char/applicom.c index e5c62dcf2c11..e770ad977472 100644 --- a/drivers/char/applicom.c +++ b/drivers/char/applicom.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/hpet.c b/drivers/char/hpet.c index 20b32bb8c2af..8bdc38d81adf 100644 --- a/drivers/char/hpet.c +++ b/drivers/char/hpet.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/hw_random/core.c b/drivers/char/hw_random/core.c index 5c654b5d4adf..503a41dfa193 100644 --- a/drivers/char/hw_random/core.c +++ b/drivers/char/hw_random/core.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/char/ipmi/ipmi_watchdog.c b/drivers/char/ipmi/ipmi_watchdog.c index 30b9e83bf1bf..5ca24d9b101b 100644 --- a/drivers/char/ipmi/ipmi_watchdog.c +++ b/drivers/char/ipmi/ipmi_watchdog.c @@ -53,6 +53,7 @@ #include #include #include +#include #ifdef CONFIG_X86 /* diff --git a/drivers/char/lp.c b/drivers/char/lp.c index 5b6742770656..565e4cf04a02 100644 --- a/drivers/char/lp.c +++ b/drivers/char/lp.c @@ -117,7 +117,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/ppdev.c b/drivers/char/ppdev.c index 87885d146dbb..2a558c706581 100644 --- a/drivers/char/ppdev.c +++ b/drivers/char/ppdev.c @@ -58,7 +58,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/char/rtc.c b/drivers/char/rtc.c index 35259961cc38..974d48927b07 100644 --- a/drivers/char/rtc.c +++ b/drivers/char/rtc.c @@ -74,7 +74,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/char/snsc.c b/drivers/char/snsc.c index ec07f0e99732..6aa32679fd58 100644 --- a/drivers/char/snsc.c +++ b/drivers/char/snsc.c @@ -16,7 +16,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/dma-buf/dma-fence.c b/drivers/dma-buf/dma-fence.c index d1f1f456f5c4..d195d617076d 100644 --- a/drivers/dma-buf/dma-fence.c +++ b/drivers/dma-buf/dma-fence.c @@ -22,6 +22,7 @@ #include #include #include +#include #define CREATE_TRACE_POINTS #include diff --git a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c index a2bb855a2851..ac5800c72cb4 100644 --- a/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c +++ b/drivers/gpu/drm/omapdrm/displays/panel-dsi-cm.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/gpu/drm/vc4/vc4_gem.c b/drivers/gpu/drm/vc4/vc4_gem.c index ab3016982466..1eef98c3331d 100644 --- a/drivers/gpu/drm/vc4/vc4_gem.c +++ b/drivers/gpu/drm/vc4/vc4_gem.c @@ -26,6 +26,7 @@ #include #include #include +#include #include "uapi/drm/vc4_drm.h" #include "vc4_drv.h" diff --git a/drivers/gpu/vga/vgaarb.c b/drivers/gpu/vga/vgaarb.c index 0f5b2dd24507..92f1452dad57 100644 --- a/drivers/gpu/vga/vgaarb.c +++ b/drivers/gpu/vga/vgaarb.c @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-debug.c b/drivers/hid/hid-debug.c index acfb522a432a..c6c9c51c806f 100644 --- a/drivers/hid/hid-debug.c +++ b/drivers/hid/hid-debug.c @@ -30,7 +30,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/hid/hid-roccat.c b/drivers/hid/hid-roccat.c index 76d06cf87b2a..fb77dec720a4 100644 --- a/drivers/hid/hid-roccat.c +++ b/drivers/hid/hid-roccat.c @@ -25,7 +25,7 @@ #include #include -#include +#include #include #include diff --git a/drivers/hid/hidraw.c b/drivers/hid/hidraw.c index f0e2757cb909..ec530454e6f6 100644 --- a/drivers/hid/hidraw.c +++ b/drivers/hid/hidraw.c @@ -33,7 +33,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/hid/usbhid/hiddev.c b/drivers/hid/usbhid/hiddev.c index 700145b15088..774bd701dae0 100644 --- a/drivers/hid/usbhid/hiddev.c +++ b/drivers/hid/usbhid/hiddev.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/hsi/clients/cmt_speech.c b/drivers/hsi/clients/cmt_speech.c index 7175e6bedf21..727f968ac1cb 100644 --- a/drivers/hsi/clients/cmt_speech.c +++ b/drivers/hsi/clients/cmt_speech.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/i2c/busses/i2c-ibm_iic.c b/drivers/i2c/busses/i2c-ibm_iic.c index 412b91d255ad..961c5f42d956 100644 --- a/drivers/i2c/busses/i2c-ibm_iic.c +++ b/drivers/i2c/busses/i2c-ibm_iic.c @@ -37,6 +37,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/i2c/busses/i2c-mpc.c b/drivers/i2c/busses/i2c-mpc.c index 565a49a0c564..96caf378b1dc 100644 --- a/drivers/i2c/busses/i2c-mpc.c +++ b/drivers/i2c/busses/i2c-mpc.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/iio/industrialio-buffer.c b/drivers/iio/industrialio-buffer.c index 4972986f6455..d2b465140a6b 100644 --- a/drivers/iio/industrialio-buffer.c +++ b/drivers/iio/industrialio-buffer.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include "iio_core.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_cm.c b/drivers/infiniband/ulp/ipoib/ipoib_cm.c index a6d6c617b597..0cdf2b7f272f 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_cm.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_cm.c @@ -38,6 +38,7 @@ #include #include #include +#include #include "ipoib.h" diff --git a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c index deedb6fc1b05..3e10e3dac2e7 100644 --- a/drivers/infiniband/ulp/ipoib/ipoib_vlan.c +++ b/drivers/infiniband/ulp/ipoib/ipoib_vlan.c @@ -31,6 +31,7 @@ */ #include +#include #include #include diff --git a/drivers/isdn/capi/kcapi.c b/drivers/isdn/capi/kcapi.c index 49d0f70c2bae..1dfd1085a04f 100644 --- a/drivers/isdn/capi/kcapi.c +++ b/drivers/isdn/capi/kcapi.c @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/isdn/mISDN/timerdev.c b/drivers/isdn/mISDN/timerdev.c index 9438d7ec3308..b1e135fc1fb5 100644 --- a/drivers/isdn/mISDN/timerdev.c +++ b/drivers/isdn/mISDN/timerdev.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include "core.h" static DEFINE_MUTEX(mISDN_mutex); diff --git a/drivers/lguest/core.c b/drivers/lguest/core.c index ac219045daf7..395ed1961dbf 100644 --- a/drivers/lguest/core.c +++ b/drivers/lguest/core.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/macintosh/adb.c b/drivers/macintosh/adb.c index 152414e6378a..fee939efc4fc 100644 --- a/drivers/macintosh/adb.c +++ b/drivers/macintosh/adb.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/macintosh/smu.c b/drivers/macintosh/smu.c index 227869159ac0..1ac66421877a 100644 --- a/drivers/macintosh/smu.c +++ b/drivers/macintosh/smu.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include diff --git a/drivers/macintosh/via-pmu.c b/drivers/macintosh/via-pmu.c index 43b8db2b5445..cce99f72e4ae 100644 --- a/drivers/macintosh/via-pmu.c +++ b/drivers/macintosh/via-pmu.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mailbox/mailbox-test.c b/drivers/mailbox/mailbox-test.c index 9c79f8019d2a..97fb956bb6e0 100644 --- a/drivers/mailbox/mailbox-test.c +++ b/drivers/mailbox/mailbox-test.c @@ -21,6 +21,7 @@ #include #include #include +#include #define MBOX_MAX_SIG_LEN 8 #define MBOX_MAX_MSG_LEN 128 diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9f37d7fc2786..f4ffd1eb8f44 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_ca_en50221.c b/drivers/media/dvb-core/dvb_ca_en50221.c index 000d737ad827..8d65028c7a74 100644 --- a/drivers/media/dvb-core/dvb_ca_en50221.c +++ b/drivers/media/dvb-core/dvb_ca_en50221.c @@ -34,7 +34,7 @@ #include #include #include -#include +#include #include #include "dvb_ca_en50221.h" diff --git a/drivers/media/dvb-core/dvb_demux.c b/drivers/media/dvb-core/dvb_demux.c index 4eac71e50c5f..6628f80d184f 100644 --- a/drivers/media/dvb-core/dvb_demux.c +++ b/drivers/media/dvb-core/dvb_demux.c @@ -19,7 +19,7 @@ #define pr_fmt(fmt) "dvb_demux: " fmt -#include +#include #include #include #include diff --git a/drivers/media/dvb-core/dvb_frontend.c b/drivers/media/dvb-core/dvb_frontend.c index 85ae3669aa66..e3fff8f64d37 100644 --- a/drivers/media/dvb-core/dvb_frontend.c +++ b/drivers/media/dvb-core/dvb_frontend.c @@ -29,7 +29,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/cx18/cx18-driver.h b/drivers/media/pci/cx18/cx18-driver.h index fef3c736fcba..7be2088c45fe 100644 --- a/drivers/media/pci/cx18/cx18-driver.h +++ b/drivers/media/pci/cx18/cx18-driver.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/pci/ivtv/ivtv-driver.h b/drivers/media/pci/ivtv/ivtv-driver.h index cde452e30746..d27c5c2c07ea 100644 --- a/drivers/media/pci/ivtv/ivtv-driver.h +++ b/drivers/media/pci/ivtv/ivtv-driver.h @@ -38,37 +38,38 @@ * using information provided by Jiun-Kuei Jung @ AVerMedia. */ -#include +#include +#include #include -#include +#include #include +#include +#include +#include #include #include -#include -#include -#include -#include -#include #include -#include -#include +#include #include -#include #include -#include +#include +#include #include -#include #include -#include +#include -#include -#include -#include +#include +#include #include +#include #include #include #include -#include +#include +#include +#include + +#include /* Memory layout */ #define IVTV_ENCODER_OFFSET 0x00000000 diff --git a/drivers/media/pci/pt1/pt1.c b/drivers/media/pci/pt1/pt1.c index da1eebd2016f..3219d2f3271e 100644 --- a/drivers/media/pci/pt1/pt1.c +++ b/drivers/media/pci/pt1/pt1.c @@ -18,6 +18,7 @@ */ #include +#include #include #include #include diff --git a/drivers/media/pci/pt3/pt3.c b/drivers/media/pci/pt3/pt3.c index 77f4d15f322b..e8b5d0992157 100644 --- a/drivers/media/pci/pt3/pt3.c +++ b/drivers/media/pci/pt3/pt3.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "dmxdev.h" #include "dvbdev.h" diff --git a/drivers/media/pci/solo6x10/solo6x10-i2c.c b/drivers/media/pci/solo6x10/solo6x10-i2c.c index c908672b2c40..e83bb79f9349 100644 --- a/drivers/media/pci/solo6x10/solo6x10-i2c.c +++ b/drivers/media/pci/solo6x10/solo6x10-i2c.c @@ -27,6 +27,7 @@ * thread context, ACK the interrupt, and move on. -- BenC */ #include +#include #include "solo6x10.h" diff --git a/drivers/media/pci/zoran/zoran_device.c b/drivers/media/pci/zoran/zoran_device.c index 671907a6e6b6..40adceebca7e 100644 --- a/drivers/media/pci/zoran/zoran_device.c +++ b/drivers/media/pci/zoran/zoran_device.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/media/platform/vivid/vivid-radio-rx.c b/drivers/media/platform/vivid/vivid-radio-rx.c index f99092ca8f5c..47c36c26096b 100644 --- a/drivers/media/platform/vivid/vivid-radio-rx.c +++ b/drivers/media/platform/vivid/vivid-radio-rx.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/media/platform/vivid/vivid-radio-tx.c b/drivers/media/platform/vivid/vivid-radio-tx.c index 8c59d4f53200..0e8025b7b4dd 100644 --- a/drivers/media/platform/vivid/vivid-radio-tx.c +++ b/drivers/media/platform/vivid/vivid-radio-tx.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/drivers/media/rc/lirc_dev.c b/drivers/media/rc/lirc_dev.c index a54ca531d8ef..393dccaabdd0 100644 --- a/drivers/media/rc/lirc_dev.c +++ b/drivers/media/rc/lirc_dev.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/media/usb/cpia2/cpia2_core.c b/drivers/media/usb/cpia2/cpia2_core.c index 431dd0b4b332..b1d13444ff30 100644 --- a/drivers/media/usb/cpia2/cpia2_core.c +++ b/drivers/media/usb/cpia2/cpia2_core.c @@ -32,6 +32,7 @@ #include #include #include +#include #define FIRMWARE "cpia2/stv0672_vp4.bin" MODULE_FIRMWARE(FIRMWARE); diff --git a/drivers/media/usb/gspca/cpia1.c b/drivers/media/usb/gspca/cpia1.c index 23d3285f182a..e91d00762e94 100644 --- a/drivers/media/usb/gspca/cpia1.c +++ b/drivers/media/usb/gspca/cpia1.c @@ -27,6 +27,8 @@ #define MODULE_NAME "cpia1" #include +#include + #include "gspca.h" MODULE_AUTHOR("Hans de Goede "); diff --git a/drivers/misc/cxl/fault.c b/drivers/misc/cxl/fault.c index e33011e3e7e2..2fa015c05561 100644 --- a/drivers/misc/cxl/fault.c +++ b/drivers/misc/cxl/fault.c @@ -8,7 +8,7 @@ */ #include -#include +#include #include #include #include diff --git a/drivers/misc/cxl/file.c b/drivers/misc/cxl/file.c index 859959f19f10..e7139c76f961 100644 --- a/drivers/misc/cxl/file.c +++ b/drivers/misc/cxl/file.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/ibmasm/r_heartbeat.c b/drivers/misc/ibmasm/r_heartbeat.c index 232034f5da48..5c7dd26db716 100644 --- a/drivers/misc/ibmasm/r_heartbeat.c +++ b/drivers/misc/ibmasm/r_heartbeat.c @@ -20,7 +20,7 @@ * */ -#include +#include #include "ibmasm.h" #include "dot_command.h" diff --git a/drivers/misc/lis3lv02d/lis3lv02d.c b/drivers/misc/lis3lv02d/lis3lv02d.c index fb8705fc3aca..e389b0b5278d 100644 --- a/drivers/misc/lis3lv02d/lis3lv02d.c +++ b/drivers/misc/lis3lv02d/lis3lv02d.c @@ -23,6 +23,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/misc/mei/bus.c b/drivers/misc/mei/bus.c index cb3e9e0ca049..df5f78ae3d25 100644 --- a/drivers/misc/mei/bus.c +++ b/drivers/misc/mei/bus.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mei/client.c b/drivers/misc/mei/client.c index 68fe37b5bc52..d3e3372424d6 100644 --- a/drivers/misc/mei/client.c +++ b/drivers/misc/mei/client.c @@ -14,7 +14,7 @@ * */ -#include +#include #include #include #include diff --git a/drivers/misc/mei/main.c b/drivers/misc/mei/main.c index 9d0b7050c79a..bf816449cd40 100644 --- a/drivers/misc/mei/main.c +++ b/drivers/misc/mei/main.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/misc/mic/scif/scif_main.h b/drivers/misc/mic/scif/scif_main.h index a08f0b600a9e..0e5eff9ad080 100644 --- a/drivers/misc/mic/scif/scif_main.h +++ b/drivers/misc/mic/scif/scif_main.h @@ -18,7 +18,7 @@ #ifndef SCIF_MAIN_H #define SCIF_MAIN_H -#include +#include #include #include #include diff --git a/drivers/misc/vexpress-syscfg.c b/drivers/misc/vexpress-syscfg.c index c344483fa7d6..2cde80c7bb93 100644 --- a/drivers/misc/vexpress-syscfg.c +++ b/drivers/misc/vexpress-syscfg.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/mtd/tests/mtd_test.h b/drivers/mtd/tests/mtd_test.h index 4b7bee17c924..04afd0e7074f 100644 --- a/drivers/mtd/tests/mtd_test.h +++ b/drivers/mtd/tests/mtd_test.h @@ -1,5 +1,5 @@ #include -#include +#include static inline int mtdtest_relax(void) { diff --git a/drivers/net/bonding/bond_options.c b/drivers/net/bonding/bond_options.c index 577e57cad1dc..1bcbb8913e17 100644 --- a/drivers/net/bonding/bond_options.c +++ b/drivers/net/bonding/bond_options.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include static int bond_option_active_slave_set(struct bonding *bond, diff --git a/drivers/net/bonding/bond_sysfs.c b/drivers/net/bonding/bond_sysfs.c index e23c3ed737de..770623a0cc01 100644 --- a/drivers/net/bonding/bond_sysfs.c +++ b/drivers/net/bonding/bond_sysfs.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/can/softing/softing_fw.c b/drivers/net/can/softing/softing_fw.c index 4063215c9b54..aac58ce6e371 100644 --- a/drivers/net/can/softing/softing_fw.c +++ b/drivers/net/can/softing/softing_fw.c @@ -17,7 +17,7 @@ */ #include -#include +#include #include #include diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c index a448177990fe..30d1eb9ebec9 100644 --- a/drivers/net/ethernet/broadcom/tg3.c +++ b/drivers/net/ethernet/broadcom/tg3.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_main.h b/drivers/net/ethernet/cavium/liquidio/octeon_main.h index 8cd389148166..aa36e9ae7676 100644 --- a/drivers/net/ethernet/cavium/liquidio/octeon_main.h +++ b/drivers/net/ethernet/cavium/liquidio/octeon_main.h @@ -23,6 +23,8 @@ #ifndef _OCTEON_MAIN_H_ #define _OCTEON_MAIN_H_ +#include + #if BITS_PER_LONG == 32 #define CVM_CAST64(v) ((long long)(v)) #elif BITS_PER_LONG == 64 diff --git a/drivers/net/ethernet/sfc/falcon/falcon.c b/drivers/net/ethernet/sfc/falcon/falcon.c index c6ff0cc5ef18..93c713c1f627 100644 --- a/drivers/net/ethernet/sfc/falcon/falcon.c +++ b/drivers/net/ethernet/sfc/falcon/falcon.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include "net_driver.h" #include "bitfield.h" #include "efx.h" diff --git a/drivers/net/irda/stir4200.c b/drivers/net/irda/stir4200.c index 42da094b68dd..7ee514879531 100644 --- a/drivers/net/irda/stir4200.c +++ b/drivers/net/irda/stir4200.c @@ -40,6 +40,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/macvtap.c b/drivers/net/macvtap.c index a4bfc10b61dd..da85057680d6 100644 --- a/drivers/net/macvtap.c +++ b/drivers/net/macvtap.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c index a411b43a69eb..f9c0e62716ea 100644 --- a/drivers/net/ppp/ppp_generic.c +++ b/drivers/net/ppp/ppp_generic.c @@ -24,6 +24,7 @@ #include #include +#include #include #include #include diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 30863e378925..dc1b1dd9157c 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/net/usb/hso.c b/drivers/net/usb/hso.c index e7b516342678..4f2e8141dbe2 100644 --- a/drivers/net/usb/hso.c +++ b/drivers/net/usb/hso.c @@ -52,7 +52,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt -#include +#include #include #include #include diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c index 24d5272cdce5..805674550683 100644 --- a/drivers/net/usb/qmi_wwan.c +++ b/drivers/net/usb/qmi_wwan.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/drivers/net/wan/cosa.c b/drivers/net/wan/cosa.c index 087eb266601f..4ca71bca39ac 100644 --- a/drivers/net/wan/cosa.c +++ b/drivers/net/wan/cosa.c @@ -78,7 +78,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c index b7fe0af4cb24..363b30a549c2 100644 --- a/drivers/net/wireless/ath/ath6kl/cfg80211.c +++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c @@ -20,6 +20,7 @@ #include #include #include +#include #include "core.h" #include "cfg80211.h" diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c index e97ab2b91663..cdafebb9c936 100644 --- a/drivers/net/wireless/broadcom/b43legacy/main.c +++ b/drivers/net/wireless/broadcom/b43legacy/main.c @@ -36,7 +36,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c index 544ef7adde7d..04dfd040a650 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_hw.c +++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c index a5656bc0e6aa..b2c6b065b542 100644 --- a/drivers/net/wireless/intersil/hostap/hostap_ioctl.c +++ b/drivers/net/wireless/intersil/hostap/hostap_ioctl.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/oprofile/event_buffer.c b/drivers/oprofile/event_buffer.c index 67935fbbbcab..32888f2bd1a9 100644 --- a/drivers/oprofile/event_buffer.c +++ b/drivers/oprofile/event_buffer.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/parport/daisy.c b/drivers/parport/daisy.c index d998d0ed2bec..46eb15fb57ff 100644 --- a/drivers/parport/daisy.c +++ b/drivers/parport/daisy.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/parport/ieee1284.c b/drivers/parport/ieee1284.c index f9fd4b33a546..74cc6dd982d2 100644 --- a/drivers/parport/ieee1284.c +++ b/drivers/parport/ieee1284.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/ieee1284_ops.c b/drivers/parport/ieee1284_ops.c index 75071605d22f..a959224d011b 100644 --- a/drivers/parport/ieee1284_ops.c +++ b/drivers/parport/ieee1284_ops.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #undef DEBUG /* undef me for production */ diff --git a/drivers/parport/parport_ip32.c b/drivers/parport/parport_ip32.c index 30e981be14c2..dcbeeb220dda 100644 --- a/drivers/parport/parport_ip32.c +++ b/drivers/parport/parport_ip32.c @@ -102,7 +102,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/parport/parport_pc.c b/drivers/parport/parport_pc.c index 3e56e7deab8e..9d42dfe65d44 100644 --- a/drivers/parport/parport_pc.c +++ b/drivers/parport/parport_pc.c @@ -44,7 +44,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/parport/share.c b/drivers/parport/share.c index 3308427ed9f7..bc090daa850a 100644 --- a/drivers/parport/share.c +++ b/drivers/parport/share.c @@ -27,7 +27,7 @@ #include #include #include -#include +#include #include #include diff --git a/drivers/pci/access.c b/drivers/pci/access.c index b9dd37c8c9ce..8b7382705bf2 100644 --- a/drivers/pci/access.c +++ b/drivers/pci/access.c @@ -1,7 +1,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/pci/hotplug/cpci_hotplug_core.c b/drivers/pci/hotplug/cpci_hotplug_core.c index 7ec8a8f72c69..95f689f53920 100644 --- a/drivers/pci/hotplug/cpci_hotplug_core.c +++ b/drivers/pci/hotplug/cpci_hotplug_core.c @@ -27,6 +27,7 @@ #include #include +#include #include #include #include diff --git a/drivers/pci/hotplug/cpqphp.h b/drivers/pci/hotplug/cpqphp.h index 9103a7b9f3b9..48c8a066a6b7 100644 --- a/drivers/pci/hotplug/cpqphp.h +++ b/drivers/pci/hotplug/cpqphp.h @@ -32,7 +32,7 @@ #include /* for read? and write? functions */ #include /* for delays */ #include -#include /* for signal_pending() */ +#include /* for signal_pending() */ #define MY_NAME "cpqphp" diff --git a/drivers/pci/hotplug/pciehp.h b/drivers/pci/hotplug/pciehp.h index 37d70b5ad22f..06109d40c4ac 100644 --- a/drivers/pci/hotplug/pciehp.h +++ b/drivers/pci/hotplug/pciehp.h @@ -33,7 +33,7 @@ #include #include #include -#include /* signal_pending() */ +#include /* signal_pending() */ #include #include #include diff --git a/drivers/pci/hotplug/shpchp.h b/drivers/pci/hotplug/shpchp.h index 4da8fc601467..70c7ea6af034 100644 --- a/drivers/pci/hotplug/shpchp.h +++ b/drivers/pci/hotplug/shpchp.h @@ -33,7 +33,7 @@ #include #include #include -#include /* signal_pending(), struct timer_list */ +#include /* signal_pending(), struct timer_list */ #include #include diff --git a/drivers/rtc/rtc-dev.c b/drivers/rtc/rtc-dev.c index a6d9434addf6..6dc8f29697ab 100644 --- a/drivers/rtc/rtc-dev.c +++ b/drivers/rtc/rtc-dev.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include "rtc-core.h" static dev_t rtc_devt; diff --git a/drivers/s390/cio/device.c b/drivers/s390/cio/device.c index 79823ee9c100..b8006ea9099c 100644 --- a/drivers/s390/cio/device.c +++ b/drivers/s390/cio/device.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/lpfc/lpfc_vport.c b/drivers/scsi/lpfc/lpfc_vport.c index e18bbc66e83b..4e36998a266c 100644 --- a/drivers/scsi/lpfc/lpfc_vport.c +++ b/drivers/scsi/lpfc/lpfc_vport.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/drivers/scsi/osst.c b/drivers/scsi/osst.c index 75ac662793a3..c47f4b349bac 100644 --- a/drivers/scsi/osst.c +++ b/drivers/scsi/osst.c @@ -35,7 +35,7 @@ static const char * osst_version = "0.99.4"; #include #include -#include +#include #include #include #include diff --git a/drivers/scsi/st.c b/drivers/scsi/st.c index 81212d4bd9bf..e5ef78a6848e 100644 --- a/drivers/scsi/st.c +++ b/drivers/scsi/st.c @@ -23,7 +23,7 @@ static const char *verstr = "20160209"; #include #include -#include +#include #include #include #include diff --git a/drivers/soc/fsl/qbman/dpaa_sys.h b/drivers/soc/fsl/qbman/dpaa_sys.h index 2eaf3184f61d..2ce394aa4c95 100644 --- a/drivers/soc/fsl/qbman/dpaa_sys.h +++ b/drivers/soc/fsl/qbman/dpaa_sys.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/staging/comedi/comedi_fops.c b/drivers/staging/comedi/comedi_fops.c index 57e8599b54e6..8deac8d9225d 100644 --- a/drivers/staging/comedi/comedi_fops.c +++ b/drivers/staging/comedi/comedi_fops.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/dgnc/dgnc_tty.c b/drivers/staging/dgnc/dgnc_tty.c index c63e591631f6..c3b8fc54883d 100644 --- a/drivers/staging/dgnc/dgnc_tty.c +++ b/drivers/staging/dgnc/dgnc_tty.c @@ -19,7 +19,7 @@ */ #include -#include /* For jiffies, task states */ +#include /* For jiffies, task states, etc. */ #include /* For tasklet and interrupt structs/defines */ #include #include diff --git a/drivers/staging/dgnc/dgnc_utils.c b/drivers/staging/dgnc/dgnc_utils.c index 95272f4765fc..6f59240024d1 100644 --- a/drivers/staging/dgnc/dgnc_utils.c +++ b/drivers/staging/dgnc/dgnc_utils.c @@ -1,5 +1,5 @@ #include -#include +#include #include "dgnc_utils.h" /* diff --git a/drivers/staging/greybus/uart.c b/drivers/staging/greybus/uart.c index ab0dbf5cab5a..43255e2e9276 100644 --- a/drivers/staging/greybus/uart.c +++ b/drivers/staging/greybus/uart.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h index 21aec0ca9ad3..7d8628ce0d3b 100644 --- a/drivers/staging/lustre/lustre/include/lustre/lustre_user.h +++ b/drivers/staging/lustre/lustre/include/lustre/lustre_user.h @@ -44,6 +44,7 @@ #ifdef __KERNEL__ # include +# include # include /* snprintf() */ # include #else /* !__KERNEL__ */ diff --git a/drivers/staging/lustre/lustre/include/lustre_lib.h b/drivers/staging/lustre/lustre/include/lustre_lib.h index 27f3148c4344..b04d613846ee 100644 --- a/drivers/staging/lustre/lustre/include/lustre_lib.h +++ b/drivers/staging/lustre/lustre/include/lustre_lib.h @@ -42,7 +42,7 @@ * @{ */ -#include +#include #include #include #include "../../include/linux/libcfs/libcfs.h" diff --git a/drivers/staging/lustre/lustre/include/obd_support.h b/drivers/staging/lustre/lustre/include/obd_support.h index aaedec7d793c..dace6591a0a4 100644 --- a/drivers/staging/lustre/lustre/include/obd_support.h +++ b/drivers/staging/lustre/lustre/include/obd_support.h @@ -34,6 +34,8 @@ #define _OBD_SUPPORT #include +#include + #include "../../include/linux/libcfs/libcfs.h" #include "lustre_compat.h" #include "lprocfs_status.h" diff --git a/drivers/staging/media/lirc/lirc_sir.c b/drivers/staging/media/lirc/lirc_sir.c index c75ae43095ba..c6c3de94adaa 100644 --- a/drivers/staging/media/lirc/lirc_sir.c +++ b/drivers/staging/media/lirc/lirc_sir.c @@ -36,7 +36,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/drivers/staging/media/lirc/lirc_zilog.c b/drivers/staging/media/lirc/lirc_zilog.c index 34aac3e2eb87..e4a533b6beb3 100644 --- a/drivers/staging/media/lirc/lirc_zilog.c +++ b/drivers/staging/media/lirc/lirc_zilog.c @@ -42,7 +42,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/staging/speakup/speakup_soft.c b/drivers/staging/speakup/speakup_soft.c index ff68a384f9c2..d2ff0afd685a 100644 --- a/drivers/staging/speakup/speakup_soft.c +++ b/drivers/staging/speakup/speakup_soft.c @@ -22,7 +22,7 @@ #include #include /* for misc_register, and SYNTH_MINOR */ #include /* for poll_wait() */ -#include /* schedule(), signal_pending(), TASK_INTERRUPTIBLE */ +#include /* schedule(), signal_pending(), TASK_INTERRUPTIBLE */ #include "spk_priv.h" #include "speakup.h" diff --git a/drivers/target/iscsi/cxgbit/cxgbit_target.c b/drivers/target/iscsi/cxgbit/cxgbit_target.c index 8bcb9b71f764..0c3e8fce3695 100644 --- a/drivers/target/iscsi/cxgbit/cxgbit_target.c +++ b/drivers/target/iscsi/cxgbit/cxgbit_target.c @@ -8,6 +8,8 @@ #include #include +#include + #include #include #include diff --git a/drivers/tty/n_gsm.c b/drivers/tty/n_gsm.c index f3932baed07d..55577cf9b6a4 100644 --- a/drivers/tty/n_gsm.c +++ b/drivers/tty/n_gsm.c @@ -39,7 +39,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/crisv10.c b/drivers/tty/serial/crisv10.c index e92c23470e51..59a2a7e18b5a 100644 --- a/drivers/tty/serial/crisv10.c +++ b/drivers/tty/serial/crisv10.c @@ -12,7 +12,7 @@ static char *serial_version = "$Revision: 1.25 $"; #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/serial/serial_core.c b/drivers/tty/serial/serial_core.c index 9939c3d9912b..3fe56894974a 100644 --- a/drivers/tty/serial/serial_core.c +++ b/drivers/tty/serial/serial_core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/tty_ioctl.c b/drivers/tty/tty_ioctl.c index f27fc0f14c11..a9a978731c5b 100644 --- a/drivers/tty/tty_ioctl.c +++ b/drivers/tty/tty_ioctl.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/tty_port.c b/drivers/tty/tty_port.c index 5cd3cd932293..1d21a9c1d33e 100644 --- a/drivers/tty/tty_port.c +++ b/drivers/tty/tty_port.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/uio/uio.c b/drivers/uio/uio.c index 31d95dc9c202..60ce7fd54e89 100644 --- a/drivers/uio/uio.c +++ b/drivers/uio/uio.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/class/cdc-acm.c b/drivers/usb/class/cdc-acm.c index 235e305f8473..d5388938bc7a 100644 --- a/drivers/usb/class/cdc-acm.c +++ b/drivers/usb/class/cdc-acm.c @@ -32,6 +32,7 @@ #undef VERBOSE_DEBUG #include +#include #include #include #include diff --git a/drivers/usb/class/usblp.c b/drivers/usb/class/usblp.c index 071964c7847f..cc61055fb9be 100644 --- a/drivers/usb/class/usblp.c +++ b/drivers/usb/class/usblp.c @@ -49,7 +49,7 @@ #include #include -#include +#include #include #include #include diff --git a/drivers/usb/gadget/function/f_fs.c b/drivers/usb/gadget/function/f_fs.c index 87fccf611b69..a5b7cd615698 100644 --- a/drivers/usb/gadget/function/f_fs.c +++ b/drivers/usb/gadget/function/f_fs.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/drivers/usb/image/mdc800.c b/drivers/usb/image/mdc800.c index 5cf2633cdb04..e92540a21b6b 100644 --- a/drivers/usb/image/mdc800.c +++ b/drivers/usb/image/mdc800.c @@ -85,7 +85,7 @@ * (20/10/1999) */ -#include +#include #include #include #include diff --git a/drivers/usb/misc/adutux.c b/drivers/usb/misc/adutux.c index c5fa584d8f0a..db9a9e6ff6be 100644 --- a/drivers/usb/misc/adutux.c +++ b/drivers/usb/misc/adutux.c @@ -21,6 +21,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/drivers/usb/misc/idmouse.c b/drivers/usb/misc/idmouse.c index debc1fd74b0d..8b9fd7534f69 100644 --- a/drivers/usb/misc/idmouse.c +++ b/drivers/usb/misc/idmouse.c @@ -17,6 +17,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/misc/rio500.c b/drivers/usb/misc/rio500.c index fc329c98a6e8..b106ce76997b 100644 --- a/drivers/usb/misc/rio500.c +++ b/drivers/usb/misc/rio500.c @@ -31,7 +31,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/usb/misc/uss720.c b/drivers/usb/misc/uss720.c index 0a643fa74cab..e45a3a680db8 100644 --- a/drivers/usb/misc/uss720.c +++ b/drivers/usb/misc/uss720.c @@ -50,6 +50,7 @@ #include #include #include +#include /* * Version Information diff --git a/drivers/usb/mon/mon_bin.c b/drivers/usb/mon/mon_bin.c index 9fb8b1e6ecc2..b6d8bf475c92 100644 --- a/drivers/usb/mon/mon_bin.c +++ b/drivers/usb/mon/mon_bin.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/drivers/usb/mon/mon_text.c b/drivers/usb/mon/mon_text.c index db1a4abf2806..19c416d69eb9 100644 --- a/drivers/usb/mon/mon_text.c +++ b/drivers/usb/mon/mon_text.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/usb/serial/digi_acceleport.c b/drivers/usb/serial/digi_acceleport.c index eb433922598c..ab78111e0968 100644 --- a/drivers/usb/serial/digi_acceleport.c +++ b/drivers/usb/serial/digi_acceleport.c @@ -27,6 +27,7 @@ #include #include #include +#include #include /* Defines */ diff --git a/drivers/usb/serial/generic.c b/drivers/usb/serial/generic.c index 944de657a07a..49ce2be90fa0 100644 --- a/drivers/usb/serial/generic.c +++ b/drivers/usb/serial/generic.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c index 5c98ad4d2f4c..9b519897cc17 100644 --- a/drivers/vhost/net.c +++ b/drivers/vhost/net.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/drivers/vhost/vhost.c b/drivers/vhost/vhost.c index 4a00140a7624..dcbe2e29bf17 100644 --- a/drivers/vhost/vhost.c +++ b/drivers/vhost/vhost.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "vhost.h" diff --git a/drivers/video/fbdev/cobalt_lcdfb.c b/drivers/video/fbdev/cobalt_lcdfb.c index 038ac6934fe9..9da90bd242f4 100644 --- a/drivers/video/fbdev/cobalt_lcdfb.c +++ b/drivers/video/fbdev/cobalt_lcdfb.c @@ -26,6 +26,7 @@ #include #include #include +#include /* * Cursor position address diff --git a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c index 8b810696a42b..fd2b372d0264 100644 --- a/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c +++ b/drivers/video/fbdev/omap2/omapfb/displays/panel-dsi-cm.c @@ -19,7 +19,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c index 95f42872b787..4f6b00efc27c 100644 --- a/fs/afs/rxrpc.c +++ b/fs/afs/rxrpc.c @@ -10,6 +10,8 @@ */ #include +#include + #include #include #include diff --git a/fs/aio.c b/fs/aio.c index 7e2ab9c8e39c..0bb108476de2 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -20,7 +20,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/btrfs/ctree.h b/fs/btrfs/ctree.h index 105d4d43993e..a8812d95359d 100644 --- a/fs/btrfs/ctree.h +++ b/fs/btrfs/ctree.h @@ -20,6 +20,7 @@ #define __BTRFS_CTREE__ #include +#include #include #include #include diff --git a/fs/ceph/caps.c b/fs/ceph/caps.c index cd966f276a8d..68c78be19d5b 100644 --- a/fs/ceph/caps.c +++ b/fs/ceph/caps.c @@ -2,7 +2,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa..1858fc20eb7d 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include "cifsfs.h" #include "cifspdu.h" diff --git a/fs/coda/psdev.c b/fs/coda/psdev.c index 822629126e89..f40e3953e7fe 100644 --- a/fs/coda/psdev.c +++ b/fs/coda/psdev.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/dlm/user.c b/fs/dlm/user.c index 1ce908c2232c..23488f559cf9 100644 --- a/fs/dlm/user.c +++ b/fs/dlm/user.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "dlm_internal.h" #include "lockspace.h" diff --git a/fs/ecryptfs/read_write.c b/fs/ecryptfs/read_write.c index 158a3a39f82d..039e627194a9 100644 --- a/fs/ecryptfs/read_write.c +++ b/fs/ecryptfs/read_write.c @@ -22,6 +22,8 @@ #include #include +#include + #include "ecryptfs_kernel.h" /** diff --git a/fs/eventfd.c b/fs/eventfd.c index 1231cd1999d8..68b9fffcb2c8 100644 --- a/fs/eventfd.c +++ b/fs/eventfd.c @@ -9,7 +9,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 5ec16313da1a..341251421ced 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -13,7 +13,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2fd17e8e4984..77798a46b0c6 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/f2fs/data.c b/fs/f2fs/data.c index 1375fef11146..1602b4bccae6 100644 --- a/fs/f2fs/data.c +++ b/fs/f2fs/data.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "f2fs.h" #include "node.h" diff --git a/fs/fuse/dev.c b/fs/fuse/dev.c index f11792672977..b681b43c766e 100644 --- a/fs/fuse/dev.c +++ b/fs/fuse/dev.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/gfs2/lock_dlm.c b/fs/gfs2/lock_dlm.c index 8b907c5cc913..0515f0a68637 100644 --- a/fs/gfs2/lock_dlm.c +++ b/fs/gfs2/lock_dlm.c @@ -15,6 +15,7 @@ #include #include #include +#include #include "incore.h" #include "glock.h" diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index e3ee387a6dfe..361796a84fce 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -10,7 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include -#include +#include #include #include #include diff --git a/fs/hpfs/hpfs_fn.h b/fs/hpfs/hpfs_fn.h index aebb78f9e47f..d352f3a6af7f 100644 --- a/fs/hpfs/hpfs_fn.h +++ b/fs/hpfs/hpfs_fn.h @@ -18,7 +18,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/hugetlbfs/inode.c b/fs/hugetlbfs/inode.c index 54de77e78775..8f96461236f6 100644 --- a/fs/hugetlbfs/inode.c +++ b/fs/hugetlbfs/inode.c @@ -11,7 +11,7 @@ #include #include -#include /* remove ASAP */ +#include /* remove ASAP */ #include #include #include diff --git a/fs/jffs2/nodemgmt.c b/fs/jffs2/nodemgmt.c index cda0774c2c9c..a7bbe879cfc3 100644 --- a/fs/jffs2/nodemgmt.c +++ b/fs/jffs2/nodemgmt.c @@ -14,7 +14,7 @@ #include #include #include -#include /* For cond_resched() */ +#include #include "nodelist.h" #include "debug.h" diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ca4d96b1942..685565b229c3 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -15,7 +15,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/nilfs2/segment.c b/fs/nilfs2/segment.c index 7d18d62e8e07..febed1217b3f 100644 --- a/fs/nilfs2/segment.c +++ b/fs/nilfs2/segment.c @@ -30,6 +30,8 @@ #include #include #include +#include + #include "nilfs.h" #include "btnode.h" #include "page.h" diff --git a/fs/notify/fanotify/fanotify_user.c b/fs/notify/fanotify/fanotify_user.c index 7ebfca6a1427..2b37f2785834 100644 --- a/fs/notify/fanotify/fanotify_user.c +++ b/fs/notify/fanotify/fanotify_user.c @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index 1cf41c623be1..498d609b26c7 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -30,7 +30,7 @@ #include #include /* roundup() */ #include /* LOOKUP_FOLLOW */ -#include /* struct user */ +#include #include /* struct kmem_cache */ #include #include diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index 358ed7e1195a..c4f68c338735 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/ocfs2/alloc.c b/fs/ocfs2/alloc.c index d4ec0d8961a6..fb15a96df0b6 100644 --- a/fs/ocfs2/alloc.c +++ b/fs/ocfs2/alloc.c @@ -30,6 +30,7 @@ #include #include #include +#include #include diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 32fd261ae13d..a2b19fbdcf46 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -33,6 +33,7 @@ #include #include #include +#include #include "cluster/heartbeat.h" #include "cluster/nodemanager.h" diff --git a/fs/ocfs2/dlmfs/userdlm.c b/fs/ocfs2/dlmfs/userdlm.c index f70cda2f090d..9cecf4857195 100644 --- a/fs/ocfs2/dlmfs/userdlm.c +++ b/fs/ocfs2/dlmfs/userdlm.c @@ -28,6 +28,7 @@ */ #include +#include #include #include diff --git a/fs/ocfs2/dlmglue.c b/fs/ocfs2/dlmglue.c index 8dce4099a6ca..3b7c937a36b5 100644 --- a/fs/ocfs2/dlmglue.c +++ b/fs/ocfs2/dlmglue.c @@ -33,6 +33,7 @@ #include #include #include +#include #define MLOG_MASK_PREFIX ML_DLM_GLUE #include diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 70355a9a2596..8948683b367f 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -41,7 +41,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index 53d3f830358f..a34aa7aa2563 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -15,7 +15,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/splice.c b/fs/splice.c index 4ef78aa8ef61..e49336555739 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -33,6 +33,8 @@ #include #include #include +#include + #include "internal.h" /* diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c index 18d12bfff770..973607df579d 100644 --- a/fs/userfaultfd.c +++ b/fs/userfaultfd.c @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 7a989de224f4..592fdf7111cb 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -55,7 +55,7 @@ typedef __u32 xfs_nlink_t; #include #include #include -#include +#include #include #include #include diff --git a/include/drm/drm_os_linux.h b/include/drm/drm_os_linux.h index 86ab99bc0ac5..35e1482ba8a1 100644 --- a/include/drm/drm_os_linux.h +++ b/include/drm/drm_os_linux.h @@ -4,6 +4,7 @@ */ #include /* For task queue support */ +#include #include #ifndef readq diff --git a/include/linux/sunrpc/types.h b/include/linux/sunrpc/types.h index d222f47550af..11a7536c0fd2 100644 --- a/include/linux/sunrpc/types.h +++ b/include/linux/sunrpc/types.h @@ -10,6 +10,7 @@ #define _LINUX_SUNRPC_TYPES_H_ #include +#include #include #include #include diff --git a/include/media/v4l2-ioctl.h b/include/media/v4l2-ioctl.h index 574ff2ae94be..6cd94e5ee113 100644 --- a/include/media/v4l2-ioctl.h +++ b/include/media/v4l2-ioctl.h @@ -12,6 +12,7 @@ #include #include #include +#include #include /* need __user */ #include diff --git a/include/net/busy_poll.h b/include/net/busy_poll.h index b96832df239e..c0452de83086 100644 --- a/include/net/busy_poll.h +++ b/include/net/busy_poll.h @@ -26,6 +26,7 @@ #include #include +#include #include #ifdef CONFIG_NET_RX_BUSY_POLL diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 57f6311e2405..82ff5726bc1b 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -19,7 +19,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index d4f798491361..1c8c8707853a 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -12,7 +12,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 5eacab880f67..91e7bc8e9d5a 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -6,7 +6,7 @@ * - Derived also from comments by Linus */ #include -#include +#include #include enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 4fe8d8ad4396..9bd6e768164e 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/rcu/rcutorture.c b/kernel/rcu/rcutorture.c index 6a28b79710f0..cccc417a8135 100644 --- a/kernel/rcu/rcutorture.c +++ b/kernel/rcu/rcutorture.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index f063a25d4449..b294a8ee2842 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -11,7 +11,7 @@ * Waiting for completion is a typically sync point, but not an exclusion point. */ -#include +#include #include /** diff --git a/kernel/sched/swait.c b/kernel/sched/swait.c index 82f0dff90030..3d5610dcce11 100644 --- a/kernel/sched/swait.c +++ b/kernel/sched/swait.c @@ -1,4 +1,4 @@ -#include +#include #include void __init_swait_queue_head(struct swait_queue_head *q, const char *name, diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 9453efe9b25a..1fedfcf6fc9b 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -5,7 +5,7 @@ */ #include #include -#include +#include #include #include #include diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index e6dc9a538efa..04939053c823 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index 8e11d8d9f419..df512e8c04d4 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -43,7 +43,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 82a6bfa0c307..f4465d2a25d1 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -38,7 +38,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 6d40944960de..010410990bc6 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/mm/compaction.c b/mm/compaction.c index 0fdfde016ee2..81e1eaa2a2cf 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/gup.c b/mm/gup.c index 94fab8fa432b..9c047e951aa3 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -10,7 +10,7 @@ #include #include -#include +#include #include #include diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 2e0e8159ce8e..a7aa811b7d14 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index 1d3ed58f92ab..295479b792ec 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/mm/shmem.c b/mm/shmem.c index a26649a6633f..de8cdef4ef9b 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/userfaultfd.c b/mm/userfaultfd.c index 9f0ad2a4f102..479e631d43c2 100644 --- a/mm/userfaultfd.c +++ b/mm/userfaultfd.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/net/atm/svc.c b/net/atm/svc.c index 878563a8354d..db9794ec61d8 100644 --- a/net/atm/svc.c +++ b/net/atm/svc.c @@ -10,7 +10,7 @@ #include /* printk */ #include #include -#include /* jiffies and HZ */ +#include #include /* O_NONBLOCK */ #include #include /* ATM stuff */ diff --git a/net/bluetooth/af_bluetooth.c b/net/bluetooth/af_bluetooth.c index cfb2faba46de..69e1f7d362a8 100644 --- a/net/bluetooth/af_bluetooth.c +++ b/net/bluetooth/af_bluetooth.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include diff --git a/net/bluetooth/cmtp/capi.c b/net/bluetooth/cmtp/capi.c index 46ac686c8911..bb308224099c 100644 --- a/net/bluetooth/cmtp/capi.c +++ b/net/bluetooth/cmtp/capi.c @@ -26,7 +26,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c index 1015d9c8d97d..b5faff458d8b 100644 --- a/net/bluetooth/hci_request.c +++ b/net/bluetooth/hci_request.c @@ -21,6 +21,8 @@ SOFTWARE IS DISCLAIMED. */ +#include + #include #include #include diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c index a8ba752732c9..f307b145ea54 100644 --- a/net/bluetooth/l2cap_sock.c +++ b/net/bluetooth/l2cap_sock.c @@ -29,6 +29,7 @@ #include #include +#include #include #include diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c index 7511df72347f..aa1a814ceddc 100644 --- a/net/bluetooth/rfcomm/sock.c +++ b/net/bluetooth/rfcomm/sock.c @@ -27,6 +27,7 @@ #include #include +#include #include #include diff --git a/net/bluetooth/sco.c b/net/bluetooth/sco.c index 3125ce670c2f..e4e9a2da1e7e 100644 --- a/net/bluetooth/sco.c +++ b/net/bluetooth/sco.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c index 0f4034934d56..0b5dd607444c 100644 --- a/net/bridge/br_sysfs_br.c +++ b/net/bridge/br_sysfs_br.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/bridge/br_sysfs_if.c b/net/bridge/br_sysfs_if.c index 05e8946ccc03..79aee759aba5 100644 --- a/net/bridge/br_sysfs_if.c +++ b/net/bridge/br_sysfs_if.c @@ -17,6 +17,7 @@ #include #include #include +#include #include "br_private.h" diff --git a/net/core/ethtool.c b/net/core/ethtool.c index be7bab1adcde..aecb2c7241b6 100644 --- a/net/core/ethtool.c +++ b/net/core/ethtool.c @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include /* diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c index b0c04cf4851d..3945821e9c1f 100644 --- a/net/core/net-sysfs.c +++ b/net/core/net-sysfs.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/net/dccp/output.c b/net/dccp/output.c index b66c84db0766..91a15b3c4915 100644 --- a/net/dccp/output.c +++ b/net/dccp/output.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 5d367b7ff542..cebedd545e5e 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 3a2025f5bf2c..77362b88a661 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/net/irda/ircomm/ircomm_tty.c b/net/irda/ircomm/ircomm_tty.c index 817b1b186aff..f6061c4bb0a8 100644 --- a/net/irda/ircomm/ircomm_tty.c +++ b/net/irda/ircomm/ircomm_tty.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/irda/irnet/irnet_ppp.c b/net/irda/irnet/irnet_ppp.c index 35dbf3dc3d28..7025dcb853d0 100644 --- a/net/irda/irnet/irnet_ppp.c +++ b/net/irda/irnet/irnet_ppp.c @@ -13,8 +13,9 @@ * 2) as a control channel (write commands, read events) */ -#include +#include #include + #include "irnet_ppp.h" /* Private header */ /* Please put other headers in irnet.h - Thanks */ diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c index 13190b38f22e..89bbde1081ce 100644 --- a/net/iucv/af_iucv.c +++ b/net/iucv/af_iucv.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/net/kcm/kcmsock.c b/net/kcm/kcmsock.c index a646f3481240..309062f3debe 100644 --- a/net/kcm/kcmsock.c +++ b/net/kcm/kcmsock.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c index 5e9296382420..06186d608a27 100644 --- a/net/llc/af_llc.c +++ b/net/llc/af_llc.c @@ -26,6 +26,8 @@ #include #include #include +#include + #include #include #include diff --git a/net/nfc/llcp_sock.c b/net/nfc/llcp_sock.c index b9edf5fae6ae..879885b31cce 100644 --- a/net/nfc/llcp_sock.c +++ b/net/nfc/llcp_sock.c @@ -21,6 +21,7 @@ #include #include #include +#include #include "nfc.h" #include "llcp.h" diff --git a/net/phonet/pep.c b/net/phonet/pep.c index 8bad5624a27a..222bedcd9575 100644 --- a/net/phonet/pep.c +++ b/net/phonet/pep.c @@ -23,6 +23,7 @@ */ #include +#include #include #include #include diff --git a/net/phonet/socket.c b/net/phonet/socket.c index ffd5f2297584..a6c8da3ee893 100644 --- a/net/phonet/socket.c +++ b/net/phonet/socket.c @@ -27,6 +27,8 @@ #include #include #include +#include + #include #include diff --git a/net/rxrpc/conn_client.c b/net/rxrpc/conn_client.c index 40a1ef2adeb4..c3be03e8d098 100644 --- a/net/rxrpc/conn_client.c +++ b/net/rxrpc/conn_client.c @@ -76,6 +76,8 @@ #include #include #include +#include + #include "ar-internal.h" __read_mostly unsigned int rxrpc_max_client_connections = 1000; diff --git a/net/rxrpc/recvmsg.c b/net/rxrpc/recvmsg.c index f3a688e10843..28274a3c9831 100644 --- a/net/rxrpc/recvmsg.c +++ b/net/rxrpc/recvmsg.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include #include "ar-internal.h" diff --git a/net/rxrpc/sendmsg.c b/net/rxrpc/sendmsg.c index 0a6ef217aa8a..19b36c60fb4c 100644 --- a/net/rxrpc/sendmsg.c +++ b/net/rxrpc/sendmsg.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include #include "ar-internal.h" diff --git a/net/tipc/socket.c b/net/tipc/socket.c index 6b09a778cc71..43e4045e72bc 100644 --- a/net/tipc/socket.c +++ b/net/tipc/socket.c @@ -35,6 +35,8 @@ */ #include +#include + #include "core.h" #include "name_table.h" #include "node.h" diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c index 8a398b3fb532..9192ead66751 100644 --- a/net/vmw_vsock/af_vsock.c +++ b/net/vmw_vsock/af_vsock.c @@ -90,6 +90,7 @@ #include #include #include +#include #include #include #include diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c index 849c4ad0411e..8d592a45b597 100644 --- a/net/vmw_vsock/virtio_transport_common.c +++ b/net/vmw_vsock/virtio_transport_common.c @@ -9,6 +9,7 @@ */ #include #include +#include #include #include #include diff --git a/sound/core/control.c b/sound/core/control.c index fb096cb20a80..c109b82eef4b 100644 --- a/sound/core/control.c +++ b/sound/core/control.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/hwdep.c b/sound/core/hwdep.c index 36d2416f90d9..9602a7e38d8a 100644 --- a/sound/core/hwdep.c +++ b/sound/core/hwdep.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/oss/pcm_oss.c b/sound/core/oss/pcm_oss.c index 698a01419515..36baf962f9b0 100644 --- a/sound/core/oss/pcm_oss.c +++ b/sound/core/oss/pcm_oss.c @@ -28,6 +28,7 @@ #include #include +#include #include #include #include diff --git a/sound/core/pcm_lib.c b/sound/core/pcm_lib.c index bb1261591a1f..5088d4b8db22 100644 --- a/sound/core/pcm_lib.c +++ b/sound/core/pcm_lib.c @@ -21,6 +21,7 @@ */ #include +#include #include #include #include diff --git a/sound/core/pcm_native.c b/sound/core/pcm_native.c index aec9c92250fd..13dec5ec93f2 100644 --- a/sound/core/pcm_native.c +++ b/sound/core/pcm_native.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/core/rawmidi.c b/sound/core/rawmidi.c index 8da9cb245d01..ab890336175f 100644 --- a/sound/core/rawmidi.c +++ b/sound/core/rawmidi.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/core/seq/oss/seq_oss_device.h b/sound/core/seq/oss/seq_oss_device.h index d7b4d016b547..afa007c0cc2d 100644 --- a/sound/core/seq/oss/seq_oss_device.h +++ b/sound/core/seq/oss/seq_oss_device.h @@ -24,7 +24,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/sound/core/seq/oss/seq_oss_writeq.c b/sound/core/seq/oss/seq_oss_writeq.c index 1f6788a18444..5e04f4df10e4 100644 --- a/sound/core/seq/oss/seq_oss_writeq.c +++ b/sound/core/seq/oss/seq_oss_writeq.c @@ -28,6 +28,7 @@ #include "../seq_clientmgr.h" #include #include +#include /* diff --git a/sound/core/seq/seq_fifo.c b/sound/core/seq/seq_fifo.c index 86240d02b530..448efd4e980e 100644 --- a/sound/core/seq/seq_fifo.c +++ b/sound/core/seq/seq_fifo.c @@ -21,6 +21,8 @@ #include #include +#include + #include "seq_fifo.h" #include "seq_lock.h" diff --git a/sound/core/seq/seq_memory.c b/sound/core/seq/seq_memory.c index dfa5156f3585..1a1acf3ddda4 100644 --- a/sound/core/seq/seq_memory.c +++ b/sound/core/seq/seq_memory.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/core/timer.c b/sound/core/timer.c index ad153149b231..6d4fbc439246 100644 --- a/sound/core/timer.c +++ b/sound/core/timer.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/firewire/bebob/bebob.h b/sound/firewire/bebob/bebob.h index 175da875162d..17678d6ab5a2 100644 --- a/sound/firewire/bebob/bebob.h +++ b/sound/firewire/bebob/bebob.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/dice/dice.h b/sound/firewire/dice/dice.h index e6c07857f475..da00e75e09d4 100644 --- a/sound/firewire/dice/dice.h +++ b/sound/firewire/dice/dice.h @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/digi00x/digi00x.h b/sound/firewire/digi00x/digi00x.h index 2cd465c0caae..9dc761bdacca 100644 --- a/sound/firewire/digi00x/digi00x.h +++ b/sound/firewire/digi00x/digi00x.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/fireworks/fireworks.h b/sound/firewire/fireworks/fireworks.h index d73c12b8753d..9b19c7f05d57 100644 --- a/sound/firewire/fireworks/fireworks.h +++ b/sound/firewire/fireworks/fireworks.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/oxfw/oxfw.h b/sound/firewire/oxfw/oxfw.h index 2047dcb27625..d54d4a9ac4a1 100644 --- a/sound/firewire/oxfw/oxfw.h +++ b/sound/firewire/oxfw/oxfw.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include diff --git a/sound/firewire/tascam/tascam.h b/sound/firewire/tascam/tascam.h index 1f61011579a7..d3cd4065722b 100644 --- a/sound/firewire/tascam/tascam.h +++ b/sound/firewire/tascam/tascam.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/sound/isa/gus/gus_pcm.c b/sound/isa/gus/gus_pcm.c index 25f6788ccef3..06505999155f 100644 --- a/sound/isa/gus/gus_pcm.c +++ b/sound/isa/gus/gus_pcm.c @@ -27,6 +27,8 @@ #include #include +#include + #include #include #include diff --git a/sound/isa/msnd/msnd.c b/sound/isa/msnd/msnd.c index 835d4aa26761..8109ab3d29d1 100644 --- a/sound/isa/msnd/msnd.c +++ b/sound/isa/msnd/msnd.c @@ -36,6 +36,7 @@ ********************************************************************/ #include +#include #include #include #include diff --git a/sound/isa/sb/emu8000.c b/sound/isa/sb/emu8000.c index 94c411299e5a..ec180708f160 100644 --- a/sound/isa/sb/emu8000.c +++ b/sound/isa/sb/emu8000.c @@ -21,7 +21,7 @@ */ #include -#include +#include #include #include #include diff --git a/sound/isa/sb/emu8000_patch.c b/sound/isa/sb/emu8000_patch.c index 71d13c0bb746..c2e41d2762f7 100644 --- a/sound/isa/sb/emu8000_patch.c +++ b/sound/isa/sb/emu8000_patch.c @@ -20,6 +20,8 @@ */ #include "emu8000_local.h" + +#include #include #include diff --git a/sound/isa/sb/emu8000_pcm.c b/sound/isa/sb/emu8000_pcm.c index 250fd0006b53..32f234f494e5 100644 --- a/sound/isa/sb/emu8000_pcm.c +++ b/sound/isa/sb/emu8000_pcm.c @@ -19,6 +19,8 @@ */ #include "emu8000_local.h" + +#include #include #include #include diff --git a/sound/isa/wavefront/wavefront_synth.c b/sound/isa/wavefront/wavefront_synth.c index 718d5e3b7806..4dae9ff9ef5a 100644 --- a/sound/isa/wavefront/wavefront_synth.c +++ b/sound/isa/wavefront/wavefront_synth.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/sound/oss/dmabuf.c b/sound/oss/dmabuf.c index e3f29132d3ac..c5dd396c66a2 100644 --- a/sound/oss/dmabuf.c +++ b/sound/oss/dmabuf.c @@ -27,6 +27,8 @@ #include #include +#include + #include "sound_config.h" #include "sleep.h" diff --git a/sound/oss/dmasound/dmasound_core.c b/sound/oss/dmasound/dmasound_core.c index 5f248fb41bea..fb3bbceb1fef 100644 --- a/sound/oss/dmasound/dmasound_core.c +++ b/sound/oss/dmasound/dmasound_core.c @@ -182,6 +182,7 @@ #include #include #include +#include #include diff --git a/sound/oss/midibuf.c b/sound/oss/midibuf.c index 8f45cd999965..701c7625c971 100644 --- a/sound/oss/midibuf.c +++ b/sound/oss/midibuf.c @@ -16,6 +16,8 @@ #include #include #include +#include + #define MIDIBUF_C #include "sound_config.h" diff --git a/sound/oss/msnd_pinnacle.c b/sound/oss/msnd_pinnacle.c index a8bb4a06ba6f..f34ec01d2239 100644 --- a/sound/oss/msnd_pinnacle.c +++ b/sound/oss/msnd_pinnacle.c @@ -41,6 +41,8 @@ #include #include #include +#include + #include #include #include "sound_config.h" diff --git a/sound/oss/sound_config.h b/sound/oss/sound_config.h index f2554ab78f5e..5253b0a70437 100644 --- a/sound/oss/sound_config.h +++ b/sound/oss/sound_config.h @@ -16,6 +16,7 @@ #include #include +#include #include "os.h" #include "soundvers.h" diff --git a/sound/oss/swarm_cs4297a.c b/sound/oss/swarm_cs4297a.c index f3af63e58b36..97899352b15f 100644 --- a/sound/oss/swarm_cs4297a.c +++ b/sound/oss/swarm_cs4297a.c @@ -64,7 +64,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/virt/kvm/kvm_main.c b/virt/kvm/kvm_main.c index dd5de09bf362..ae79f7679cc2 100644 --- a/virt/kvm/kvm_main.c +++ b/virt/kvm/kvm_main.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include -- cgit v1.2.3 From b17b01533b719e9949e437abf66436a875739b40 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:35 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/process.c | 1 + arch/alpha/kernel/traps.c | 1 + arch/arc/kernel/ctx_sw.c | 1 + arch/arc/kernel/stacktrace.c | 2 ++ arch/arc/kernel/troubleshoot.c | 1 + arch/arm/kernel/process.c | 1 + arch/arm/kernel/stacktrace.c | 1 + arch/arm/kernel/traps.c | 1 + arch/arm/mm/alignment.c | 1 + arch/arm/mm/fault.c | 1 + arch/arm/probes/kprobes/core.c | 1 + arch/arm64/kernel/probes/kprobes.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + arch/arm64/mm/fault.c | 1 + arch/avr32/kernel/nmi_debug.c | 1 + arch/avr32/kernel/process.c | 1 + arch/blackfin/kernel/dumpstack.c | 2 ++ arch/blackfin/kernel/early_printk.c | 1 + arch/blackfin/kernel/nmi.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/trace.c | 1 + arch/blackfin/kernel/traps.c | 1 + arch/blackfin/mach-common/ints-priority.c | 1 + arch/blackfin/mm/isram-driver.c | 1 + arch/c6x/kernel/traps.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v10/kernel/traps.c | 2 ++ arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/arch-v32/kernel/traps.c | 2 ++ arch/cris/kernel/irq.c | 1 + arch/cris/kernel/stacktrace.c | 2 +- arch/cris/kernel/traps.c | 1 + arch/frv/kernel/process.c | 1 + arch/frv/kernel/traps.c | 1 + arch/h8300/kernel/process.c | 1 + arch/h8300/kernel/traps.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/hexagon/kernel/traps.c | 1 + arch/hexagon/kernel/vm_events.c | 1 + arch/ia64/hp/sim/simserial.c | 1 + arch/ia64/kernel/mca.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/traps.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/traps.c | 1 + arch/m68k/kernel/process.c | 1 + arch/m68k/kernel/traps.c | 1 + arch/m68k/mac/macints.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/stacktrace.c | 1 + arch/metag/kernel/traps.c | 1 + arch/metag/mm/fault.c | 1 + arch/microblaze/kernel/exceptions.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/microblaze/kernel/traps.c | 1 + arch/mips/kernel/process.c | 1 + arch/mips/kernel/stacktrace.c | 1 + arch/mips/kernel/traps.c | 1 + arch/mips/sgi-ip22/ip28-berr.c | 1 + arch/mips/sgi-ip27/ip27-berr.c | 1 + arch/mips/sgi-ip32/ip32-berr.c | 1 + arch/mips/sgi-ip32/ip32-irq.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/traps.c | 1 + arch/nios2/kernel/process.c | 1 + arch/nios2/kernel/traps.c | 1 + arch/nios2/mm/fault.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/openrisc/kernel/traps.c | 1 + arch/parisc/kernel/pa7300lc.c | 1 + arch/parisc/kernel/process.c | 1 + arch/parisc/kernel/signal.c | 1 + arch/parisc/kernel/traps.c | 1 + arch/parisc/kernel/unaligned.c | 1 + arch/parisc/mm/fault.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/powerpc/kernel/stacktrace.c | 1 + arch/powerpc/kernel/traps.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/stacktrace.c | 1 + arch/s390/kernel/traps.c | 1 + arch/s390/mm/fault.c | 1 + arch/score/kernel/traps.c | 1 + arch/sh/kernel/dumpstack.c | 1 + arch/sh/kernel/nmi_debug.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/kernel/stacktrace.c | 1 + arch/sh/kernel/traps.c | 1 + arch/sh/kernel/traps_64.c | 1 + arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/sparc/kernel/stacktrace.c | 1 + arch/sparc/kernel/sun4m_irq.c | 1 + arch/sparc/kernel/sys_sparc_32.c | 1 + arch/sparc/kernel/sys_sparc_64.c | 1 + arch/sparc/kernel/traps_32.c | 1 + arch/sparc/kernel/traps_64.c | 1 + arch/sparc/mm/fault_64.c | 1 + arch/tile/include/asm/stack.h | 2 ++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/signal.c | 1 + arch/tile/kernel/stack.c | 1 + arch/tile/kernel/traps.c | 1 + arch/tile/kernel/unaligned.c | 1 + arch/tile/mm/fault.c | 1 + arch/um/drivers/mconsole_kern.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/sysrq.c | 2 ++ arch/um/kernel/trap.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/unicore32/kernel/stacktrace.c | 1 + arch/unicore32/kernel/traps.c | 1 + arch/unicore32/mm/alignment.c | 1 + arch/x86/kernel/amd_gart_64.c | 1 + arch/x86/kernel/doublefault.c | 1 + arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/dumpstack_32.c | 1 + arch/x86/kernel/dumpstack_64.c | 1 + arch/x86/kernel/kprobes/core.c | 1 + arch/x86/kernel/nmi.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/stacktrace.c | 1 + arch/x86/mm/extable.c | 2 ++ arch/x86/platform/uv/uv_nmi.c | 1 + arch/x86/um/sysrq_32.c | 1 + arch/x86/um/sysrq_64.c | 1 + arch/xtensa/kernel/process.c | 1 + arch/xtensa/kernel/traps.c | 1 + drivers/base/power/main.c | 1 + drivers/tty/sysrq.c | 2 ++ drivers/tty/tty_ldsem.c | 1 + drivers/tty/vt/keyboard.c | 2 ++ fs/proc/base.c | 1 + include/linux/sched/debug.h | 6 ++++++ kernel/debug/kdb/kdb_bt.c | 1 + kernel/debug/kdb/kdb_main.c | 1 + kernel/hung_task.c | 1 + kernel/latencytop.c | 1 + kernel/locking/mutex.c | 1 + kernel/locking/rtmutex-debug.c | 1 + kernel/locking/rtmutex.c | 1 + kernel/locking/rwsem-spinlock.c | 1 + kernel/locking/rwsem-xadd.c | 1 + kernel/locking/rwsem.c | 1 + kernel/locking/semaphore.c | 1 + kernel/panic.c | 1 + kernel/power/process.c | 1 + kernel/printk/printk.c | 1 + kernel/rcu/tree.c | 1 + kernel/rcu/tree_plugin.h | 1 + kernel/rcu/update.c | 1 + kernel/sched/completion.c | 1 + kernel/sched/sched.h | 2 +- kernel/sched/wait.c | 1 + kernel/signal.c | 1 + kernel/time/alarmtimer.c | 1 + kernel/time/hrtimer.c | 1 + kernel/time/timer.c | 1 + kernel/watchdog.c | 1 + kernel/watchdog_hld.c | 2 ++ lib/dump_stack.c | 1 + lib/nmi_backtrace.c | 1 + 166 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 include/linux/sched/debug.h (limited to 'arch/x86') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index bca963a4aa48..88f7a974d311 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/traps.c b/arch/alpha/kernel/traps.c index 6448ab00043d..b137390e87e7 100644 --- a/arch/alpha/kernel/traps.c +++ b/arch/alpha/kernel/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arc/kernel/ctx_sw.c b/arch/arc/kernel/ctx_sw.c index 6f4cb0dab1b9..9e1ae9d41925 100644 --- a/arch/arc/kernel/ctx_sw.c +++ b/arch/arc/kernel/ctx_sw.c @@ -16,6 +16,7 @@ #include #include +#include #ifdef CONFIG_ARC_PLAT_EZNPS #include #endif diff --git a/arch/arc/kernel/stacktrace.c b/arch/arc/kernel/stacktrace.c index b9192a653b7e..74315f302971 100644 --- a/arch/arc/kernel/stacktrace.c +++ b/arch/arc/kernel/stacktrace.c @@ -28,6 +28,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arc/kernel/troubleshoot.c b/arch/arc/kernel/troubleshoot.c index 0c48907f56a9..f9caf79186d4 100644 --- a/arch/arc/kernel/troubleshoot.c +++ b/arch/arc/kernel/troubleshoot.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 91d2d5b01414..03d46395d1ff 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/arm/kernel/stacktrace.c b/arch/arm/kernel/stacktrace.c index 92b72375c4c7..3a2fa203637a 100644 --- a/arch/arm/kernel/stacktrace.c +++ b/arch/arm/kernel/stacktrace.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index dc5f1a22b3c9..a9dad001b97d 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/alignment.c b/arch/arm/mm/alignment.c index d7fe2de9cc9e..2c96190e018b 100644 --- a/arch/arm/mm/alignment.c +++ b/arch/arm/mm/alignment.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/fault.c b/arch/arm/mm/fault.c index 520c7778d330..ff8b0aa2dfde 100644 --- a/arch/arm/mm/fault.c +++ b/arch/arm/mm/fault.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/probes/kprobes/core.c b/arch/arm/probes/kprobes/core.c index a4ec240ee7ba..b6dc9d838a9a 100644 --- a/arch/arm/probes/kprobes/core.c +++ b/arch/arm/probes/kprobes/core.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/probes/kprobes.c b/arch/arm64/kernel/probes/kprobes.c index f0593c92279b..2a07aae5b8a2 100644 --- a/arch/arm64/kernel/probes/kprobes.c +++ b/arch/arm64/kernel/probes/kprobes.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 1ad48f93abdd..40a16cdd9873 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 8a552a33c6ef..7597e42feeea 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5b8779a849a2..5a5d83791837 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -30,6 +30,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/mm/fault.c b/arch/arm64/mm/fault.c index 30dd60ab8a65..4bf899fb451b 100644 --- a/arch/arm64/mm/fault.c +++ b/arch/arm64/mm/fault.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/nmi_debug.c b/arch/avr32/kernel/nmi_debug.c index 3414b8566c29..25823049bb99 100644 --- a/arch/avr32/kernel/nmi_debug.c +++ b/arch/avr32/kernel/nmi_debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 68e5b9dac059..dac022d88d9d 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -6,6 +6,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/dumpstack.c b/arch/blackfin/kernel/dumpstack.c index 95ba6d9e9a3d..3c992c1f8ef2 100644 --- a/arch/blackfin/kernel/dumpstack.c +++ b/arch/blackfin/kernel/dumpstack.c @@ -10,6 +10,8 @@ #include #include #include +#include + #include /* diff --git a/arch/blackfin/kernel/early_printk.c b/arch/blackfin/kernel/early_printk.c index 61fbd2de993d..4b89af9243d3 100644 --- a/arch/blackfin/kernel/early_printk.c +++ b/arch/blackfin/kernel/early_printk.c @@ -8,6 +8,7 @@ */ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/nmi.c b/arch/blackfin/kernel/nmi.c index 9919d29287dc..633c37083e87 100644 --- a/arch/blackfin/kernel/nmi.c +++ b/arch/blackfin/kernel/nmi.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 4aa5545c4fde..e95d094c20a6 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 01e546ad2f7a..23c05b8effb3 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/traps.c b/arch/blackfin/kernel/traps.c index 32676d7721b1..a323a40a46e9 100644 --- a/arch/blackfin/kernel/traps.c +++ b/arch/blackfin/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mach-common/ints-priority.c b/arch/blackfin/mach-common/ints-priority.c index 4986b4fbcee9..13e94bf9d8ba 100644 --- a/arch/blackfin/mach-common/ints-priority.c +++ b/arch/blackfin/mach-common/ints-priority.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mm/isram-driver.c b/arch/blackfin/mm/isram-driver.c index 7e2e674ed444..aaa1e64b753b 100644 --- a/arch/blackfin/mm/isram-driver.c +++ b/arch/blackfin/mm/isram-driver.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/arch/c6x/kernel/traps.c b/arch/c6x/kernel/traps.c index dcc2c2f6d67c..09b8a40d5680 100644 --- a/arch/c6x/kernel/traps.c +++ b/arch/c6x/kernel/traps.c @@ -10,6 +10,7 @@ */ #include #include +#include #include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 96e5afef6b47..068a40374200 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/traps.c b/arch/cris/arch-v10/kernel/traps.c index 96d004fe9740..c0a501f29bd8 100644 --- a/arch/cris/arch-v10/kernel/traps.c +++ b/arch/cris/arch-v10/kernel/traps.c @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 4d1afa9f9fd3..613c2d71bf10 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/traps.c b/arch/cris/arch-v32/kernel/traps.c index ad6174e217c9..a34256515036 100644 --- a/arch/cris/arch-v32/kernel/traps.c +++ b/arch/cris/arch-v32/kernel/traps.c @@ -5,6 +5,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/cris/kernel/irq.c b/arch/cris/kernel/irq.c index 694850e8f077..09b864f46f8a 100644 --- a/arch/cris/kernel/irq.c +++ b/arch/cris/kernel/irq.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include diff --git a/arch/cris/kernel/stacktrace.c b/arch/cris/kernel/stacktrace.c index 99838c74456d..f1cc3aaacd8d 100644 --- a/arch/cris/kernel/stacktrace.c +++ b/arch/cris/kernel/stacktrace.c @@ -1,5 +1,5 @@ #include -#include +#include #include #include diff --git a/arch/cris/kernel/traps.c b/arch/cris/kernel/traps.c index b2a312a7afc6..a01636a12a6e 100644 --- a/arch/cris/kernel/traps.c +++ b/arch/cris/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #ifdef CONFIG_KALLSYMS #include #endif diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index b306241c4ef2..cf4c34c09ab3 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/kernel/traps.c b/arch/frv/kernel/traps.c index 43c134d6081c..ce29991e4219 100644 --- a/arch/frv/kernel/traps.c +++ b/arch/frv/kernel/traps.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 891974a11704..d7cabf373fe3 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index 044a36125846..ee7e3ce40d78 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index d9edfd3fc52a..3bdd9592d025 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index 4496bcf605ef..b55f13c70b34 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/vm_events.c b/arch/hexagon/kernel/vm_events.c index 741aaa917cda..04f57ef22009 100644 --- a/arch/hexagon/kernel/vm_events.c +++ b/arch/hexagon/kernel/vm_events.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/ia64/hp/sim/simserial.c b/arch/ia64/hp/sim/simserial.c index 21fd50def270..de8cba121013 100644 --- a/arch/ia64/hp/sim/simserial.c +++ b/arch/ia64/hp/sim/simserial.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index 5ac51069e453..f9fe3ac64242 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -73,6 +73,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 52deab683ba1..804b251ee5d1 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/traps.c b/arch/ia64/kernel/traps.c index 48ba46b025e1..7b1fe9462158 100644 --- a/arch/ia64/kernel/traps.c +++ b/arch/ia64/kernel/traps.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include /* For unblank_screen() */ #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index e0568bee60c0..1a227c1fdb1a 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/traps.c b/arch/m32r/kernel/traps.c index c3c5fdfae920..7c05bb393597 100644 --- a/arch/m32r/kernel/traps.c +++ b/arch/m32r/kernel/traps.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index f0a8e9b332cd..b4a4675f4119 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/traps.c b/arch/m68k/kernel/traps.c index 558f38402737..a926d2c88898 100644 --- a/arch/m68k/kernel/traps.c +++ b/arch/m68k/kernel/traps.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/m68k/mac/macints.c b/arch/m68k/mac/macints.c index b5cd06df71fd..9637dee90dac 100644 --- a/arch/m68k/mac/macints.c +++ b/arch/m68k/mac/macints.c @@ -110,6 +110,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 35062796edf2..2e611bd37515 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/stacktrace.c b/arch/metag/kernel/stacktrace.c index 5510361d5bea..4f806d66a58c 100644 --- a/arch/metag/kernel/stacktrace.c +++ b/arch/metag/kernel/stacktrace.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 17b2e2e38d5a..5ce67f9124aa 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/mm/fault.c b/arch/metag/mm/fault.c index c765b3621b9b..5055477486b6 100644 --- a/arch/metag/mm/fault.c +++ b/arch/metag/mm/fault.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/microblaze/kernel/exceptions.c b/arch/microblaze/kernel/exceptions.c index 42dd12a62ff5..e6f338d0496b 100644 --- a/arch/microblaze/kernel/exceptions.c +++ b/arch/microblaze/kernel/exceptions.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index b2dd37196b3b..8c5fdb2e2850 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/traps.c b/arch/microblaze/kernel/traps.c index cb619533a192..45bbba9d919f 100644 --- a/arch/microblaze/kernel/traps.c +++ b/arch/microblaze/kernel/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 803e255b6fc3..97574cdb532d 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 506021f62549..986f910961d9 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2006 Atsushi Nemoto */ #include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 49c6df20672a..1a9366a157cb 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip22/ip28-berr.c b/arch/mips/sgi-ip22/ip28-berr.c index 9960a8302eac..1f2a5bc4779e 100644 --- a/arch/mips/sgi-ip22/ip28-berr.c +++ b/arch/mips/sgi-ip22/ip28-berr.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sgi-ip27/ip27-berr.c b/arch/mips/sgi-ip27/ip27-berr.c index f8919b6a24c8..d12879eb2b1f 100644 --- a/arch/mips/sgi-ip27/ip27-berr.c +++ b/arch/mips/sgi-ip27/ip27-berr.c @@ -11,6 +11,7 @@ #include #include /* for SIGBUS */ #include /* schow_regs(), force_sig() */ +#include #include #include diff --git a/arch/mips/sgi-ip32/ip32-berr.c b/arch/mips/sgi-ip32/ip32-berr.c index ba8f46d80ab8..57d8c7486fe6 100644 --- a/arch/mips/sgi-ip32/ip32-berr.c +++ b/arch/mips/sgi-ip32/ip32-berr.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/sgi-ip32/ip32-irq.c b/arch/mips/sgi-ip32/ip32-irq.c index 838d8589a1c0..a6a0ff7f5aed 100644 --- a/arch/mips/sgi-ip32/ip32-irq.c +++ b/arch/mips/sgi-ip32/ip32-irq.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index e5def2217f72..a1e2f8301d94 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/traps.c b/arch/mn10300/kernel/traps.c index a7a987c7954f..800fd0801969 100644 --- a/arch/mn10300/kernel/traps.c +++ b/arch/mn10300/kernel/traps.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 2f8c74f93e70..3ad87a6b119b 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -14,6 +14,7 @@ #include #include +#include #include #include diff --git a/arch/nios2/kernel/traps.c b/arch/nios2/kernel/traps.c index 72ed30a93c85..8184e7d6b385 100644 --- a/arch/nios2/kernel/traps.c +++ b/arch/nios2/kernel/traps.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/nios2/mm/fault.c b/arch/nios2/mm/fault.c index e7a14e1e0d6b..b804dd06ea1c 100644 --- a/arch/nios2/mm/fault.c +++ b/arch/nios2/mm/fault.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 6e9d1cb519f2..899339dac938 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/traps.c b/arch/openrisc/kernel/traps.c index 7e81ad258bca..2354ab88d948 100644 --- a/arch/openrisc/kernel/traps.c +++ b/arch/openrisc/kernel/traps.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/pa7300lc.c b/arch/parisc/kernel/pa7300lc.c index 8a89780223aa..9b245fc67560 100644 --- a/arch/parisc/kernel/pa7300lc.c +++ b/arch/parisc/kernel/pa7300lc.c @@ -5,6 +5,7 @@ * Copyright (C) 2000 Philipp Rumpf */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index ea6603ee8d24..8c283caf2b4d 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/signal.c b/arch/parisc/kernel/signal.c index e58925ac64d1..9e03296641d7 100644 --- a/arch/parisc/kernel/signal.c +++ b/arch/parisc/kernel/signal.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/traps.c b/arch/parisc/kernel/traps.c index 378df9207406..991654c88eec 100644 --- a/arch/parisc/kernel/traps.c +++ b/arch/parisc/kernel/traps.c @@ -11,6 +11,7 @@ */ #include +#include #include #include #include diff --git a/arch/parisc/kernel/unaligned.c b/arch/parisc/kernel/unaligned.c index a08ab481e556..e36f7b75ab07 100644 --- a/arch/parisc/kernel/unaligned.c +++ b/arch/parisc/kernel/unaligned.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/mm/fault.c b/arch/parisc/mm/fault.c index 1a0b4f63f0e9..c3cac4ddfe9c 100644 --- a/arch/parisc/mm/fault.c +++ b/arch/parisc/mm/fault.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 4379a079b3c2..76f58b87dcb2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/stacktrace.c b/arch/powerpc/kernel/stacktrace.c index 4f24606afc3f..66711958493c 100644 --- a/arch/powerpc/kernel/stacktrace.c +++ b/arch/powerpc/kernel/stacktrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/traps.c b/arch/powerpc/kernel/traps.c index e6cc56b61d01..ff365f9de27a 100644 --- a/arch/powerpc/kernel/traps.c +++ b/arch/powerpc/kernel/traps.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 55d4fe174fd9..72c584d62f59 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 54281660582c..ed99ff911b67 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/stacktrace.c b/arch/s390/kernel/stacktrace.c index 0085b2d8ed7d..e66687dc6144 100644 --- a/arch/s390/kernel/stacktrace.c +++ b/arch/s390/kernel/stacktrace.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/arch/s390/kernel/traps.c b/arch/s390/kernel/traps.c index 283ad7840335..f787b9d8f54c 100644 --- a/arch/s390/kernel/traps.c +++ b/arch/s390/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/mm/fault.c b/arch/s390/mm/fault.c index bb5560eb2435..5845d3028ffc 100644 --- a/arch/s390/mm/fault.c +++ b/arch/s390/mm/fault.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index fa624f30f783..8a54d320fb41 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index 8dfe645bcc4b..d00cab2f50f9 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/nmi_debug.c b/arch/sh/kernel/nmi_debug.c index ff0abbd1e652..730d928f0d12 100644 --- a/arch/sh/kernel/nmi_debug.c +++ b/arch/sh/kernel/nmi_debug.c @@ -9,6 +9,7 @@ #include #include #include +#include #include enum nmi_action { diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 51741850a715..5098274e0f23 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -15,6 +15,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index e0b271bffd6a..ced21a417d9d 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/stacktrace.c b/arch/sh/kernel/stacktrace.c index bf989e063a0c..7a73d2763e1b 100644 --- a/arch/sh/kernel/stacktrace.c +++ b/arch/sh/kernel/stacktrace.c @@ -10,6 +10,7 @@ * for more details. */ #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index 3036dee854d1..bc5c9f347b9e 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps_64.c b/arch/sh/kernel/traps_64.c index 00835edb6e20..014fb08cf133 100644 --- a/arch/sh/kernel/traps_64.c +++ b/arch/sh/kernel/traps_64.c @@ -10,6 +10,7 @@ * for more details. */ #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 48ffc3e7d1dd..237a471c8ed5 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index d249ca10b203..98f6ff6eaa55 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/stacktrace.c b/arch/sparc/kernel/stacktrace.c index e78386a0029f..be4c14cccc05 100644 --- a/arch/sparc/kernel/stacktrace.c +++ b/arch/sparc/kernel/stacktrace.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/sparc/kernel/sun4m_irq.c b/arch/sparc/kernel/sun4m_irq.c index da737c712fa8..aa84da0b2d30 100644 --- a/arch/sparc/kernel/sun4m_irq.c +++ b/arch/sparc/kernel/sun4m_irq.c @@ -10,6 +10,7 @@ */ #include +#include #include #include diff --git a/arch/sparc/kernel/sys_sparc_32.c b/arch/sparc/kernel/sys_sparc_32.c index ff3573059936..7aecb239626d 100644 --- a/arch/sparc/kernel/sys_sparc_32.c +++ b/arch/sparc/kernel/sys_sparc_32.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/sys_sparc_64.c b/arch/sparc/kernel/sys_sparc_64.c index c521ee277083..ef4520efc813 100644 --- a/arch/sparc/kernel/sys_sparc_64.c +++ b/arch/sparc/kernel/sys_sparc_64.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index ecddac5a4c96..26740a2f285d 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -10,6 +10,7 @@ */ #include /* for jiffies */ +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index e022d7b00390..4ff4c35f76b2 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/sparc/mm/fault_64.c b/arch/sparc/mm/fault_64.c index 643c149a3151..b84c4dd14954 100644 --- a/arch/sparc/mm/fault_64.c +++ b/arch/sparc/mm/fault_64.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/include/asm/stack.h b/arch/tile/include/asm/stack.h index c3cb42615a9f..3573325e340b 100644 --- a/arch/tile/include/asm/stack.h +++ b/arch/tile/include/asm/stack.h @@ -17,6 +17,8 @@ #include #include +#include + #include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index c84c54a1ac55..557b4c8435d0 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 87299a6cfec8..8cc92ae6eb27 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index 22bbbd3ff4a3..a1c0787fe9d8 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/traps.c b/arch/tile/kernel/traps.c index 39f427bb0de2..54804866f238 100644 --- a/arch/tile/kernel/traps.c +++ b/arch/tile/kernel/traps.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index f229e979584e..142acae78316 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 709f8e9ba3e9..005c91c2adca 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/drivers/mconsole_kern.c b/arch/um/drivers/mconsole_kern.c index 8a4c72af3bc0..af326fb6510d 100644 --- a/arch/um/drivers/mconsole_kern.c +++ b/arch/um/drivers/mconsole_kern.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index 078630d6448c..c6c45ea4021f 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/sysrq.c b/arch/um/kernel/sysrq.c index aa1b56f5ac68..34ae555c3e70 100644 --- a/arch/um/kernel/sysrq.c +++ b/arch/um/kernel/sysrq.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/um/kernel/trap.c b/arch/um/kernel/trap.c index 9711ae4aaa6a..59158871b9fc 100644 --- a/arch/um/kernel/trap.c +++ b/arch/um/kernel/trap.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index d7c6b676b3a5..c1a0eec88f1f 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -13,6 +13,7 @@ #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/stacktrace.c b/arch/unicore32/kernel/stacktrace.c index b34030bdabe3..9976e767d51c 100644 --- a/arch/unicore32/kernel/stacktrace.c +++ b/arch/unicore32/kernel/stacktrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 7f5e06f9a202..506e1a2127c6 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/mm/alignment.c b/arch/unicore32/mm/alignment.c index 24e836023e6c..3a7f6faa8794 100644 --- a/arch/unicore32/mm/alignment.c +++ b/arch/unicore32/mm/alignment.c @@ -15,6 +15,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/amd_gart_64.c b/arch/x86/kernel/amd_gart_64.c index 82dfe32faaf4..df083efe6ee0 100644 --- a/arch/x86/kernel/amd_gart_64.c +++ b/arch/x86/kernel/amd_gart_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/doublefault.c b/arch/x86/kernel/doublefault.c index b2f7207ba86c..f9c324e08d85 100644 --- a/arch/x86/kernel/doublefault.c +++ b/arch/x86/kernel/doublefault.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 0cfd01d2754c..7b9e7e68f316 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index bb3b5b9a6899..b0b3a3df7c20 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -2,6 +2,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index fac189efcc34..a8b117e93b46 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -2,6 +2,7 @@ * Copyright (C) 1991, 1992 Linus Torvalds * Copyright (C) 2000, 2001, 2002 Andi Kleen, SuSE Labs */ +#include #include #include #include diff --git a/arch/x86/kernel/kprobes/core.c b/arch/x86/kernel/kprobes/core.c index 520b8dfe1640..6384eb754a58 100644 --- a/arch/x86/kernel/kprobes/core.c +++ b/arch/x86/kernel/kprobes/core.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index d17b1a940add..f088ea4c66e7 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 6395e0cd7dd6..505be5589e52 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 0653788026e2..330cae0025d0 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -4,6 +4,7 @@ * Copyright (C) 2006-2009 Red Hat, Inc., Ingo Molnar */ #include +#include #include #include #include diff --git a/arch/x86/mm/extable.c b/arch/x86/mm/extable.c index 61a7e9ea9aa1..35ea061010a1 100644 --- a/arch/x86/mm/extable.c +++ b/arch/x86/mm/extable.c @@ -1,5 +1,7 @@ #include #include +#include + #include #include diff --git a/arch/x86/platform/uv/uv_nmi.c b/arch/x86/platform/uv/uv_nmi.c index 9743d0ccfec6..c34bd8233f7c 100644 --- a/arch/x86/platform/uv/uv_nmi.c +++ b/arch/x86/platform/uv/uv_nmi.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/um/sysrq_32.c b/arch/x86/um/sysrq_32.c index 16ee0e450e3e..f2383484840d 100644 --- a/arch/x86/um/sysrq_32.c +++ b/arch/x86/um/sysrq_32.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/um/sysrq_64.c b/arch/x86/um/sysrq_64.c index 38b4e4abd0f8..903ad91b624f 100644 --- a/arch/x86/um/sysrq_64.c +++ b/arch/x86/um/sysrq_64.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index 826d25104846..afaa3588d869 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/traps.c b/arch/xtensa/kernel/traps.c index 84abd66e680d..d8bb2e62393e 100644 --- a/arch/xtensa/kernel/traps.c +++ b/arch/xtensa/kernel/traps.c @@ -25,6 +25,7 @@ #include #include +#include #include #include #include diff --git a/drivers/base/power/main.c b/drivers/base/power/main.c index 249e0304597f..9faee1c893e5 100644 --- a/drivers/base/power/main.c +++ b/drivers/base/power/main.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index 65db0aeb3d80..e5f0c7d0fe8d 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -16,6 +16,8 @@ #include #include +#include +#include #include #include #include diff --git a/drivers/tty/tty_ldsem.c b/drivers/tty/tty_ldsem.c index 9229de43e19d..4f6b1b10b537 100644 --- a/drivers/tty/tty_ldsem.c +++ b/drivers/tty/tty_ldsem.c @@ -32,6 +32,7 @@ #include #include #include +#include #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/drivers/tty/vt/keyboard.c b/drivers/tty/vt/keyboard.c index 6b3a2c00974d..c5f0fc906136 100644 --- a/drivers/tty/vt/keyboard.c +++ b/drivers/tty/vt/keyboard.c @@ -27,6 +27,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 4c5fa704e38c..5914fed3712d 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -88,6 +88,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/include/linux/sched/debug.h b/include/linux/sched/debug.h new file mode 100644 index 000000000000..55bc0cd35fd5 --- /dev/null +++ b/include/linux/sched/debug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_DEBUG_H +#define _LINUX_SCHED_DEBUG_H + +#include + +#endif /* _LINUX_SCHED_DEBUG_H */ diff --git a/kernel/debug/kdb/kdb_bt.c b/kernel/debug/kdb/kdb_bt.c index 9b976a42376d..6ad4a9fcbd6f 100644 --- a/kernel/debug/kdb/kdb_bt.c +++ b/kernel/debug/kdb/kdb_bt.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include "kdb_private.h" diff --git a/kernel/debug/kdb/kdb_main.c b/kernel/debug/kdb/kdb_main.c index bf48a492b4be..c8146d53ca67 100644 --- a/kernel/debug/kdb/kdb_main.c +++ b/kernel/debug/kdb/kdb_main.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/hung_task.c b/kernel/hung_task.c index 129247e56902..f0f8e2a8496f 100644 --- a/kernel/hung_task.c +++ b/kernel/hung_task.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/kernel/latencytop.c b/kernel/latencytop.c index b5c30d9f46c5..545839b838d6 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include diff --git a/kernel/locking/mutex.c b/kernel/locking/mutex.c index 82ff5726bc1b..198527a62149 100644 --- a/kernel/locking/mutex.c +++ b/kernel/locking/mutex.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex-debug.c b/kernel/locking/rtmutex-debug.c index 62b6cee8ea7f..97ee9df32e0f 100644 --- a/kernel/locking/rtmutex-debug.c +++ b/kernel/locking/rtmutex-debug.c @@ -18,6 +18,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/locking/rtmutex.c b/kernel/locking/rtmutex.c index 1c8c8707853a..6edc32ecd9c5 100644 --- a/kernel/locking/rtmutex.c +++ b/kernel/locking/rtmutex.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include "rtmutex_common.h" diff --git a/kernel/locking/rwsem-spinlock.c b/kernel/locking/rwsem-spinlock.c index 91e7bc8e9d5a..7bc24d477805 100644 --- a/kernel/locking/rwsem-spinlock.c +++ b/kernel/locking/rwsem-spinlock.c @@ -7,6 +7,7 @@ */ #include #include +#include #include enum rwsem_waiter_type { diff --git a/kernel/locking/rwsem-xadd.c b/kernel/locking/rwsem-xadd.c index 9bd6e768164e..34e727f18e49 100644 --- a/kernel/locking/rwsem-xadd.c +++ b/kernel/locking/rwsem-xadd.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include "rwsem.h" diff --git a/kernel/locking/rwsem.c b/kernel/locking/rwsem.c index 45ba475d4be3..90a74ccd85a4 100644 --- a/kernel/locking/rwsem.c +++ b/kernel/locking/rwsem.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/locking/semaphore.c b/kernel/locking/semaphore.c index 9512e37637dc..561acdd39960 100644 --- a/kernel/locking/semaphore.c +++ b/kernel/locking/semaphore.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/panic.c b/kernel/panic.c index 3ec16e603e88..a58932b41700 100644 --- a/kernel/panic.c +++ b/kernel/panic.c @@ -9,6 +9,7 @@ * to indicate a major problem. */ #include +#include #include #include #include diff --git a/kernel/power/process.c b/kernel/power/process.c index 2fba066e125f..85e2915d8961 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 47050eedc206..3caaaa04b92e 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include diff --git a/kernel/rcu/tree.c b/kernel/rcu/tree.c index e456327a63d6..50fee7689e71 100644 --- a/kernel/rcu/tree.c +++ b/kernel/rcu/tree.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/rcu/tree_plugin.h b/kernel/rcu/tree_plugin.h index 9dabb04003be..0a62a8f1caac 100644 --- a/kernel/rcu/tree_plugin.h +++ b/kernel/rcu/tree_plugin.h @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include "../time/tick-internal.h" diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c index da128deb10ec..55c8530316c7 100644 --- a/kernel/rcu/update.c +++ b/kernel/rcu/update.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/completion.c b/kernel/sched/completion.c index b294a8ee2842..53f9558fa925 100644 --- a/kernel/sched/completion.c +++ b/kernel/sched/completion.c @@ -12,6 +12,7 @@ */ #include +#include #include /** diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 5979f47c422c..e1e819f731b2 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -1830,7 +1831,6 @@ extern void print_rt_stats(struct seq_file *m, int cpu); extern void print_dl_stats(struct seq_file *m, int cpu); extern void print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); - #ifdef CONFIG_NUMA_BALANCING extern void show_numa_stats(struct task_struct *p, struct seq_file *m); diff --git a/kernel/sched/wait.c b/kernel/sched/wait.c index 1fedfcf6fc9b..4d2ea6f25568 100644 --- a/kernel/sched/wait.c +++ b/kernel/sched/wait.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 762fcf64f0c3..15092fd8c7b1 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/alarmtimer.c b/kernel/time/alarmtimer.c index 04939053c823..ce3a31e8eb36 100644 --- a/kernel/time/alarmtimer.c +++ b/kernel/time/alarmtimer.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c index aed2207f5b2d..ec08f527d7ee 100644 --- a/kernel/time/hrtimer.c +++ b/kernel/time/hrtimer.c @@ -48,6 +48,7 @@ #include #include #include +#include #include #include diff --git a/kernel/time/timer.c b/kernel/time/timer.c index d7999cb06229..1dc0256bfb6e 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 52718f4512e9..03e0b69bb5bf 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/kernel/watchdog_hld.c b/kernel/watchdog_hld.c index b5de262a9eb9..54a427d1f344 100644 --- a/kernel/watchdog_hld.c +++ b/kernel/watchdog_hld.c @@ -13,6 +13,8 @@ #include #include +#include + #include #include diff --git a/lib/dump_stack.c b/lib/dump_stack.c index c30d07e99dba..625375e7f11f 100644 --- a/lib/dump_stack.c +++ b/lib/dump_stack.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 5f7999eacad5..4e8a30d1c22f 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef arch_trigger_cpumask_backtrace /* For reliability, we're prepared to waste bits here. */ -- cgit v1.2.3 From ef8bd77f332bb0a4e467d7171bbfc6c57aa08a88 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm/kernel/smp.c | 1 + arch/arm64/include/asm/mmu_context.h | 1 + arch/arm64/kernel/smp.c | 1 + arch/ia64/kernel/process.c | 1 + arch/metag/kernel/smp.c | 1 + arch/mips/cavium-octeon/smp.c | 1 + arch/mips/kernel/smp-bmips.c | 1 + arch/mips/kernel/smp-cps.c | 1 + arch/mips/loongson64/loongson-3/smp.c | 1 + arch/powerpc/platforms/85xx/smp.c | 1 + arch/powerpc/platforms/powermac/smp.c | 1 + arch/powerpc/platforms/powernv/smp.c | 1 + arch/powerpc/platforms/pseries/hotplug-cpu.c | 1 + arch/s390/kernel/smp.c | 1 + arch/sh/kernel/smp.c | 1 + arch/sparc/kernel/smp_64.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/xtensa/kernel/smp.c | 1 + include/linux/sched/hotplug.h | 6 ++++++ kernel/cpu.c | 1 + kernel/sched/core.c | 1 + kernel/sched/sched.h | 4 +++- 22 files changed, 29 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/hotplug.h (limited to 'arch/x86') diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 5a07c5a4b894..2083a5370308 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 1ef40d82cfd3..3c9f7d18a7e6 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -25,6 +25,7 @@ #include #include +#include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 827d52d78b67..f66e58523b96 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 804b251ee5d1..2204ae450d65 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index c622293254e4..198eda75846b 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/cavium-octeon/smp.c b/arch/mips/cavium-octeon/smp.c index 4355a4cf4d74..4b94b7fbafa3 100644 --- a/arch/mips/cavium-octeon/smp.c +++ b/arch/mips/cavium-octeon/smp.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/kernel/smp-bmips.c b/arch/mips/kernel/smp-bmips.c index 16e37a28f876..3daa2cae50b0 100644 --- a/arch/mips/kernel/smp-bmips.c +++ b/arch/mips/kernel/smp-bmips.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/smp-cps.c b/arch/mips/kernel/smp-cps.c index a2544c2394e4..1d3188c23bb8 100644 --- a/arch/mips/kernel/smp-cps.c +++ b/arch/mips/kernel/smp-cps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index cfcf240cedbe..66ede5b11355 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/85xx/smp.c b/arch/powerpc/platforms/85xx/smp.c index a83a6d26090d..078097a0b09d 100644 --- a/arch/powerpc/platforms/85xx/smp.c +++ b/arch/powerpc/platforms/85xx/smp.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powermac/smp.c b/arch/powerpc/platforms/powermac/smp.c index c9eb7d6540ea..746ca7321b03 100644 --- a/arch/powerpc/platforms/powermac/smp.c +++ b/arch/powerpc/platforms/powermac/smp.c @@ -23,6 +23,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/powernv/smp.c b/arch/powerpc/platforms/powernv/smp.c index e39e6c428af1..8b67e1eefb5c 100644 --- a/arch/powerpc/platforms/powernv/smp.c +++ b/arch/powerpc/platforms/powernv/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/platforms/pseries/hotplug-cpu.c b/arch/powerpc/platforms/pseries/hotplug-cpu.c index a1b63e00b2f7..7bc0e91f8715 100644 --- a/arch/powerpc/platforms/pseries/hotplug-cpu.c +++ b/arch/powerpc/platforms/pseries/hotplug-cpu.c @@ -24,6 +24,7 @@ #include #include #include /* for idle_task_exit */ +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index d0a74d7ce433..b5766e03cdcb 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index edc4769b047e..4abf119c129c 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 8e3e13924594..15052d364e04 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index fe7a66e6b5a0..f3eb266d75ff 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -46,6 +46,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index fcea72019df7..f2b3e1725349 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h new file mode 100644 index 000000000000..34670ed24894 --- /dev/null +++ b/include/linux/sched/hotplug.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_HOTPLUG_H +#define _LINUX_SCHED_HOTPLUG_H + +#include + +#endif /* _LINUX_SCHED_HOTPLUG_H */ diff --git a/kernel/cpu.c b/kernel/cpu.c index 0aae3183b029..78c206579d01 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 11a0684a29a7..956383844116 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index e1e819f731b2..0974eb2ef50d 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -3,6 +3,7 @@ #include #include #include +#include #include #include #include @@ -12,8 +13,9 @@ #include #include #include +#include + #include -#include #include #include #include -- cgit v1.2.3 From 299300258d1bc4e997b7db340a2e06636757fe2e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:36 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/process.c | 1 + arch/arc/kernel/process.c | 2 ++ arch/arm/kernel/process.c | 1 + arch/arm/mm/init.c | 1 + arch/arm64/kernel/process.c | 1 + arch/avr32/kernel/process.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/trace.c | 1 + arch/c6x/kernel/process.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/kernel/process.c | 1 + arch/frv/kernel/process.c | 1 + arch/frv/mm/mmu-context.c | 1 + arch/h8300/kernel/process.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/ia64/kernel/mca.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/ptrace.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/smpboot.c | 1 + arch/m68k/kernel/process.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/traps.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/mips/kernel/mips-mt-fpaff.c | 1 + arch/mips/kernel/process.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/smp.c | 1 + arch/nios2/kernel/process.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/parisc/kernel/process.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/setup.c | 1 + arch/score/kernel/process.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/mm/asids-debugfs.c | 1 + arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/tile/kernel/process.c | 1 + arch/tile/kernel/smpboot.c | 1 + arch/tile/kernel/unaligned.c | 1 + arch/tile/mm/fault.c | 2 ++ arch/um/kernel/exec.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/reboot.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 1 + arch/x86/kernel/fpu/init.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/unwind_frame.c | 1 + arch/xtensa/kernel/process.c | 1 + drivers/misc/kgdbts.c | 2 ++ drivers/tty/sysrq.c | 2 +- drivers/tty/tty_io.c | 1 + fs/exec.c | 1 + fs/fcntl.c | 1 + fs/fs_struct.c | 1 + fs/proc/array.c | 1 + fs/proc/kcore.c | 1 + include/linux/sched/task.h | 6 ++++++ init/main.c | 1 + kernel/cgroup/cgroup.c | 1 + kernel/cpu.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/kmod.c | 1 + kernel/kthread.c | 1 + kernel/locking/lockdep.c | 1 + kernel/pid.c | 1 + kernel/pid_namespace.c | 1 + kernel/power/process.c | 1 + kernel/ptrace.c | 1 + kernel/sched/sched.h | 1 + kernel/signal.c | 1 + kernel/smpboot.c | 1 + kernel/sys.c | 1 + kernel/trace/ftrace.c | 1 + kernel/tracepoint.c | 1 + lib/dma-debug.c | 1 + mm/kmemleak.c | 1 + mm/memory-failure.c | 1 + mm/memory.c | 1 + mm/oom_kill.c | 1 + mm/rmap.c | 1 + mm/swapfile.c | 1 + mm/usercopy.c | 2 ++ security/keys/keyctl.c | 1 + security/selinux/hooks.c | 1 + 95 files changed, 104 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/task.h (limited to 'arch/x86') diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 88f7a974d311..713b4fac998e 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index a41a79a4f4fe..d618d26721ab 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index 03d46395d1ff..d4c7c9a1afa9 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/mm/init.c b/arch/arm/mm/init.c index 2a9040dcf47e..1d8558ff9827 100644 --- a/arch/arm/mm/init.c +++ b/arch/arm/mm/init.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index 40a16cdd9873..f9b8e74ff8f2 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index dac022d88d9d..75b944a5107c 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index e95d094c20a6..1a1f444004b1 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/trace.c b/arch/blackfin/kernel/trace.c index 23c05b8effb3..151f22196ab6 100644 --- a/arch/blackfin/kernel/trace.c +++ b/arch/blackfin/kernel/trace.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 0ee7686a78f3..6b61779d426a 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 068a40374200..808c2186b704 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index 613c2d71bf10..c852df262948 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/kernel/process.c b/arch/cris/kernel/process.c index 50a7dd451456..0bbd3a0c3d70 100644 --- a/arch/cris/kernel/process.c +++ b/arch/cris/kernel/process.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index cf4c34c09ab3..c96dbd4b8626 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/frv/mm/mmu-context.c b/arch/frv/mm/mmu-context.c index 4031289bf2ec..16946a58f64d 100644 --- a/arch/frv/mm/mmu-context.c +++ b/arch/frv/mm/mmu-context.c @@ -11,6 +11,7 @@ #include #include +#include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index d7cabf373fe3..54e4d6f01865 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index 3bdd9592d025..a2a822a875b6 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -20,6 +20,7 @@ #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/mca.c b/arch/ia64/kernel/mca.c index f9fe3ac64242..79c7c46d7dc1 100644 --- a/arch/ia64/kernel/mca.c +++ b/arch/ia64/kernel/mca.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 677a86826771..7e943d3c05ed 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 2204ae450d65..054facf22156 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 0b1153e610ea..04fe1436e1cc 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 1a227c1fdb1a..2a450382934c 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/smpboot.c b/arch/m32r/kernel/smpboot.c index f98d2f6519d6..a7d04684d2c7 100644 --- a/arch/m32r/kernel/smpboot.c +++ b/arch/m32r/kernel/smpboot.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index b4a4675f4119..5d335d178706 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 2e611bd37515..801e6f927e62 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 5ce67f9124aa..23c5ac33a93f 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index 8c5fdb2e2850..a642fe5ac5ff 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/mips-mt-fpaff.c b/arch/mips/kernel/mips-mt-fpaff.c index ffc6bd3464d9..8cab633e0e5a 100644 --- a/arch/mips/kernel/mips-mt-fpaff.c +++ b/arch/mips/kernel/mips-mt-fpaff.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 97574cdb532d..8bfb833b3158 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index a1e2f8301d94..8ca7a9de09a5 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index e65b5cc2fa67..d924f7a79708 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 3ad87a6b119b..5ee1ddf6a924 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 899339dac938..0c5ad3bde0f5 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index 8c283caf2b4d..e5f97239cc5e 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -44,6 +44,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index 76f58b87dcb2..b99b12656f6f 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index ed99ff911b67..250c41f05721 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e4d811f17971..3de07004c02e 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index aae9480706c2..32924159d8c9 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -28,6 +28,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index 37e35330aae6..b76370a47bf9 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 5098274e0f23..493c3eb86aa5 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index ced21a417d9d..056607185158 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/mm/asids-debugfs.c b/arch/sh/mm/asids-debugfs.c index 110bd35165bf..e5539e0f8e3b 100644 --- a/arch/sh/mm/asids-debugfs.c +++ b/arch/sh/mm/asids-debugfs.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index 237a471c8ed5..ed87c45a785b 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 98f6ff6eaa55..4c82a73af893 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index 557b4c8435d0..dbb4fa76d1dd 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index 53ce940a5016..f3fdd0c39b12 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/unaligned.c b/arch/tile/kernel/unaligned.c index 142acae78316..8149c38f67b6 100644 --- a/arch/tile/kernel/unaligned.c +++ b/arch/tile/kernel/unaligned.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/mm/fault.c b/arch/tile/mm/fault.c index 005c91c2adca..f58fa06a2214 100644 --- a/arch/tile/mm/fault.c +++ b/arch/tile/mm/fault.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 770ec07b6a6a..2df4e5d11939 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index c6c45ea4021f..e76dc7c11251 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 79218106a033..0bc921cee0b1 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index c1a0eec88f1f..d58f1600b477 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index d48af18e7baf..0bbe0f3a039f 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/init.c b/arch/x86/kernel/fpu/init.c index 19bdd1bf8160..c2f8dde3255c 100644 --- a/arch/x86/kernel/fpu/init.c +++ b/arch/x86/kernel/fpu/init.c @@ -7,6 +7,7 @@ #include #include +#include #include /* diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 505be5589e52..441f00e7be8f 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index a0ac3e81518a..d79ffa47aab8 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index a61e141b6891..124f5652ae3c 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 23d15565d02a..7c0abdc9a732 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -1,4 +1,5 @@ #include +#include #include #include #include diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index afaa3588d869..af33f9d4c624 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/misc/kgdbts.c b/drivers/misc/kgdbts.c index 99635dd9dbac..fc7efedbc4be 100644 --- a/drivers/misc/kgdbts.c +++ b/drivers/misc/kgdbts.c @@ -103,6 +103,8 @@ #include #include #include +#include + #include #define v1printk(a...) do { \ diff --git a/drivers/tty/sysrq.c b/drivers/tty/sysrq.c index e5f0c7d0fe8d..c6fc7141d7b2 100644 --- a/drivers/tty/sysrq.c +++ b/drivers/tty/sysrq.c @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/tty/tty_io.c b/drivers/tty/tty_io.c index 985d33f0f315..e6d1a6510886 100644 --- a/drivers/tty/tty_io.c +++ b/drivers/tty/tty_io.c @@ -70,6 +70,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/exec.c b/fs/exec.c index e857c7260b1f..65145a3df065 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fcntl.c b/fs/fcntl.c index e1c54f20325c..be8fbe289087 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/fs_struct.c b/fs/fs_struct.c index 543ed50f0387..be0250788b73 100644 --- a/fs/fs_struct.c +++ b/fs/fs_struct.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index cfe7f934a300..f3169b58af38 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -62,6 +62,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index ea9f3d1ae830..4ee55274f155 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include "internal.h" diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h new file mode 100644 index 000000000000..0023c91ff821 --- /dev/null +++ b/include/linux/sched/task.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_H +#define _LINUX_SCHED_TASK_H + +#include + +#endif /* _LINUX_SCHED_TASK_H */ diff --git a/init/main.c b/init/main.c index c891cfb8928f..47956b22add1 100644 --- a/init/main.c +++ b/init/main.c @@ -76,6 +76,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index e8f87bf9840c..0125589c7428 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/cpu.c b/kernel/cpu.c index 78c206579d01..f7c063239fa5 100644 --- a/kernel/cpu.c +++ b/kernel/cpu.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 5bad7dce1b7b..e53408d156df 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 8632905f64c5..0af64d5d8b73 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/kmod.c b/kernel/kmod.c index 0c407f905ca4..ac5f5c2d098d 100644 --- a/kernel/kmod.c +++ b/kernel/kmod.c @@ -20,6 +20,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/kthread.c b/kernel/kthread.c index ef9b9eb809c7..2f26adea0f84 100644 --- a/kernel/kthread.c +++ b/kernel/kthread.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include #include diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c index cdafaff926ca..12e38c213b70 100644 --- a/kernel/locking/lockdep.c +++ b/kernel/locking/lockdep.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/pid.c b/kernel/pid.c index 0291804151b5..0143ac0ddceb 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -38,6 +38,7 @@ #include #include #include +#include #define pid_hashfn(nr, ns) \ hash_long((unsigned long)nr + (unsigned long)ns, pidhash_shift) diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 25314c96fbe6..7c8367854dc4 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -19,6 +19,7 @@ #include #include #include +#include struct pid_cache { int nr_ids; diff --git a/kernel/power/process.c b/kernel/power/process.c index 85e2915d8961..c7209f060eeb 100644 --- a/kernel/power/process.c +++ b/kernel/power/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/ptrace.c b/kernel/ptrace.c index bcdbdc19eb35..0af928712174 100644 --- a/kernel/ptrace.c +++ b/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 0974eb2ef50d..6b7155ae5c33 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 15092fd8c7b1..25d099fd80f7 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/smpboot.c b/kernel/smpboot.c index 4a5c6e73ecd4..1d71c051a951 100644 --- a/kernel/smpboot.c +++ b/kernel/smpboot.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 8c0392c8be51..7f1ecd2e41c1 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -54,6 +54,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/ftrace.c b/kernel/trace/ftrace.c index 0c0609326391..0d1597c9ee30 100644 --- a/kernel/trace/ftrace.c +++ b/kernel/trace/ftrace.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/kernel/tracepoint.c b/kernel/tracepoint.c index 9f6984d52fec..685c50ae6300 100644 --- a/kernel/tracepoint.c +++ b/kernel/tracepoint.c @@ -25,6 +25,7 @@ #include #include #include +#include #include extern struct tracepoint * const __start___tracepoints_ptrs[]; diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 60c57ec936db..942adf13418d 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index 2df6d3687b2a..a3970573359e 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -74,6 +74,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory-failure.c b/mm/memory-failure.c index b74984db386e..27f7210e7fab 100644 --- a/mm/memory-failure.c +++ b/mm/memory-failure.c @@ -41,6 +41,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/memory.c b/mm/memory.c index d4994a28dc85..a97a4cec2e1f 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -43,6 +43,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 69f75b014607..d083714a2bb9 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/rmap.c b/mm/rmap.c index 0367a0506667..2da487d6cea8 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -47,6 +47,7 @@ #include #include +#include #include #include #include diff --git a/mm/swapfile.c b/mm/swapfile.c index ff2bf3f61b14..521ef9b6064f 100644 --- a/mm/swapfile.c +++ b/mm/swapfile.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/mm/usercopy.c b/mm/usercopy.c index 7ccad05c0d5c..d155e12563b1 100644 --- a/mm/usercopy.c +++ b/mm/usercopy.c @@ -17,6 +17,8 @@ #include #include #include +#include +#include #include enum { diff --git a/security/keys/keyctl.c b/security/keys/keyctl.c index bcb0b597c391..52c34532c785 100644 --- a/security/keys/keyctl.c +++ b/security/keys/keyctl.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index b12f873f92ba..57ff53696144 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 68db0cf10678630d286f4bbbbdfa102951a35faa Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 8 Feb 2017 18:51:37 +0100 Subject: sched/headers: Prepare for new header dependencies before moving code to We are going to split out of , which will have to be picked up from other headers and a couple of .c files. Create a trivial placeholder file that just maps to to make this patch obviously correct and bisectable. Include the new header in the files that are going to need it. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/alpha/kernel/process.c | 1 + arch/alpha/kernel/ptrace.c | 1 + arch/alpha/kernel/signal.c | 1 + arch/arc/kernel/kgdb.c | 1 + arch/arc/kernel/process.c | 1 + arch/arc/kernel/ptrace.c | 1 + arch/arc/kernel/signal.c | 2 ++ arch/arm/kernel/perf_regs.c | 1 + arch/arm/kernel/process.c | 1 + arch/arm/kernel/ptrace.c | 1 + arch/arm/kernel/smp.c | 1 + arch/arm/kernel/traps.c | 1 + arch/arm64/include/asm/compat.h | 1 + arch/arm64/kernel/debug-monitors.c | 1 + arch/arm64/kernel/kgdb.c | 2 ++ arch/arm64/kernel/perf_regs.c | 1 + arch/arm64/kernel/process.c | 1 + arch/arm64/kernel/ptrace.c | 1 + arch/arm64/kernel/smp.c | 1 + arch/arm64/kernel/stacktrace.c | 1 + arch/arm64/kernel/traps.c | 1 + arch/avr32/kernel/process.c | 1 + arch/avr32/kernel/ptrace.c | 1 + arch/avr32/kernel/stacktrace.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/kernel/ptrace.c | 1 + arch/blackfin/kernel/signal.c | 1 + arch/blackfin/kernel/stacktrace.c | 1 + arch/blackfin/mach-common/smp.c | 1 + arch/c6x/kernel/process.c | 1 + arch/c6x/kernel/ptrace.c | 1 + arch/cris/arch-v10/kernel/process.c | 1 + arch/cris/arch-v10/kernel/ptrace.c | 1 + arch/cris/arch-v10/kernel/signal.c | 1 + arch/cris/arch-v32/kernel/process.c | 1 + arch/cris/arch-v32/kernel/ptrace.c | 1 + arch/cris/arch-v32/kernel/signal.c | 1 + arch/frv/kernel/process.c | 1 + arch/h8300/kernel/process.c | 1 + arch/h8300/kernel/signal.c | 1 + arch/hexagon/kernel/kgdb.c | 1 + arch/hexagon/kernel/process.c | 1 + arch/hexagon/kernel/ptrace.c | 1 + arch/hexagon/kernel/signal.c | 2 ++ arch/hexagon/kernel/stacktrace.c | 1 + arch/hexagon/kernel/traps.c | 1 + arch/ia64/kernel/perfmon.c | 1 + arch/ia64/kernel/process.c | 1 + arch/ia64/kernel/ptrace.c | 1 + arch/ia64/kernel/setup.c | 1 + arch/ia64/kernel/sys_ia64.c | 1 + arch/m32r/kernel/process.c | 1 + arch/m32r/kernel/ptrace.c | 1 + arch/m68k/kernel/process.c | 1 + arch/m68k/kernel/ptrace.c | 1 + arch/metag/kernel/process.c | 1 + arch/metag/kernel/ptrace.c | 2 ++ arch/metag/kernel/signal.c | 1 + arch/metag/kernel/smp.c | 1 + arch/metag/kernel/traps.c | 1 + arch/microblaze/kernel/process.c | 1 + arch/microblaze/kernel/ptrace.c | 1 + arch/microblaze/kernel/unwind.c | 1 + arch/mips/include/asm/fpu.h | 1 + arch/mips/kernel/crash.c | 1 + arch/mips/kernel/perf_event.c | 1 + arch/mips/kernel/process.c | 1 + arch/mips/kernel/ptrace.c | 1 + arch/mips/kernel/ptrace32.c | 1 + arch/mips/kernel/stacktrace.c | 1 + arch/mips/kernel/syscall.c | 1 + arch/mips/loongson64/loongson-3/smp.c | 1 + arch/mips/paravirt/paravirt-smp.c | 1 + arch/mips/sibyte/bcm1480/smp.c | 1 + arch/mn10300/kernel/process.c | 1 + arch/mn10300/kernel/ptrace.c | 1 + arch/nios2/kernel/process.c | 1 + arch/nios2/kernel/ptrace.c | 1 + arch/openrisc/kernel/process.c | 1 + arch/openrisc/kernel/ptrace.c | 1 + arch/parisc/kernel/process.c | 1 + arch/powerpc/kernel/process.c | 1 + arch/powerpc/mm/fault.c | 1 + arch/powerpc/perf/perf_regs.c | 1 + arch/s390/include/asm/compat.h | 1 + arch/s390/include/asm/kprobes.h | 1 + arch/s390/kernel/compat_signal.c | 1 + arch/s390/kernel/dumpstack.c | 1 + arch/s390/kernel/process.c | 1 + arch/s390/kernel/ptrace.c | 1 + arch/s390/kernel/runtime_instr.c | 2 ++ arch/s390/kernel/signal.c | 1 + arch/s390/kernel/smp.c | 1 + arch/s390/kernel/uprobes.c | 2 ++ arch/score/kernel/process.c | 1 + arch/score/kernel/ptrace.c | 1 + arch/sh/kernel/cpu/fpu.c | 1 + arch/sh/kernel/dumpstack.c | 1 + arch/sh/kernel/kgdb.c | 2 ++ arch/sh/kernel/process.c | 1 + arch/sh/kernel/process_32.c | 1 + arch/sh/kernel/process_64.c | 1 + arch/sh/kernel/ptrace_32.c | 1 + arch/sh/kernel/ptrace_64.c | 1 + arch/sh/kernel/signal_32.c | 1 + arch/sh/kernel/sys_sh32.c | 1 + arch/sh/kernel/traps.c | 1 + arch/sh/kernel/traps_32.c | 2 ++ arch/sparc/kernel/process_32.c | 1 + arch/sparc/kernel/process_64.c | 1 + arch/sparc/kernel/ptrace_64.c | 1 + arch/tile/kernel/compat_signal.c | 1 + arch/tile/kernel/kgdb.c | 2 ++ arch/tile/kernel/process.c | 1 + arch/tile/kernel/ptrace.c | 2 ++ arch/tile/kernel/signal.c | 1 + arch/tile/kernel/stack.c | 1 + arch/um/kernel/exec.c | 1 + arch/um/kernel/process.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/unicore32/kernel/process.c | 1 + arch/unicore32/kernel/ptrace.c | 1 + arch/unicore32/kernel/traps.c | 1 + arch/x86/entry/common.c | 1 + arch/x86/entry/vdso/vma.c | 1 + arch/x86/ia32/ia32_aout.c | 1 + arch/x86/ia32/ia32_signal.c | 1 + arch/x86/kernel/dumpstack.c | 1 + arch/x86/kernel/fpu/regset.c | 1 + arch/x86/kernel/ioport.c | 1 + arch/x86/kernel/irq_64.c | 1 + arch/x86/kernel/perf_regs.c | 1 + arch/x86/kernel/process.c | 1 + arch/x86/kernel/process_32.c | 1 + arch/x86/kernel/process_64.c | 1 + arch/x86/kernel/ptrace.c | 1 + arch/x86/kernel/signal.c | 1 + arch/x86/kernel/smpboot.c | 1 + arch/x86/kernel/stacktrace.c | 1 + arch/x86/kernel/step.c | 1 + arch/x86/kernel/traps.c | 1 + arch/x86/kernel/unwind_frame.c | 1 + arch/x86/kernel/vm86_32.c | 1 + arch/x86/mm/fault.c | 1 + arch/xtensa/kernel/process.c | 1 + arch/xtensa/kernel/ptrace.c | 1 + arch/xtensa/kernel/signal.c | 1 + arch/xtensa/kernel/smp.c | 1 + block/blk-map.c | 1 + drivers/hv/vmbus_drv.c | 2 ++ drivers/ide/ide-cd.c | 1 + drivers/md/bcache/closure.h | 1 + drivers/misc/lkdtm_usercopy.c | 1 + drivers/mtd/nand/gpmi-nand/gpmi-nand.c | 1 + fs/binfmt_aout.c | 1 + fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 3 ++- fs/binfmt_flat.c | 1 + fs/coredump.c | 1 + include/linux/elfcore.h | 2 ++ include/linux/perf_regs.h | 2 ++ include/linux/sched/task_stack.h | 6 ++++++ init/main.c | 1 + kernel/events/callchain.c | 2 ++ kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/printk/printk.c | 1 + kernel/sched/sched.h | 1 + kernel/seccomp.c | 1 + kernel/signal.c | 1 + kernel/trace/trace_stack.c | 1 + lib/debugobjects.c | 1 + lib/dma-debug.c | 1 + lib/syscall.c | 1 + mm/kasan/kasan.c | 1 + mm/kmemleak.c | 1 + mm/util.c | 1 + 178 files changed, 198 insertions(+), 1 deletion(-) create mode 100644 include/linux/sched/task_stack.h (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 3b9b2a382ba2..73446baa632e 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/process.c b/arch/alpha/kernel/process.c index 713b4fac998e..0b9635040721 100644 --- a/arch/alpha/kernel/process.c +++ b/arch/alpha/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/ptrace.c b/arch/alpha/kernel/ptrace.c index bc4d2cdcf21d..285a82d491ef 100644 --- a/arch/alpha/kernel/ptrace.c +++ b/arch/alpha/kernel/ptrace.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/alpha/kernel/signal.c b/arch/alpha/kernel/signal.c index b8221f112eee..8129dd92cadc 100644 --- a/arch/alpha/kernel/signal.c +++ b/arch/alpha/kernel/signal.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/arch/arc/kernel/kgdb.c b/arch/arc/kernel/kgdb.c index ecf6a7869375..9a3c34af2ae8 100644 --- a/arch/arc/kernel/kgdb.c +++ b/arch/arc/kernel/kgdb.c @@ -10,6 +10,7 @@ #include #include +#include #include #include diff --git a/arch/arc/kernel/process.c b/arch/arc/kernel/process.c index d618d26721ab..2a018de6d6cd 100644 --- a/arch/arc/kernel/process.c +++ b/arch/arc/kernel/process.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/arc/kernel/ptrace.c b/arch/arc/kernel/ptrace.c index 4442204fe238..31150060d38b 100644 --- a/arch/arc/kernel/ptrace.c +++ b/arch/arc/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/arc/kernel/signal.c b/arch/arc/kernel/signal.c index d347bbc086fe..48685445002e 100644 --- a/arch/arc/kernel/signal.c +++ b/arch/arc/kernel/signal.c @@ -53,6 +53,8 @@ #include #include #include +#include + #include struct rt_sigframe { diff --git a/arch/arm/kernel/perf_regs.c b/arch/arm/kernel/perf_regs.c index 592dda3f21ff..c366b83bf955 100644 --- a/arch/arm/kernel/perf_regs.c +++ b/arch/arm/kernel/perf_regs.c @@ -3,6 +3,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/kernel/process.c b/arch/arm/kernel/process.c index d4c7c9a1afa9..939e8b58c59d 100644 --- a/arch/arm/kernel/process.c +++ b/arch/arm/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/kernel/ptrace.c b/arch/arm/kernel/ptrace.c index 46f7bab81c40..58e3771e4c5b 100644 --- a/arch/arm/kernel/ptrace.c +++ b/arch/arm/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index 2083a5370308..b724cff7ad60 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm/kernel/traps.c b/arch/arm/kernel/traps.c index a9dad001b97d..948c648fea00 100644 --- a/arch/arm/kernel/traps.c +++ b/arch/arm/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/include/asm/compat.h b/arch/arm64/include/asm/compat.h index eb8432bb82b8..e39d487bf724 100644 --- a/arch/arm64/include/asm/compat.h +++ b/arch/arm64/include/asm/compat.h @@ -23,6 +23,7 @@ */ #include #include +#include #define COMPAT_USER_HZ 100 #ifdef __AARCH64EB__ diff --git a/arch/arm64/kernel/debug-monitors.c b/arch/arm64/kernel/debug-monitors.c index 2bd426448fc1..32913567da08 100644 --- a/arch/arm64/kernel/debug-monitors.c +++ b/arch/arm64/kernel/debug-monitors.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/kgdb.c b/arch/arm64/kernel/kgdb.c index d217c9e95b06..2122cd187f19 100644 --- a/arch/arm64/kernel/kgdb.c +++ b/arch/arm64/kernel/kgdb.c @@ -24,6 +24,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/arm64/kernel/perf_regs.c b/arch/arm64/kernel/perf_regs.c index 3f62b35fb6f1..bd1b74c2436f 100644 --- a/arch/arm64/kernel/perf_regs.c +++ b/arch/arm64/kernel/perf_regs.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/process.c b/arch/arm64/kernel/process.c index f9b8e74ff8f2..043d373b8369 100644 --- a/arch/arm64/kernel/process.c +++ b/arch/arm64/kernel/process.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/ptrace.c b/arch/arm64/kernel/ptrace.c index 64fc32ea3422..c142459a88f3 100644 --- a/arch/arm64/kernel/ptrace.c +++ b/arch/arm64/kernel/ptrace.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index f66e58523b96..83c0a839a6ad 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/arm64/kernel/stacktrace.c b/arch/arm64/kernel/stacktrace.c index 7597e42feeea..feac80c22f61 100644 --- a/arch/arm64/kernel/stacktrace.c +++ b/arch/arm64/kernel/stacktrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index 5a5d83791837..bdbd0c3febf4 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/kernel/process.c b/arch/avr32/kernel/process.c index 75b944a5107c..ad0dfccedb79 100644 --- a/arch/avr32/kernel/process.c +++ b/arch/avr32/kernel/process.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/ptrace.c b/arch/avr32/kernel/ptrace.c index a89b893279bb..41a14e96a1db 100644 --- a/arch/avr32/kernel/ptrace.c +++ b/arch/avr32/kernel/ptrace.c @@ -8,6 +8,7 @@ #undef DEBUG #include #include +#include #include #include #include diff --git a/arch/avr32/kernel/stacktrace.c b/arch/avr32/kernel/stacktrace.c index c09f0d8dd679..f8cc995cf0e0 100644 --- a/arch/avr32/kernel/stacktrace.c +++ b/arch/avr32/kernel/stacktrace.c @@ -8,6 +8,7 @@ * published by the Free Software Foundation. */ #include +#include #include #include #include diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index 1a1f444004b1..b691ef875a40 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/ptrace.c b/arch/blackfin/kernel/ptrace.c index 360d99645163..a6827095b99a 100644 --- a/arch/blackfin/kernel/ptrace.c +++ b/arch/blackfin/kernel/ptrace.c @@ -7,6 +7,7 @@ #include #include +#include #include #include #include diff --git a/arch/blackfin/kernel/signal.c b/arch/blackfin/kernel/signal.c index ea570db598e5..5f5172779204 100644 --- a/arch/blackfin/kernel/signal.c +++ b/arch/blackfin/kernel/signal.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/blackfin/kernel/stacktrace.c b/arch/blackfin/kernel/stacktrace.c index 30301e1eace5..17198f3650b6 100644 --- a/arch/blackfin/kernel/stacktrace.c +++ b/arch/blackfin/kernel/stacktrace.c @@ -6,6 +6,7 @@ */ #include +#include #include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index a2e6db2ce811..3ac98252d299 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/c6x/kernel/process.c b/arch/c6x/kernel/process.c index 6b61779d426a..c4ecb24c2d5c 100644 --- a/arch/c6x/kernel/process.c +++ b/arch/c6x/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include diff --git a/arch/c6x/kernel/ptrace.c b/arch/c6x/kernel/ptrace.c index 3c494e84444d..a27e1f02ce18 100644 --- a/arch/c6x/kernel/ptrace.c +++ b/arch/c6x/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include diff --git a/arch/cris/arch-v10/kernel/process.c b/arch/cris/arch-v10/kernel/process.c index 808c2186b704..e299d30105b5 100644 --- a/arch/cris/arch-v10/kernel/process.c +++ b/arch/cris/arch-v10/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/ptrace.c b/arch/cris/arch-v10/kernel/ptrace.c index eca94c7d56e7..c2f2b9b83cc4 100644 --- a/arch/cris/arch-v10/kernel/ptrace.c +++ b/arch/cris/arch-v10/kernel/ptrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v10/kernel/signal.c b/arch/cris/arch-v10/kernel/signal.c index db30c98e4926..bab4a8dd6bfd 100644 --- a/arch/cris/arch-v10/kernel/signal.c +++ b/arch/cris/arch-v10/kernel/signal.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/process.c b/arch/cris/arch-v32/kernel/process.c index c852df262948..c530a8fa87ce 100644 --- a/arch/cris/arch-v32/kernel/process.c +++ b/arch/cris/arch-v32/kernel/process.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/ptrace.c b/arch/cris/arch-v32/kernel/ptrace.c index c366bc05466a..0461e95bbb62 100644 --- a/arch/cris/arch-v32/kernel/ptrace.c +++ b/arch/cris/arch-v32/kernel/ptrace.c @@ -4,6 +4,7 @@ #include #include +#include #include #include #include diff --git a/arch/cris/arch-v32/kernel/signal.c b/arch/cris/arch-v32/kernel/signal.c index 816bf2ca93ef..ea2e8e1398e8 100644 --- a/arch/cris/arch-v32/kernel/signal.c +++ b/arch/cris/arch-v32/kernel/signal.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include diff --git a/arch/frv/kernel/process.c b/arch/frv/kernel/process.c index c96dbd4b8626..5a4c92abc99e 100644 --- a/arch/frv/kernel/process.c +++ b/arch/frv/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/process.c b/arch/h8300/kernel/process.c index 54e4d6f01865..0f5db5bb561b 100644 --- a/arch/h8300/kernel/process.c +++ b/arch/h8300/kernel/process.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/h8300/kernel/signal.c b/arch/h8300/kernel/signal.c index d784f7117f9a..1e8070d08770 100644 --- a/arch/h8300/kernel/signal.c +++ b/arch/h8300/kernel/signal.c @@ -25,6 +25,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/kgdb.c b/arch/hexagon/kernel/kgdb.c index 62dece3ad827..16c24b22d0b2 100644 --- a/arch/hexagon/kernel/kgdb.c +++ b/arch/hexagon/kernel/kgdb.c @@ -20,6 +20,7 @@ #include #include +#include #include #include diff --git a/arch/hexagon/kernel/process.c b/arch/hexagon/kernel/process.c index a2a822a875b6..de715bab7956 100644 --- a/arch/hexagon/kernel/process.c +++ b/arch/hexagon/kernel/process.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/ptrace.c b/arch/hexagon/kernel/ptrace.c index 390a9ad14ca1..ecd75e2e8eb3 100644 --- a/arch/hexagon/kernel/ptrace.c +++ b/arch/hexagon/kernel/ptrace.c @@ -22,6 +22,7 @@ #include #include +#include #include #include #include diff --git a/arch/hexagon/kernel/signal.c b/arch/hexagon/kernel/signal.c index c6b22b9945a7..78aa7304a5c9 100644 --- a/arch/hexagon/kernel/signal.c +++ b/arch/hexagon/kernel/signal.c @@ -21,6 +21,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/hexagon/kernel/stacktrace.c b/arch/hexagon/kernel/stacktrace.c index f94918b449a8..41866a06adf7 100644 --- a/arch/hexagon/kernel/stacktrace.c +++ b/arch/hexagon/kernel/stacktrace.c @@ -19,6 +19,7 @@ */ #include +#include #include #include #include diff --git a/arch/hexagon/kernel/traps.c b/arch/hexagon/kernel/traps.c index b55f13c70b34..2942a9204a9a 100644 --- a/arch/hexagon/kernel/traps.c +++ b/arch/hexagon/kernel/traps.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/perfmon.c b/arch/ia64/kernel/perfmon.c index 7e943d3c05ed..09f86ebfcc7b 100644 --- a/arch/ia64/kernel/perfmon.c +++ b/arch/ia64/kernel/perfmon.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/process.c b/arch/ia64/kernel/process.c index 054facf22156..d344d0d691aa 100644 --- a/arch/ia64/kernel/process.c +++ b/arch/ia64/kernel/process.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/ptrace.c b/arch/ia64/kernel/ptrace.c index 04fe1436e1cc..3f8293378a83 100644 --- a/arch/ia64/kernel/ptrace.c +++ b/arch/ia64/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 32a6cc36296f..b2b4f5216180 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/sys_ia64.c b/arch/ia64/kernel/sys_ia64.c index ce4cc60d519b..5ce927c854a6 100644 --- a/arch/ia64/kernel/sys_ia64.c +++ b/arch/ia64/kernel/sys_ia64.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include /* doh, must come after sched.h... */ #include diff --git a/arch/m32r/kernel/process.c b/arch/m32r/kernel/process.c index 2a450382934c..d8ffcfec599c 100644 --- a/arch/m32r/kernel/process.c +++ b/arch/m32r/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/kernel/ptrace.c b/arch/m32r/kernel/ptrace.c index a68acb9fa515..2d887400e30e 100644 --- a/arch/m32r/kernel/ptrace.c +++ b/arch/m32r/kernel/ptrace.c @@ -16,6 +16,7 @@ #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/process.c b/arch/m68k/kernel/process.c index 5d335d178706..e475c945c8b2 100644 --- a/arch/m68k/kernel/process.c +++ b/arch/m68k/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m68k/kernel/ptrace.c b/arch/m68k/kernel/ptrace.c index 9cd86d7343a6..748c63bd0081 100644 --- a/arch/m68k/kernel/ptrace.c +++ b/arch/m68k/kernel/ptrace.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/kernel/process.c b/arch/metag/kernel/process.c index 801e6f927e62..c4606ce743d2 100644 --- a/arch/metag/kernel/process.c +++ b/arch/metag/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/ptrace.c b/arch/metag/kernel/ptrace.c index 7563628822bd..5fd16ee5280c 100644 --- a/arch/metag/kernel/ptrace.c +++ b/arch/metag/kernel/ptrace.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #define CREATE_TRACE_POINTS diff --git a/arch/metag/kernel/signal.c b/arch/metag/kernel/signal.c index ce49d429c74a..338925d808e6 100644 --- a/arch/metag/kernel/signal.c +++ b/arch/metag/kernel/signal.c @@ -7,6 +7,7 @@ */ #include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index 198eda75846b..cab2aa64ca82 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/metag/kernel/traps.c b/arch/metag/kernel/traps.c index 23c5ac33a93f..444851e510d5 100644 --- a/arch/metag/kernel/traps.c +++ b/arch/metag/kernel/traps.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/process.c b/arch/microblaze/kernel/process.c index a642fe5ac5ff..e92a817e645f 100644 --- a/arch/microblaze/kernel/process.c +++ b/arch/microblaze/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/ptrace.c b/arch/microblaze/kernel/ptrace.c index 8cfa98cadf3d..badd286882ae 100644 --- a/arch/microblaze/kernel/ptrace.c +++ b/arch/microblaze/kernel/ptrace.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/microblaze/kernel/unwind.c b/arch/microblaze/kernel/unwind.c index 61c04eed14d5..34c270cb11fc 100644 --- a/arch/microblaze/kernel/unwind.c +++ b/arch/microblaze/kernel/unwind.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/include/asm/fpu.h b/arch/mips/include/asm/fpu.h index f06f97bd62df..321752bcbab6 100644 --- a/arch/mips/include/asm/fpu.h +++ b/arch/mips/include/asm/fpu.h @@ -11,6 +11,7 @@ #define _ASM_FPU_H #include +#include #include #include diff --git a/arch/mips/kernel/crash.c b/arch/mips/kernel/crash.c index 5a71518be0f1..ca25cd393b1c 100644 --- a/arch/mips/kernel/crash.c +++ b/arch/mips/kernel/crash.c @@ -8,6 +8,7 @@ #include #include #include +#include /* This keeps a track of which one is crashing cpu. */ static int crashing_cpu = -1; diff --git a/arch/mips/kernel/perf_event.c b/arch/mips/kernel/perf_event.c index d64056e0bb56..f298eb2ff6c2 100644 --- a/arch/mips/kernel/perf_event.c +++ b/arch/mips/kernel/perf_event.c @@ -15,6 +15,7 @@ */ #include +#include #include diff --git a/arch/mips/kernel/process.c b/arch/mips/kernel/process.c index 8bfb833b3158..fb6b6b650719 100644 --- a/arch/mips/kernel/process.c +++ b/arch/mips/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace.c b/arch/mips/kernel/ptrace.c index fdef26382c37..339601267265 100644 --- a/arch/mips/kernel/ptrace.c +++ b/arch/mips/kernel/ptrace.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/ptrace32.c b/arch/mips/kernel/ptrace32.c index 4f0998525626..40e212d6b26b 100644 --- a/arch/mips/kernel/ptrace32.c +++ b/arch/mips/kernel/ptrace32.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/kernel/stacktrace.c b/arch/mips/kernel/stacktrace.c index 986f910961d9..7c7c902249f2 100644 --- a/arch/mips/kernel/stacktrace.c +++ b/arch/mips/kernel/stacktrace.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/mips/kernel/syscall.c b/arch/mips/kernel/syscall.c index c86ddbaa4598..f1d17ece4181 100644 --- a/arch/mips/kernel/syscall.c +++ b/arch/mips/kernel/syscall.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/loongson64/loongson-3/smp.c b/arch/mips/loongson64/loongson-3/smp.c index 66ede5b11355..64659fc73940 100644 --- a/arch/mips/loongson64/loongson-3/smp.c +++ b/arch/mips/loongson64/loongson-3/smp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mips/paravirt/paravirt-smp.c b/arch/mips/paravirt/paravirt-smp.c index f8d3e081b2eb..72eb1a56c645 100644 --- a/arch/mips/paravirt/paravirt-smp.c +++ b/arch/mips/paravirt/paravirt-smp.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/sibyte/bcm1480/smp.c b/arch/mips/sibyte/bcm1480/smp.c index 4c71aea25663..d0e94ffcc1b8 100644 --- a/arch/mips/sibyte/bcm1480/smp.c +++ b/arch/mips/sibyte/bcm1480/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/mn10300/kernel/process.c b/arch/mn10300/kernel/process.c index 8ca7a9de09a5..c9fa42619c6a 100644 --- a/arch/mn10300/kernel/process.c +++ b/arch/mn10300/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/kernel/ptrace.c b/arch/mn10300/kernel/ptrace.c index 976020f469c1..8009876a7ac4 100644 --- a/arch/mn10300/kernel/ptrace.c +++ b/arch/mn10300/kernel/ptrace.c @@ -11,6 +11,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 5ee1ddf6a924..869a4d59de32 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include diff --git a/arch/nios2/kernel/ptrace.c b/arch/nios2/kernel/ptrace.c index 681dda92eff1..de97bcb7dd44 100644 --- a/arch/nios2/kernel/ptrace.c +++ b/arch/nios2/kernel/ptrace.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/process.c b/arch/openrisc/kernel/process.c index 0c5ad3bde0f5..828a29110459 100644 --- a/arch/openrisc/kernel/process.c +++ b/arch/openrisc/kernel/process.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/openrisc/kernel/ptrace.c b/arch/openrisc/kernel/ptrace.c index 228288887d74..eb97a8e7c8aa 100644 --- a/arch/openrisc/kernel/ptrace.c +++ b/arch/openrisc/kernel/ptrace.c @@ -18,6 +18,7 @@ #include #include +#include #include #include diff --git a/arch/parisc/kernel/process.c b/arch/parisc/kernel/process.c index e5f97239cc5e..06f7ca7fe70b 100644 --- a/arch/parisc/kernel/process.c +++ b/arch/parisc/kernel/process.c @@ -45,6 +45,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/process.c b/arch/powerpc/kernel/process.c index b99b12656f6f..d645da302bf2 100644 --- a/arch/powerpc/kernel/process.c +++ b/arch/powerpc/kernel/process.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/fault.c b/arch/powerpc/mm/fault.c index 8dc758658972..51def8a515be 100644 --- a/arch/powerpc/mm/fault.c +++ b/arch/powerpc/mm/fault.c @@ -17,6 +17,7 @@ #include #include +#include #include #include #include diff --git a/arch/powerpc/perf/perf_regs.c b/arch/powerpc/perf/perf_regs.c index d24a8a3668fa..cbd82fde5770 100644 --- a/arch/powerpc/perf/perf_regs.c +++ b/arch/powerpc/perf/perf_regs.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/include/asm/compat.h b/arch/s390/include/asm/compat.h index 352f7bdaf11f..0ddd37e6c29d 100644 --- a/arch/s390/include/asm/compat.h +++ b/arch/s390/include/asm/compat.h @@ -5,6 +5,7 @@ */ #include #include +#include #include #define __TYPE_IS_PTR(t) (!__builtin_types_compatible_p(typeof(0?(t)0:0ULL), u64)) diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 84c0f9086483..1293c4066cfc 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -35,6 +35,7 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT diff --git a/arch/s390/kernel/compat_signal.c b/arch/s390/kernel/compat_signal.c index 362350cc485c..c620049c61f2 100644 --- a/arch/s390/kernel/compat_signal.c +++ b/arch/s390/kernel/compat_signal.c @@ -10,6 +10,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/dumpstack.c b/arch/s390/kernel/dumpstack.c index 72c584d62f59..829e1c53005c 100644 --- a/arch/s390/kernel/dumpstack.c +++ b/arch/s390/kernel/dumpstack.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/process.c b/arch/s390/kernel/process.c index 250c41f05721..20cd339e11ae 100644 --- a/arch/s390/kernel/process.c +++ b/arch/s390/kernel/process.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/ptrace.c b/arch/s390/kernel/ptrace.c index 12020b55887b..c14df0a1ec3c 100644 --- a/arch/s390/kernel/ptrace.c +++ b/arch/s390/kernel/ptrace.c @@ -8,6 +8,7 @@ #include #include +#include #include #include #include diff --git a/arch/s390/kernel/runtime_instr.c b/arch/s390/kernel/runtime_instr.c index fffa0e5462af..429d3a782f1c 100644 --- a/arch/s390/kernel/runtime_instr.c +++ b/arch/s390/kernel/runtime_instr.c @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/s390/kernel/signal.c b/arch/s390/kernel/signal.c index 62a4c263e887..289dd50f9744 100644 --- a/arch/s390/kernel/signal.c +++ b/arch/s390/kernel/signal.c @@ -10,6 +10,7 @@ */ #include +#include #include #include #include diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index b5766e03cdcb..47a973b5b4f1 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -32,6 +32,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kernel/uprobes.c b/arch/s390/kernel/uprobes.c index 66956c09d5bf..314e0ee3016a 100644 --- a/arch/s390/kernel/uprobes.c +++ b/arch/s390/kernel/uprobes.c @@ -9,6 +9,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/score/kernel/process.c b/arch/score/kernel/process.c index 32924159d8c9..eb64d7a677cb 100644 --- a/arch/score/kernel/process.c +++ b/arch/score/kernel/process.c @@ -29,6 +29,7 @@ #include #include #include +#include void (*pm_power_off)(void); EXPORT_SYMBOL(pm_power_off); diff --git a/arch/score/kernel/ptrace.c b/arch/score/kernel/ptrace.c index 8b75e54816c1..d8455e60bce0 100644 --- a/arch/score/kernel/ptrace.c +++ b/arch/score/kernel/ptrace.c @@ -28,6 +28,7 @@ #include #include #include +#include #include diff --git a/arch/sh/kernel/cpu/fpu.c b/arch/sh/kernel/cpu/fpu.c index b76370a47bf9..547c73478459 100644 --- a/arch/sh/kernel/cpu/fpu.c +++ b/arch/sh/kernel/cpu/fpu.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/dumpstack.c b/arch/sh/kernel/dumpstack.c index d00cab2f50f9..b564b1eae4ae 100644 --- a/arch/sh/kernel/dumpstack.c +++ b/arch/sh/kernel/dumpstack.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/kgdb.c b/arch/sh/kernel/kgdb.c index adad46e41a1d..4f04c6638a4d 100644 --- a/arch/sh/kernel/kgdb.c +++ b/arch/sh/kernel/kgdb.c @@ -14,6 +14,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sh/kernel/process.c b/arch/sh/kernel/process.c index 80a61c5e3015..f8a695a223dd 100644 --- a/arch/sh/kernel/process.c +++ b/arch/sh/kernel/process.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_32.c b/arch/sh/kernel/process_32.c index 493c3eb86aa5..2c7bdf8cb934 100644 --- a/arch/sh/kernel/process_32.c +++ b/arch/sh/kernel/process_32.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/process_64.c b/arch/sh/kernel/process_64.c index 056607185158..ee2abe96f9f3 100644 --- a/arch/sh/kernel/process_64.c +++ b/arch/sh/kernel/process_64.c @@ -27,6 +27,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_32.c b/arch/sh/kernel/ptrace_32.c index 1aabfd356b35..5fc3ff606210 100644 --- a/arch/sh/kernel/ptrace_32.c +++ b/arch/sh/kernel/ptrace_32.c @@ -12,6 +12,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/ptrace_64.c b/arch/sh/kernel/ptrace_64.c index c49d0d05a215..1e0656d9e7af 100644 --- a/arch/sh/kernel/ptrace_64.c +++ b/arch/sh/kernel/ptrace_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/signal_32.c b/arch/sh/kernel/signal_32.c index 5128d3001ee5..08bce11badc6 100644 --- a/arch/sh/kernel/signal_32.c +++ b/arch/sh/kernel/signal_32.c @@ -9,6 +9,7 @@ * */ #include +#include #include #include #include diff --git a/arch/sh/kernel/sys_sh32.c b/arch/sh/kernel/sys_sh32.c index d5287d76809c..a2e1231a90a3 100644 --- a/arch/sh/kernel/sys_sh32.c +++ b/arch/sh/kernel/sys_sh32.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps.c b/arch/sh/kernel/traps.c index bc5c9f347b9e..b32d1c3a4655 100644 --- a/arch/sh/kernel/traps.c +++ b/arch/sh/kernel/traps.c @@ -5,6 +5,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sh/kernel/traps_32.c b/arch/sh/kernel/traps_32.c index ff639342a8be..57cff00cad17 100644 --- a/arch/sh/kernel/traps_32.c +++ b/arch/sh/kernel/traps_32.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/sparc/kernel/process_32.c b/arch/sparc/kernel/process_32.c index ed87c45a785b..b6dac8e980f0 100644 --- a/arch/sparc/kernel/process_32.c +++ b/arch/sparc/kernel/process_32.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/process_64.c b/arch/sparc/kernel/process_64.c index 4c82a73af893..1badc493e62e 100644 --- a/arch/sparc/kernel/process_64.c +++ b/arch/sparc/kernel/process_64.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/sparc/kernel/ptrace_64.c b/arch/sparc/kernel/ptrace_64.c index 901063c1cf7e..df9e731a76f5 100644 --- a/arch/sparc/kernel/ptrace_64.c +++ b/arch/sparc/kernel/ptrace_64.c @@ -12,6 +12,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/compat_signal.c b/arch/tile/kernel/compat_signal.c index c667e104a0c2..0e863f1ee08c 100644 --- a/arch/tile/kernel/compat_signal.c +++ b/arch/tile/kernel/compat_signal.c @@ -13,6 +13,7 @@ */ #include +#include #include #include #include diff --git a/arch/tile/kernel/kgdb.c b/arch/tile/kernel/kgdb.c index 9247d6b562f4..d4eb5fb2df9d 100644 --- a/arch/tile/kernel/kgdb.c +++ b/arch/tile/kernel/kgdb.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include static tile_bundle_bits singlestep_insn = TILEGX_BPT_BUNDLE | DIE_SSTEPBP; diff --git a/arch/tile/kernel/process.c b/arch/tile/kernel/process.c index dbb4fa76d1dd..f0a0e18e4dfb 100644 --- a/arch/tile/kernel/process.c +++ b/arch/tile/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/tile/kernel/ptrace.c b/arch/tile/kernel/ptrace.c index e279572824b1..e1a078e6828e 100644 --- a/arch/tile/kernel/ptrace.c +++ b/arch/tile/kernel/ptrace.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include diff --git a/arch/tile/kernel/signal.c b/arch/tile/kernel/signal.c index 8cc92ae6eb27..f2bf557bb005 100644 --- a/arch/tile/kernel/signal.c +++ b/arch/tile/kernel/signal.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/arch/tile/kernel/stack.c b/arch/tile/kernel/stack.c index a1c0787fe9d8..94ecbc6676e5 100644 --- a/arch/tile/kernel/stack.c +++ b/arch/tile/kernel/stack.c @@ -14,6 +14,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 2df4e5d11939..31968677a0d0 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/process.c b/arch/um/kernel/process.c index e76dc7c11251..a9bd61820042 100644 --- a/arch/um/kernel/process.c +++ b/arch/um/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 527fa5881915..ddecf326dab2 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/process.c b/arch/unicore32/kernel/process.c index d58f1600b477..d22c1dc7e39e 100644 --- a/arch/unicore32/kernel/process.c +++ b/arch/unicore32/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/unicore32/kernel/ptrace.c b/arch/unicore32/kernel/ptrace.c index 9f07c08da050..a102c2b4f358 100644 --- a/arch/unicore32/kernel/ptrace.c +++ b/arch/unicore32/kernel/ptrace.c @@ -15,6 +15,7 @@ #include #include #include +#include /* * this routine will get a word off of the processes privileged stack. diff --git a/arch/unicore32/kernel/traps.c b/arch/unicore32/kernel/traps.c index 506e1a2127c6..5f25b39f04d4 100644 --- a/arch/unicore32/kernel/traps.c +++ b/arch/unicore32/kernel/traps.c @@ -16,6 +16,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/entry/common.c b/arch/x86/entry/common.c index b83c61cfd154..370c42c7f046 100644 --- a/arch/x86/entry/common.c +++ b/arch/x86/entry/common.c @@ -9,6 +9,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/entry/vdso/vma.c b/arch/x86/entry/vdso/vma.c index 572cee3fccff..226ca70dc6bd 100644 --- a/arch/x86/entry/vdso/vma.c +++ b/arch/x86/entry/vdso/vma.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/ia32/ia32_aout.c b/arch/x86/ia32/ia32_aout.c index 7c0a711989d2..8d0879f1d42c 100644 --- a/arch/x86/ia32/ia32_aout.c +++ b/arch/x86/ia32/ia32_aout.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/ia32/ia32_signal.c b/arch/x86/ia32/ia32_signal.c index 95c0b4ae09b0..724153797209 100644 --- a/arch/x86/ia32/ia32_signal.c +++ b/arch/x86/ia32/ia32_signal.c @@ -9,6 +9,7 @@ */ #include +#include #include #include #include diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 7b9e7e68f316..09d4ac0d2661 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/fpu/regset.c b/arch/x86/kernel/fpu/regset.c index c114b132d121..b188b16841e3 100644 --- a/arch/x86/kernel/fpu/regset.c +++ b/arch/x86/kernel/fpu/regset.c @@ -5,6 +5,7 @@ #include #include #include +#include /* * The xstateregs_active() routine is the same as the regset_fpregs_active() routine, diff --git a/arch/x86/kernel/ioport.c b/arch/x86/kernel/ioport.c index b01bc8517450..ca49bab3e467 100644 --- a/arch/x86/kernel/ioport.c +++ b/arch/x86/kernel/ioport.c @@ -4,6 +4,7 @@ */ #include +#include #include #include #include diff --git a/arch/x86/kernel/irq_64.c b/arch/x86/kernel/irq_64.c index 6b0678a541e2..3be74fbdeff2 100644 --- a/arch/x86/kernel/irq_64.c +++ b/arch/x86/kernel/irq_64.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kernel/perf_regs.c b/arch/x86/kernel/perf_regs.c index da8cb987b973..587d887f7f17 100644 --- a/arch/x86/kernel/perf_regs.c +++ b/arch/x86/kernel/perf_regs.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 441f00e7be8f..56b059486c3b 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index d79ffa47aab8..4c818f8bc135 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 124f5652ae3c..d6b784a5520d 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 9cc7d5a330ef..2364b23ea3e5 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -6,6 +6,7 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/signal.c b/arch/x86/kernel/signal.c index 763af1d0de64..396c042e9d0e 100644 --- a/arch/x86/kernel/signal.c +++ b/arch/x86/kernel/signal.c @@ -10,6 +10,7 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include #include #include #include diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index f3eb266d75ff..bd1f1ad35284 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/stacktrace.c b/arch/x86/kernel/stacktrace.c index 330cae0025d0..8e2b79b88e51 100644 --- a/arch/x86/kernel/stacktrace.c +++ b/arch/x86/kernel/stacktrace.c @@ -5,6 +5,7 @@ */ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index a23ce84a3f6c..f07f83b3611b 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -2,6 +2,7 @@ * x86 single-step support code, common to 32-bit and 64-bit. */ #include +#include #include #include #include diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1dc86ee60a03..948443e115c1 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 7c0abdc9a732..478d15dbaee4 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -1,5 +1,6 @@ #include #include +#include #include #include #include diff --git a/arch/x86/kernel/vm86_32.c b/arch/x86/kernel/vm86_32.c index 0442d98367ae..23ee89ce59a9 100644 --- a/arch/x86/kernel/vm86_32.c +++ b/arch/x86/kernel/vm86_32.c @@ -35,6 +35,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/fault.c b/arch/x86/mm/fault.c index e3254ca0eec4..428e31763cb9 100644 --- a/arch/x86/mm/fault.c +++ b/arch/x86/mm/fault.c @@ -4,6 +4,7 @@ * Copyright (C) 2008-2009, Red Hat Inc., Ingo Molnar */ #include /* test_thread_flag(), ... */ +#include /* task_stack_*(), ... */ #include /* oops_begin/end, ... */ #include /* search_exception_tables */ #include /* max_low_pfn */ diff --git a/arch/xtensa/kernel/process.c b/arch/xtensa/kernel/process.c index af33f9d4c624..58f96d1230d4 100644 --- a/arch/xtensa/kernel/process.c +++ b/arch/xtensa/kernel/process.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/ptrace.c b/arch/xtensa/kernel/ptrace.c index 32519b71d914..e0f583fed06a 100644 --- a/arch/xtensa/kernel/ptrace.c +++ b/arch/xtensa/kernel/ptrace.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/xtensa/kernel/signal.c b/arch/xtensa/kernel/signal.c index c41294745731..70a131945443 100644 --- a/arch/xtensa/kernel/signal.c +++ b/arch/xtensa/kernel/signal.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index f2b3e1725349..fd894eaa63f3 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -22,6 +22,7 @@ #include #include #include +#include #include #include #include diff --git a/block/blk-map.c b/block/blk-map.c index 2f18c2a0be1b..3b5cb863318f 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -2,6 +2,7 @@ * Functions related to mapping data to requests */ #include +#include #include #include #include diff --git a/drivers/hv/vmbus_drv.c b/drivers/hv/vmbus_drv.c index f7f6b9144b07..da6b59ba5940 100644 --- a/drivers/hv/vmbus_drv.c +++ b/drivers/hv/vmbus_drv.c @@ -34,6 +34,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/ide/ide-cd.c b/drivers/ide/ide-cd.c index aef00511ca86..74f1b7dc03f7 100644 --- a/drivers/ide/ide-cd.c +++ b/drivers/ide/ide-cd.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include diff --git a/drivers/md/bcache/closure.h b/drivers/md/bcache/closure.h index 9b2fe2d3e3a9..1ec84ca81146 100644 --- a/drivers/md/bcache/closure.h +++ b/drivers/md/bcache/closure.h @@ -3,6 +3,7 @@ #include #include +#include #include /* diff --git a/drivers/misc/lkdtm_usercopy.c b/drivers/misc/lkdtm_usercopy.c index 1dd611423d8b..df6ac985fbb5 100644 --- a/drivers/misc/lkdtm_usercopy.c +++ b/drivers/misc/lkdtm_usercopy.c @@ -5,6 +5,7 @@ #include "lkdtm.h" #include #include +#include #include #include #include diff --git a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c index 6c062b8251d2..d52139635b67 100644 --- a/drivers/mtd/nand/gpmi-nand/gpmi-nand.c +++ b/drivers/mtd/nand/gpmi-nand/gpmi-nand.c @@ -20,6 +20,7 @@ */ #include #include +#include #include #include #include diff --git a/fs/binfmt_aout.c b/fs/binfmt_aout.c index 2a59139f520b..9be82c4e14a4 100644 --- a/fs/binfmt_aout.c +++ b/fs/binfmt_aout.c @@ -25,6 +25,7 @@ #include #include #include +#include #include #include diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 2c95257fa4da..92c00a13b28b 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 222873bb689c..6103a8149ccd 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -15,6 +15,8 @@ #include #include #include +#include +#include #include #include #include @@ -35,7 +37,6 @@ #include #include #include -#include #include #include diff --git a/fs/binfmt_flat.c b/fs/binfmt_flat.c index 9b2917a30294..2edcefc0a294 100644 --- a/fs/binfmt_flat.c +++ b/fs/binfmt_flat.c @@ -19,6 +19,7 @@ #include #include +#include #include #include #include diff --git a/fs/coredump.c b/fs/coredump.c index c74ab43b8383..592683711c64 100644 --- a/fs/coredump.c +++ b/fs/coredump.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/elfcore.h b/include/linux/elfcore.h index 698d51a0eea3..c8240a12c42d 100644 --- a/include/linux/elfcore.h +++ b/include/linux/elfcore.h @@ -3,6 +3,8 @@ #include #include +#include + #include #include diff --git a/include/linux/perf_regs.h b/include/linux/perf_regs.h index a5f98d53d732..9b7dd59fe28d 100644 --- a/include/linux/perf_regs.h +++ b/include/linux/perf_regs.h @@ -1,6 +1,8 @@ #ifndef _LINUX_PERF_REGS_H #define _LINUX_PERF_REGS_H +#include + struct perf_regs { __u64 abi; struct pt_regs *regs; diff --git a/include/linux/sched/task_stack.h b/include/linux/sched/task_stack.h new file mode 100644 index 000000000000..933cd49824c2 --- /dev/null +++ b/include/linux/sched/task_stack.h @@ -0,0 +1,6 @@ +#ifndef _LINUX_SCHED_TASK_STACK_H +#define _LINUX_SCHED_TASK_STACK_H + +#include + +#endif /* _LINUX_SCHED_TASK_STACK_H */ diff --git a/init/main.c b/init/main.c index 47956b22add1..c2d337a136d1 100644 --- a/init/main.c +++ b/init/main.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index e9fdb5203de5..c04917cad1bf 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -11,6 +11,8 @@ #include #include +#include + #include "internal.h" struct callchain_cpus_entries { diff --git a/kernel/exit.c b/kernel/exit.c index e53408d156df..771c33fc9952 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index 0af64d5d8b73..c87b6f03c710 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 3caaaa04b92e..2984fb0f0257 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -47,6 +47,7 @@ #include #include #include +#include #include #include diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index 6b7155ae5c33..4e2fec3f12a0 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include diff --git a/kernel/seccomp.c b/kernel/seccomp.c index e15185c28de5..65f61077ad50 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 25d099fd80f7..e6d470a5f75a 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/trace/trace_stack.c b/kernel/trace/trace_stack.c index 2a1abbaca10e..1d68b5b7ad41 100644 --- a/kernel/trace/trace_stack.c +++ b/kernel/trace/trace_stack.c @@ -2,6 +2,7 @@ * Copyright (C) 2008 Steven Rostedt * */ +#include #include #include #include diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 8c28cbd7e104..17afb0430161 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 942adf13418d..b157b46cc9a6 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -17,6 +17,7 @@ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ +#include #include #include #include diff --git a/lib/syscall.c b/lib/syscall.c index 63239e097b13..17d5ff5fa6a3 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/mm/kasan/kasan.c b/mm/kasan/kasan.c index d99312ed7c22..98b27195e38b 100644 --- a/mm/kasan/kasan.c +++ b/mm/kasan/kasan.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/kmemleak.c b/mm/kmemleak.c index a3970573359e..26c874e90b12 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -75,6 +75,7 @@ #include #include #include +#include #include #include #include diff --git a/mm/util.c b/mm/util.c index 14f4e13323e2..656dc5e37a87 100644 --- a/mm/util.c +++ b/mm/util.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 589ee62844e042b0b7d19ef57fb4cff77f3ca294 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 00:16:44 +0100 Subject: sched/headers: Prepare to remove the dependency from Update code that relied on sched.h including various MM types for them. This will allow us to remove the include from . Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/include/asm/a.out-core.h | 1 + arch/alpha/include/asm/mmu_context.h | 2 ++ arch/arc/mm/tlb.c | 2 ++ arch/arm/include/asm/mmu_context.h | 2 ++ arch/arm/include/asm/tlbflush.h | 7 ++++--- arch/arm/kernel/suspend.c | 1 + arch/arm/kernel/swp_emulate.c | 1 + arch/arm/mm/idmap.c | 1 + arch/arm64/include/asm/mmu_context.h | 1 + arch/arm64/kernel/traps.c | 1 + arch/avr32/include/asm/mmu_context.h | 2 ++ arch/blackfin/include/asm/mmu_context.h | 2 ++ arch/blackfin/kernel/flat.c | 1 + arch/blackfin/kernel/process.c | 1 + arch/blackfin/mm/sram-alloc.c | 2 ++ arch/cris/arch-v10/mm/tlb.c | 2 ++ arch/cris/arch-v32/mm/tlb.c | 1 + arch/cris/include/asm/pgtable.h | 2 +- arch/cris/mm/tlb.c | 2 ++ arch/h8300/kernel/traps.c | 1 + arch/hexagon/include/asm/mmu_context.h | 2 ++ arch/hexagon/kernel/smp.c | 1 + arch/ia64/include/asm/mmu_context.h | 1 + arch/ia64/include/asm/pgtable.h | 2 +- arch/ia64/sn/kernel/sn2/sn2_smp.c | 1 + arch/m32r/include/asm/mmu_context.h | 2 ++ arch/m68k/include/asm/a.out-core.h | 1 + arch/m68k/include/asm/mmu_context.h | 1 + arch/metag/include/asm/mmu_context.h | 1 + arch/microblaze/include/asm/mmu_context_mm.h | 2 ++ arch/microblaze/mm/pgtable.c | 1 + arch/mips/include/asm/elf.h | 2 ++ arch/mips/include/asm/mmu_context.h | 2 ++ arch/mips/kernel/smp.c | 2 +- arch/mips/math-emu/dsemul.c | 1 + arch/mips/mm/ioremap.c | 1 + arch/mn10300/include/asm/mmu_context.h | 2 ++ arch/mn10300/kernel/smp.c | 2 +- arch/mn10300/mm/tlb-smp.c | 2 +- arch/nios2/include/asm/mmu_context.h | 2 ++ arch/nios2/kernel/process.c | 1 + arch/powerpc/kernel/io-workarounds.c | 2 +- arch/powerpc/kvm/e500_mmu_host.c | 2 +- arch/powerpc/lib/feature-fixups.c | 1 + arch/powerpc/mm/hash_utils_64.c | 2 +- arch/powerpc/mm/pgtable-book3s64.c | 2 ++ arch/powerpc/mm/pgtable-hash64.c | 2 ++ arch/powerpc/mm/pgtable-radix.c | 2 +- arch/powerpc/mm/slb.c | 2 ++ arch/s390/include/asm/elf.h | 2 +- arch/s390/include/asm/mmu_context.h | 1 + arch/s390/kernel/processor.c | 1 + arch/s390/kvm/gaccess.c | 2 ++ arch/s390/kvm/priv.c | 2 ++ arch/score/include/asm/mmu_context.h | 2 ++ arch/score/kernel/traps.c | 1 + arch/sh/include/asm/mmu_context.h | 2 ++ arch/sparc/include/asm/mmu_context_64.h | 2 ++ arch/sparc/include/asm/pgtable_64.h | 3 +++ arch/sparc/kernel/asm-offsets.c | 1 + arch/sparc/kernel/traps_32.c | 1 + arch/sparc/mm/tsb.c | 2 ++ arch/tile/include/asm/mmu_context.h | 2 ++ arch/um/include/asm/mmu_context.h | 2 ++ arch/um/kernel/exec.c | 2 +- arch/um/kernel/reboot.c | 1 + arch/um/kernel/skas/process.c | 3 ++- arch/x86/entry/vsyscall/vsyscall_64.c | 1 + arch/x86/events/core.c | 2 +- arch/x86/include/asm/a.out-core.h | 2 ++ arch/x86/include/asm/mpx.h | 2 ++ arch/x86/mm/mpx.c | 1 + arch/x86/um/syscalls_64.c | 1 + arch/x86/xen/mmu.c | 2 +- arch/xtensa/include/asm/mmu_context.h | 1 + drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c | 2 ++ drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c | 2 ++ drivers/infiniband/hw/cxgb3/iwch_provider.c | 2 +- drivers/infiniband/hw/cxgb4/iw_cxgb4.h | 2 +- drivers/media/v4l2-core/videobuf-dma-sg.c | 2 +- fs/binfmt_misc.c | 2 +- fs/kernfs/file.c | 2 +- include/drm/drm_mm.h | 1 + include/linux/init_task.h | 1 + include/linux/sched/mm.h | 1 + kernel/sched/debug.c | 2 +- kernel/sched/fair.c | 2 +- kernel/signal.c | 2 +- lib/is_single_threaded.c | 1 + 89 files changed, 125 insertions(+), 25 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/include/asm/a.out-core.h b/arch/alpha/include/asm/a.out-core.h index 9e33e92e524c..1610d078b064 100644 --- a/arch/alpha/include/asm/a.out-core.h +++ b/arch/alpha/include/asm/a.out-core.h @@ -15,6 +15,7 @@ #ifdef __KERNEL__ #include +#include /* * Fill in the user structure for an ECOFF core dump. diff --git a/arch/alpha/include/asm/mmu_context.h b/arch/alpha/include/asm/mmu_context.h index 4c51c05333c6..384bd47b5187 100644 --- a/arch/alpha/include/asm/mmu_context.h +++ b/arch/alpha/include/asm/mmu_context.h @@ -7,6 +7,8 @@ * Copyright (C) 1996, Linus Torvalds */ +#include + #include #include #include diff --git a/arch/arc/mm/tlb.c b/arch/arc/mm/tlb.c index bdb295e09160..d0126fdfe2d8 100644 --- a/arch/arc/mm/tlb.c +++ b/arch/arc/mm/tlb.c @@ -53,6 +53,8 @@ #include #include +#include + #include #include #include diff --git a/arch/arm/include/asm/mmu_context.h b/arch/arm/include/asm/mmu_context.h index 3cc14dd8587c..7f303295ef19 100644 --- a/arch/arm/include/asm/mmu_context.h +++ b/arch/arm/include/asm/mmu_context.h @@ -15,7 +15,9 @@ #include #include +#include #include + #include #include #include diff --git a/arch/arm/include/asm/tlbflush.h b/arch/arm/include/asm/tlbflush.h index def9e570199f..1897b5196fb5 100644 --- a/arch/arm/include/asm/tlbflush.h +++ b/arch/arm/include/asm/tlbflush.h @@ -10,6 +10,10 @@ #ifndef _ASMARM_TLBFLUSH_H #define _ASMARM_TLBFLUSH_H +#ifndef __ASSEMBLY__ +# include +#endif + #ifdef CONFIG_MMU #include @@ -644,9 +648,6 @@ static inline void update_mmu_cache(struct vm_area_struct *vma, #elif defined(CONFIG_SMP) /* !CONFIG_MMU */ #ifndef __ASSEMBLY__ - -#include - static inline void local_flush_tlb_all(void) { } static inline void local_flush_tlb_mm(struct mm_struct *mm) { } static inline void local_flush_tlb_page(struct vm_area_struct *vma, unsigned long uaddr) { } diff --git a/arch/arm/kernel/suspend.c b/arch/arm/kernel/suspend.c index 9a2f882a0a2d..ef794c799cb6 100644 --- a/arch/arm/kernel/suspend.c +++ b/arch/arm/kernel/suspend.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/arm/kernel/swp_emulate.c b/arch/arm/kernel/swp_emulate.c index 853221f81104..3bda08bee674 100644 --- a/arch/arm/kernel/swp_emulate.c +++ b/arch/arm/kernel/swp_emulate.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm/mm/idmap.c b/arch/arm/mm/idmap.c index c1a48f88764e..3e511bec69b8 100644 --- a/arch/arm/mm/idmap.c +++ b/arch/arm/mm/idmap.c @@ -1,6 +1,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/include/asm/mmu_context.h b/arch/arm64/include/asm/mmu_context.h index 3c9f7d18a7e6..3257895a9b5e 100644 --- a/arch/arm64/include/asm/mmu_context.h +++ b/arch/arm64/include/asm/mmu_context.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/kernel/traps.c b/arch/arm64/kernel/traps.c index bdbd0c3febf4..e52be6aa44ee 100644 --- a/arch/arm64/kernel/traps.c +++ b/arch/arm64/kernel/traps.c @@ -33,6 +33,7 @@ #include #include #include +#include #include #include diff --git a/arch/avr32/include/asm/mmu_context.h b/arch/avr32/include/asm/mmu_context.h index 27ff23407100..cd87abba8db7 100644 --- a/arch/avr32/include/asm/mmu_context.h +++ b/arch/avr32/include/asm/mmu_context.h @@ -12,6 +12,8 @@ #ifndef __ASM_AVR32_MMU_CONTEXT_H #define __ASM_AVR32_MMU_CONTEXT_H +#include + #include #include #include diff --git a/arch/blackfin/include/asm/mmu_context.h b/arch/blackfin/include/asm/mmu_context.h index 15b16d3e8de8..0ce6de873b27 100644 --- a/arch/blackfin/include/asm/mmu_context.h +++ b/arch/blackfin/include/asm/mmu_context.h @@ -9,6 +9,8 @@ #include #include +#include + #include #include #include diff --git a/arch/blackfin/kernel/flat.c b/arch/blackfin/kernel/flat.c index a88daddbf074..b5b658449616 100644 --- a/arch/blackfin/kernel/flat.c +++ b/arch/blackfin/kernel/flat.c @@ -6,6 +6,7 @@ #include #include +#include #include #define FLAT_BFIN_RELOC_TYPE_16_BIT 0 diff --git a/arch/blackfin/kernel/process.c b/arch/blackfin/kernel/process.c index b691ef875a40..89d5162d4ca6 100644 --- a/arch/blackfin/kernel/process.c +++ b/arch/blackfin/kernel/process.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/blackfin/mm/sram-alloc.c b/arch/blackfin/mm/sram-alloc.c index 1f3b3ef3e103..d2a96c2c02a3 100644 --- a/arch/blackfin/mm/sram-alloc.c +++ b/arch/blackfin/mm/sram-alloc.c @@ -19,6 +19,8 @@ #include #include #include +#include + #include #include #include "blackfin_sram.h" diff --git a/arch/cris/arch-v10/mm/tlb.c b/arch/cris/arch-v10/mm/tlb.c index 21d78c599bab..3225d38bdaea 100644 --- a/arch/cris/arch-v10/mm/tlb.c +++ b/arch/cris/arch-v10/mm/tlb.c @@ -10,6 +10,8 @@ * */ +#include + #include #include #include diff --git a/arch/cris/arch-v32/mm/tlb.c b/arch/cris/arch-v32/mm/tlb.c index c030d020660a..bc3de5b5e27c 100644 --- a/arch/cris/arch-v32/mm/tlb.c +++ b/arch/cris/arch-v32/mm/tlb.c @@ -6,6 +6,7 @@ * Authors: Bjorn Wesen * Tobias Anderberg , CRISv32 port. */ +#include #include #include diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index ceefc314d64d..2a3210ba4c72 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -9,7 +9,7 @@ #include #ifndef __ASSEMBLY__ -#include +#include #include #endif #include diff --git a/arch/cris/mm/tlb.c b/arch/cris/mm/tlb.c index b7f8de576777..8413741cfa0f 100644 --- a/arch/cris/mm/tlb.c +++ b/arch/cris/mm/tlb.c @@ -9,6 +9,8 @@ #include #include +#include + #include #define D(x) diff --git a/arch/h8300/kernel/traps.c b/arch/h8300/kernel/traps.c index ee7e3ce40d78..e47a9e0dc278 100644 --- a/arch/h8300/kernel/traps.c +++ b/arch/h8300/kernel/traps.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/hexagon/include/asm/mmu_context.h b/arch/hexagon/include/asm/mmu_context.h index d423d2e73c30..d8a071afdd1d 100644 --- a/arch/hexagon/include/asm/mmu_context.h +++ b/arch/hexagon/include/asm/mmu_context.h @@ -21,6 +21,8 @@ #ifndef _ASM_MMU_CONTEXT_H #define _ASM_MMU_CONTEXT_H +#include + #include #include #include diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index c02a6455839e..1f63e91a353b 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -29,6 +29,7 @@ #include #include #include +#include #include /* timer_interrupt */ #include diff --git a/arch/ia64/include/asm/mmu_context.h b/arch/ia64/include/asm/mmu_context.h index 7f2a456603cb..9b99368633b5 100644 --- a/arch/ia64/include/asm/mmu_context.h +++ b/arch/ia64/include/asm/mmu_context.h @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 9f3ed9ee8f13..384794e665fc 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -147,7 +147,7 @@ # ifndef __ASSEMBLY__ -#include /* for mm_struct */ +#include /* for mm_struct */ #include #include #include diff --git a/arch/ia64/sn/kernel/sn2/sn2_smp.c b/arch/ia64/sn/kernel/sn2/sn2_smp.c index c98dc965fe82..b73b0ebf8214 100644 --- a/arch/ia64/sn/kernel/sn2/sn2_smp.c +++ b/arch/ia64/sn/kernel/sn2/sn2_smp.c @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/m32r/include/asm/mmu_context.h b/arch/m32r/include/asm/mmu_context.h index 9fc78fc44445..1230b7050d8e 100644 --- a/arch/m32r/include/asm/mmu_context.h +++ b/arch/m32r/include/asm/mmu_context.h @@ -12,6 +12,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include #include diff --git a/arch/m68k/include/asm/a.out-core.h b/arch/m68k/include/asm/a.out-core.h index f6bfc1d63ff6..ae91ea6bb303 100644 --- a/arch/m68k/include/asm/a.out-core.h +++ b/arch/m68k/include/asm/a.out-core.h @@ -16,6 +16,7 @@ #include #include +#include /* * fill in the user structure for an a.out core dump diff --git a/arch/m68k/include/asm/mmu_context.h b/arch/m68k/include/asm/mmu_context.h index dc3be991d634..4a6ae6dffa34 100644 --- a/arch/m68k/include/asm/mmu_context.h +++ b/arch/m68k/include/asm/mmu_context.h @@ -2,6 +2,7 @@ #define __M68K_MMU_CONTEXT_H #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) { diff --git a/arch/metag/include/asm/mmu_context.h b/arch/metag/include/asm/mmu_context.h index ae2a71b5e0be..2e0312748197 100644 --- a/arch/metag/include/asm/mmu_context.h +++ b/arch/metag/include/asm/mmu_context.h @@ -9,6 +9,7 @@ #include #include +#include static inline void enter_lazy_tlb(struct mm_struct *mm, struct task_struct *tsk) diff --git a/arch/microblaze/include/asm/mmu_context_mm.h b/arch/microblaze/include/asm/mmu_context_mm.h index d68647746448..99472d2ca340 100644 --- a/arch/microblaze/include/asm/mmu_context_mm.h +++ b/arch/microblaze/include/asm/mmu_context_mm.h @@ -12,6 +12,8 @@ #define _ASM_MICROBLAZE_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/microblaze/mm/pgtable.c b/arch/microblaze/mm/pgtable.c index cc732fe357ad..4c0599239915 100644 --- a/arch/microblaze/mm/pgtable.c +++ b/arch/microblaze/mm/pgtable.c @@ -31,6 +31,7 @@ #include #include #include +#include #include #include diff --git a/arch/mips/include/asm/elf.h b/arch/mips/include/asm/elf.h index 7a6c466e5f2a..0eb1a75be105 100644 --- a/arch/mips/include/asm/elf.h +++ b/arch/mips/include/asm/elf.h @@ -10,6 +10,8 @@ #include #include +#include + #include #include diff --git a/arch/mips/include/asm/mmu_context.h b/arch/mips/include/asm/mmu_context.h index 2abf94f72c0a..da2004cef2d5 100644 --- a/arch/mips/include/asm/mmu_context.h +++ b/arch/mips/include/asm/mmu_context.h @@ -13,8 +13,10 @@ #include #include +#include #include #include + #include #include #include diff --git a/arch/mips/kernel/smp.c b/arch/mips/kernel/smp.c index 8c60a296294c..6e71130549ea 100644 --- a/arch/mips/kernel/smp.c +++ b/arch/mips/kernel/smp.c @@ -28,7 +28,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/math-emu/dsemul.c b/arch/mips/math-emu/dsemul.c index c4469ff4a996..6664908514b3 100644 --- a/arch/mips/math-emu/dsemul.c +++ b/arch/mips/math-emu/dsemul.c @@ -1,5 +1,6 @@ #include #include +#include #include #include diff --git a/arch/mips/mm/ioremap.c b/arch/mips/mm/ioremap.c index 1f189627440f..1986e09fb457 100644 --- a/arch/mips/mm/ioremap.c +++ b/arch/mips/mm/ioremap.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/mn10300/include/asm/mmu_context.h b/arch/mn10300/include/asm/mmu_context.h index 75dbe696f830..d2034f5e6eda 100644 --- a/arch/mn10300/include/asm/mmu_context.h +++ b/arch/mn10300/include/asm/mmu_context.h @@ -23,6 +23,8 @@ #define _ASM_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/mn10300/kernel/smp.c b/arch/mn10300/kernel/smp.c index d924f7a79708..35d2c3fe6f76 100644 --- a/arch/mn10300/kernel/smp.c +++ b/arch/mn10300/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mn10300/mm/tlb-smp.c b/arch/mn10300/mm/tlb-smp.c index 9a39ea9031d4..085f2bb691ac 100644 --- a/arch/mn10300/mm/tlb-smp.c +++ b/arch/mn10300/mm/tlb-smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/nios2/include/asm/mmu_context.h b/arch/nios2/include/asm/mmu_context.h index 294b4b1f81d4..78ab3dacf579 100644 --- a/arch/nios2/include/asm/mmu_context.h +++ b/arch/nios2/include/asm/mmu_context.h @@ -13,6 +13,8 @@ #ifndef _ASM_NIOS2_MMU_CONTEXT_H #define _ASM_NIOS2_MMU_CONTEXT_H +#include + #include extern void mmu_context_init(void); diff --git a/arch/nios2/kernel/process.c b/arch/nios2/kernel/process.c index 869a4d59de32..509e7855e8dc 100644 --- a/arch/nios2/kernel/process.c +++ b/arch/nios2/kernel/process.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/arch/powerpc/kernel/io-workarounds.c b/arch/powerpc/kernel/io-workarounds.c index 5f8613ceb97f..a582e0d42525 100644 --- a/arch/powerpc/kernel/io-workarounds.c +++ b/arch/powerpc/kernel/io-workarounds.c @@ -12,7 +12,7 @@ #undef DEBUG #include -#include /* for init_mm */ +#include /* for init_mm */ #include #include diff --git a/arch/powerpc/kvm/e500_mmu_host.c b/arch/powerpc/kvm/e500_mmu_host.c index b0333cc737dd..0fda4230f6c0 100644 --- a/arch/powerpc/kvm/e500_mmu_host.c +++ b/arch/powerpc/kvm/e500_mmu_host.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/lib/feature-fixups.c b/arch/powerpc/lib/feature-fixups.c index 043415f0bdb1..f3917705c686 100644 --- a/arch/powerpc/lib/feature-fixups.c +++ b/arch/powerpc/lib/feature-fixups.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/mm/hash_utils_64.c b/arch/powerpc/mm/hash_utils_64.c index 12d679df50bd..c554768b1fa2 100644 --- a/arch/powerpc/mm/hash_utils_64.c +++ b/arch/powerpc/mm/hash_utils_64.c @@ -23,7 +23,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/mm/pgtable-book3s64.c b/arch/powerpc/mm/pgtable-book3s64.c index b798ff674fab..5fcb3dd74c13 100644 --- a/arch/powerpc/mm/pgtable-book3s64.c +++ b/arch/powerpc/mm/pgtable-book3s64.c @@ -8,6 +8,8 @@ */ #include +#include + #include #include diff --git a/arch/powerpc/mm/pgtable-hash64.c b/arch/powerpc/mm/pgtable-hash64.c index c23e286a6b8f..8b85a14b08ea 100644 --- a/arch/powerpc/mm/pgtable-hash64.c +++ b/arch/powerpc/mm/pgtable-hash64.c @@ -10,6 +10,8 @@ */ #include +#include + #include #include diff --git a/arch/powerpc/mm/pgtable-radix.c b/arch/powerpc/mm/pgtable-radix.c index feeda90cd06d..2a590a98e652 100644 --- a/arch/powerpc/mm/pgtable-radix.c +++ b/arch/powerpc/mm/pgtable-radix.c @@ -8,7 +8,7 @@ * as published by the Free Software Foundation; either version * 2 of the License, or (at your option) any later version. */ -#include +#include #include #include diff --git a/arch/powerpc/mm/slb.c b/arch/powerpc/mm/slb.c index 48fc28bab544..5e01b2ece1d0 100644 --- a/arch/powerpc/mm/slb.c +++ b/arch/powerpc/mm/slb.c @@ -22,6 +22,8 @@ #include #include #include +#include + #include #include diff --git a/arch/s390/include/asm/elf.h b/arch/s390/include/asm/elf.h index 83aaefed2a7b..1d48880b3cc1 100644 --- a/arch/s390/include/asm/elf.h +++ b/arch/s390/include/asm/elf.h @@ -132,7 +132,7 @@ typedef s390_fp_regs compat_elf_fpregset_t; typedef s390_compat_regs compat_elf_gregset_t; #include -#include /* for task_struct */ +#include /* for task_struct */ #include #include diff --git a/arch/s390/include/asm/mmu_context.h b/arch/s390/include/asm/mmu_context.h index 9b828c073176..6e31d87fb669 100644 --- a/arch/s390/include/asm/mmu_context.h +++ b/arch/s390/include/asm/mmu_context.h @@ -9,6 +9,7 @@ #include #include +#include #include #include diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index bc2b60dcb178..bf7854523831 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/s390/kvm/gaccess.c b/arch/s390/kvm/gaccess.c index 4492c9363178..d55c829a5944 100644 --- a/arch/s390/kvm/gaccess.c +++ b/arch/s390/kvm/gaccess.c @@ -6,7 +6,9 @@ */ #include +#include #include + #include #include #include "kvm-s390.h" diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index fb4b494cde9b..64b6a309f2c4 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -15,6 +15,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/score/include/asm/mmu_context.h b/arch/score/include/asm/mmu_context.h index 2644577c96e8..073f95d350de 100644 --- a/arch/score/include/asm/mmu_context.h +++ b/arch/score/include/asm/mmu_context.h @@ -3,7 +3,9 @@ #include #include +#include #include + #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index 8a54d320fb41..fe68de6c746c 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -26,6 +26,7 @@ #include #include #include +#include #include #include diff --git a/arch/sh/include/asm/mmu_context.h b/arch/sh/include/asm/mmu_context.h index 35ffdd081d26..eb6ac3c10c44 100644 --- a/arch/sh/include/asm/mmu_context.h +++ b/arch/sh/include/asm/mmu_context.h @@ -11,6 +11,8 @@ #include #include #include +#include + #include #include diff --git a/arch/sparc/include/asm/mmu_context_64.h b/arch/sparc/include/asm/mmu_context_64.h index d0317993e947..22fede6eba11 100644 --- a/arch/sparc/include/asm/mmu_context_64.h +++ b/arch/sparc/include/asm/mmu_context_64.h @@ -6,6 +6,8 @@ #ifndef __ASSEMBLY__ #include +#include + #include #include diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 7932a4a37817..56e49c8f770d 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -878,6 +878,9 @@ static inline unsigned long pud_pfn(pud_t pud) #define pte_offset_map pte_index #define pte_unmap(pte) do { } while (0) +/* We cannot include at this point yet: */ +extern struct mm_struct init_mm; + /* Actual page table PTE updates. */ void tlb_batch_add(struct mm_struct *mm, unsigned long vaddr, pte_t *ptep, pte_t orig, int fullmm, diff --git a/arch/sparc/kernel/asm-offsets.c b/arch/sparc/kernel/asm-offsets.c index f76389a32342..3f09e1c83f58 100644 --- a/arch/sparc/kernel/asm-offsets.c +++ b/arch/sparc/kernel/asm-offsets.c @@ -11,6 +11,7 @@ */ #include +#include // #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 26740a2f285d..2de72a49308a 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -11,6 +11,7 @@ #include /* for jiffies */ #include +#include #include #include #include diff --git a/arch/sparc/mm/tsb.c b/arch/sparc/mm/tsb.c index 23479c3d39f0..0a04811f06b7 100644 --- a/arch/sparc/mm/tsb.c +++ b/arch/sparc/mm/tsb.c @@ -6,6 +6,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/tile/include/asm/mmu_context.h b/arch/tile/include/asm/mmu_context.h index f67753db1f78..45a4b4c424cf 100644 --- a/arch/tile/include/asm/mmu_context.h +++ b/arch/tile/include/asm/mmu_context.h @@ -16,6 +16,8 @@ #define _ASM_TILE_MMU_CONTEXT_H #include +#include + #include #include #include diff --git a/arch/um/include/asm/mmu_context.h b/arch/um/include/asm/mmu_context.h index 1a60e1328e2f..94ac2739918c 100644 --- a/arch/um/include/asm/mmu_context.h +++ b/arch/um/include/asm/mmu_context.h @@ -7,6 +7,8 @@ #define __UM_MMU_CONTEXT_H #include +#include + #include extern void uml_setup_stubs(struct mm_struct *mm); diff --git a/arch/um/kernel/exec.c b/arch/um/kernel/exec.c index 31968677a0d0..a43d42bf0a86 100644 --- a/arch/um/kernel/exec.c +++ b/arch/um/kernel/exec.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/um/kernel/reboot.c b/arch/um/kernel/reboot.c index 0bc921cee0b1..71f3e9217cf2 100644 --- a/arch/um/kernel/reboot.c +++ b/arch/um/kernel/reboot.c @@ -5,6 +5,7 @@ #include #include +#include #include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index ddecf326dab2..3a952a4f7965 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -4,8 +4,9 @@ */ #include -#include +#include #include + #include #include #include diff --git a/arch/x86/entry/vsyscall/vsyscall_64.c b/arch/x86/entry/vsyscall/vsyscall_64.c index df91fb393a01..ce1d7534fa53 100644 --- a/arch/x86/entry/vsyscall/vsyscall_64.c +++ b/arch/x86/entry/vsyscall/vsyscall_64.c @@ -28,6 +28,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2ecc0e97772b..349d4d17aa7f 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/include/asm/a.out-core.h b/arch/x86/include/asm/a.out-core.h index 7a15588e45d4..7d3ece8bfb61 100644 --- a/arch/x86/include/asm/a.out-core.h +++ b/arch/x86/include/asm/a.out-core.h @@ -17,6 +17,8 @@ #include #include +#include + #include /* diff --git a/arch/x86/include/asm/mpx.h b/arch/x86/include/asm/mpx.h index 0b416d4cf73b..a0d662be4c5b 100644 --- a/arch/x86/include/asm/mpx.h +++ b/arch/x86/include/asm/mpx.h @@ -2,6 +2,8 @@ #define _ASM_X86_MPX_H #include +#include + #include #include diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index c98079684bdb..5126dfd52b18 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -7,6 +7,7 @@ */ #include #include +#include #include #include diff --git a/arch/x86/um/syscalls_64.c b/arch/x86/um/syscalls_64.c index e6552275320b..10d907098c26 100644 --- a/arch/x86/um/syscalls_64.c +++ b/arch/x86/um/syscalls_64.c @@ -6,6 +6,7 @@ */ #include +#include #include #include /* XXX This should get the constants from libc */ #include diff --git a/arch/x86/xen/mmu.c b/arch/x86/xen/mmu.c index f6740b5b1738..37cb5aad71de 100644 --- a/arch/x86/xen/mmu.c +++ b/arch/x86/xen/mmu.c @@ -38,7 +38,7 @@ * * Jeremy Fitzhardinge , XenSource Inc, 2007 */ -#include +#include #include #include #include diff --git a/arch/xtensa/include/asm/mmu_context.h b/arch/xtensa/include/asm/mmu_context.h index 04c8ebdc4517..f7e186dfc4e4 100644 --- a/arch/xtensa/include/asm/mmu_context.h +++ b/arch/xtensa/include/asm/mmu_context.h @@ -17,6 +17,7 @@ #include #include +#include #include diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c index d83de985e88c..6acc4313363e 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_cik.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "cik_regs.h" diff --git a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c index fa32c32fa1c2..a9b9882a9a77 100644 --- a/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c +++ b/drivers/gpu/drm/amd/amdkfd/kfd_mqd_manager_vi.c @@ -23,6 +23,8 @@ #include #include +#include + #include "kfd_priv.h" #include "kfd_mqd_manager.h" #include "vi_structs.h" diff --git a/drivers/infiniband/hw/cxgb3/iwch_provider.c b/drivers/infiniband/hw/cxgb3/iwch_provider.c index 318ec5267bdf..86ecd3ea6a4b 100644 --- a/drivers/infiniband/hw/cxgb3/iwch_provider.c +++ b/drivers/infiniband/hw/cxgb3/iwch_provider.c @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h index d19662f635b1..5846c47c8d55 100644 --- a/drivers/infiniband/hw/cxgb4/iw_cxgb4.h +++ b/drivers/infiniband/hw/cxgb4/iw_cxgb4.h @@ -37,7 +37,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/drivers/media/v4l2-core/videobuf-dma-sg.c b/drivers/media/v4l2-core/videobuf-dma-sg.c index 36bd904946bd..0b5c43f7e020 100644 --- a/drivers/media/v4l2-core/videobuf-dma-sg.c +++ b/drivers/media/v4l2-core/videobuf-dma-sg.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include diff --git a/fs/binfmt_misc.c b/fs/binfmt_misc.c index 9b4688ab1d8e..bee1a36bc2ec 100644 --- a/fs/binfmt_misc.c +++ b/fs/binfmt_misc.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/kernfs/file.c b/fs/kernfs/file.c index 35043a8c4529..8e4dc7ab584c 100644 --- a/fs/kernfs/file.c +++ b/fs/kernfs/file.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include "kernfs-internal.h" diff --git a/include/drm/drm_mm.h b/include/drm/drm_mm.h index d81b0ba9921f..2ef16bf25826 100644 --- a/include/drm/drm_mm.h +++ b/include/drm/drm_mm.h @@ -40,6 +40,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_DRM_DEBUG_MM diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 452c9799318c..f6841c19c913 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,7 @@ #include #include #include +#include #include diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 1cf7941bb946..d32e3932b2e3 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -2,6 +2,7 @@ #define _LINUX_SCHED_MM_H #include +#include #include #endif /* _LINUX_SCHED_MM_H */ diff --git a/kernel/sched/debug.c b/kernel/sched/debug.c index 109adc0e9cb9..e865d8bfb881 100644 --- a/kernel/sched/debug.c +++ b/kernel/sched/debug.c @@ -11,7 +11,7 @@ */ #include -#include +#include #include #include #include diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 11e0ab57748a..3e88b35ac157 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -20,7 +20,7 @@ * Copyright (C) 2007 Red Hat, Inc., Peter Zijlstra */ -#include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index e6d470a5f75a..2008aa999976 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -13,7 +13,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 9745cfffcb69..8d4678edcc0e 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include /* * Returns true if the task does not share ->mm with another thread/process. -- cgit v1.2.3 From 9164bb4a18dfa592cd0aca455ea57abf89ca4526 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:20:53 +0100 Subject: sched/headers: Prepare to move 'init_task' and 'init_thread_union' from to Update all usage sites first. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/arm64/kernel/setup.c | 1 + arch/arm64/mm/kasan_init.c | 1 + arch/frv/mm/init.c | 1 + arch/metag/mm/init.c | 1 + arch/nios2/kernel/setup.c | 1 + arch/powerpc/kernel/paca.c | 1 + arch/um/kernel/skas/process.c | 1 + arch/um/kernel/um_arch.c | 2 ++ arch/x86/kernel/cpu/common.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + fs/namespace.c | 2 ++ include/linux/sched/signal.h | 1 + init/init_task.c | 1 + kernel/delayacct.c | 1 + lib/is_single_threaded.c | 1 + mm/vmacache.c | 1 + 16 files changed, 18 insertions(+) (limited to 'arch/x86') diff --git a/arch/arm64/kernel/setup.c b/arch/arm64/kernel/setup.c index 952e2c0dabd5..42274bda0ccb 100644 --- a/arch/arm64/kernel/setup.c +++ b/arch/arm64/kernel/setup.c @@ -42,6 +42,7 @@ #include #include #include +#include #include #include diff --git a/arch/arm64/mm/kasan_init.c b/arch/arm64/mm/kasan_init.c index 201d918e7575..55d1e9205543 100644 --- a/arch/arm64/mm/kasan_init.c +++ b/arch/arm64/mm/kasan_init.c @@ -13,6 +13,7 @@ #define pr_fmt(fmt) "kasan: " fmt #include #include +#include #include #include #include diff --git a/arch/frv/mm/init.c b/arch/frv/mm/init.c index 88a159743528..328f0a292316 100644 --- a/arch/frv/mm/init.c +++ b/arch/frv/mm/init.c @@ -18,6 +18,7 @@ #include #include +#include #include #include #include diff --git a/arch/metag/mm/init.c b/arch/metag/mm/init.c index c0ec116b3993..188d4d9fbed4 100644 --- a/arch/metag/mm/init.c +++ b/arch/metag/mm/init.c @@ -12,6 +12,7 @@ #include #include #include +#include #include #include diff --git a/arch/nios2/kernel/setup.c b/arch/nios2/kernel/setup.c index a3fa80d1aacc..6e57ffa5db27 100644 --- a/arch/nios2/kernel/setup.c +++ b/arch/nios2/kernel/setup.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/powerpc/kernel/paca.c b/arch/powerpc/kernel/paca.c index fa20060ff7a5..dfc479df9634 100644 --- a/arch/powerpc/kernel/paca.c +++ b/arch/powerpc/kernel/paca.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/skas/process.c b/arch/um/kernel/skas/process.c index 3a952a4f7965..d4dbf08722d6 100644 --- a/arch/um/kernel/skas/process.c +++ b/arch/um/kernel/skas/process.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include diff --git a/arch/um/kernel/um_arch.c b/arch/um/kernel/um_arch.c index e8175a8aa22c..4b85acd4020c 100644 --- a/arch/um/kernel/um_arch.c +++ b/arch/um/kernel/um_arch.c @@ -11,7 +11,9 @@ #include #include #include +#include #include + #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c7e2ca966386..f2fd8fefc589 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -9,6 +9,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 0493c17b8a51..8d63d7a104c3 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include diff --git a/fs/namespace.c b/fs/namespace.c index 131cd7b94f47..cc1375eff88c 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include "pnode.h" #include "internal.h" diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7e10a7824523..320eca0446cd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -5,5 +5,6 @@ #include #include #include +#include #endif /* _LINUX_SCHED_SIGNAL_H */ diff --git a/init/init_task.c b/init/init_task.c index 53d4ce942a88..66787e30a419 100644 --- a/init/init_task.c +++ b/init/init_task.c @@ -4,6 +4,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index 660549656991..c94135fc2698 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -14,6 +14,7 @@ */ #include +#include #include #include #include diff --git a/lib/is_single_threaded.c b/lib/is_single_threaded.c index 8d4678edcc0e..9c7d89df40ed 100644 --- a/lib/is_single_threaded.c +++ b/lib/is_single_threaded.c @@ -10,6 +10,7 @@ * 2 of the Licence, or (at your option) any later version. */ #include +#include #include /* diff --git a/mm/vmacache.c b/mm/vmacache.c index 4355d34c68a6..7ffa0ee341b5 100644 --- a/mm/vmacache.c +++ b/mm/vmacache.c @@ -2,6 +2,7 @@ * Copyright (C) 2014 Davidlohr Bueso. */ #include +#include #include #include -- cgit v1.2.3 From b2d091031075ac9a1598e3cc3a29c28f02e64c0d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 4 Feb 2017 01:27:20 +0100 Subject: sched/headers: Prepare to use instead of in We don't actually need the full rculist.h header in sched.h anymore, we will be able to include the smaller rcupdate.h header instead. But first update code that relied on the implicit header inclusion. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/powerpc/kvm/book3s_mmu_hpte.c | 1 + arch/x86/kvm/irq_comm.c | 2 ++ arch/x86/kvm/page_track.c | 2 ++ drivers/md/bcache/btree.c | 2 ++ drivers/misc/vmw_vmci/vmci_event.c | 1 + drivers/nvme/target/admin-cmd.c | 2 ++ drivers/nvme/target/core.c | 2 ++ drivers/s390/cio/qdio_thinint.c | 2 ++ drivers/scsi/libfc/fc_disc.c | 2 ++ drivers/scsi/libfc/fc_rport.c | 2 ++ include/linux/dmar.h | 2 +- include/linux/pid.h | 2 +- include/linux/rhashtable.h | 2 +- include/linux/sched/signal.h | 1 + include/net/bluetooth/hci_core.h | 2 ++ kernel/trace/trace_events_hist.c | 1 + kernel/trace/trace_events_trigger.c | 1 + kernel/trace/trace_kprobe.c | 1 + kernel/trace/trace_uprobe.c | 1 + lib/bug.c | 1 + lib/rhashtable.c | 1 + net/mac80211/mesh_plink.c | 2 ++ net/mac802154/llsec.c | 2 ++ security/apparmor/policy.c | 1 + security/tomoyo/domain.c | 2 ++ security/tomoyo/group.c | 2 ++ security/tomoyo/util.c | 2 ++ 27 files changed, 41 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/powerpc/kvm/book3s_mmu_hpte.c b/arch/powerpc/kvm/book3s_mmu_hpte.c index 5a1ab1250a05..905a934c1ef4 100644 --- a/arch/powerpc/kvm/book3s_mmu_hpte.c +++ b/arch/powerpc/kvm/book3s_mmu_hpte.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include diff --git a/arch/x86/kvm/irq_comm.c b/arch/x86/kvm/irq_comm.c index b96d3893f121..6825cd36d13b 100644 --- a/arch/x86/kvm/irq_comm.c +++ b/arch/x86/kvm/irq_comm.c @@ -23,6 +23,8 @@ #include #include #include +#include + #include #include diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 4a1c13eaa518..37942e419c32 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -14,6 +14,8 @@ */ #include +#include + #include #include diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 384559af310e..450d0e848ae4 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -33,6 +33,8 @@ #include #include #include +#include + #include /* diff --git a/drivers/misc/vmw_vmci/vmci_event.c b/drivers/misc/vmw_vmci/vmci_event.c index 8449516d6ac6..84258a48029d 100644 --- a/drivers/misc/vmw_vmci/vmci_event.c +++ b/drivers/misc/vmw_vmci/vmci_event.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "vmci_driver.h" #include "vmci_event.h" diff --git a/drivers/nvme/target/admin-cmd.c b/drivers/nvme/target/admin-cmd.c index 94e524fea568..a7bcff45f437 100644 --- a/drivers/nvme/target/admin-cmd.c +++ b/drivers/nvme/target/admin-cmd.c @@ -13,6 +13,8 @@ */ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include +#include + #include #include #include "nvmet.h" diff --git a/drivers/nvme/target/core.c b/drivers/nvme/target/core.c index 5267ce20c12d..11b0a0a5f661 100644 --- a/drivers/nvme/target/core.c +++ b/drivers/nvme/target/core.c @@ -14,6 +14,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt #include #include +#include + #include "nvmet.h" static struct nvmet_fabrics_ops *nvmet_transports[NVMF_TRTYPE_MAX]; diff --git a/drivers/s390/cio/qdio_thinint.c b/drivers/s390/cio/qdio_thinint.c index 8ad98a902a91..c61164f4528e 100644 --- a/drivers/s390/cio/qdio_thinint.c +++ b/drivers/s390/cio/qdio_thinint.c @@ -8,6 +8,8 @@ #include #include #include +#include + #include #include #include diff --git a/drivers/scsi/libfc/fc_disc.c b/drivers/scsi/libfc/fc_disc.c index 6103231104da..fd501f8dbb11 100644 --- a/drivers/scsi/libfc/fc_disc.c +++ b/drivers/scsi/libfc/fc_disc.c @@ -36,6 +36,8 @@ #include #include #include +#include + #include #include diff --git a/drivers/scsi/libfc/fc_rport.c b/drivers/scsi/libfc/fc_rport.c index c991f3b822f8..b44c3136eb51 100644 --- a/drivers/scsi/libfc/fc_rport.c +++ b/drivers/scsi/libfc/fc_rport.c @@ -65,6 +65,8 @@ #include #include #include +#include + #include #include diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e9bc9292bd3a..e8ffba1052d3 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -26,7 +26,7 @@ #include #include #include -#include +#include struct acpi_dmar_header; diff --git a/include/linux/pid.h b/include/linux/pid.h index 298ead5512e5..4d179316e431 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -1,7 +1,7 @@ #ifndef _LINUX_PID_H #define _LINUX_PID_H -#include +#include enum pid_type { diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index f2e12a845910..092292b6675e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -25,7 +25,7 @@ #include #include #include -#include +#include /* * The end of the chain is marked with a special nulls marks which has diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 320eca0446cd..c6958a53fef3 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -1,6 +1,7 @@ #ifndef _LINUX_SCHED_SIGNAL_H #define _LINUX_SCHED_SIGNAL_H +#include #include #include #include diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h index 90708f68cc02..95ccc1eef558 100644 --- a/include/net/bluetooth/hci_core.h +++ b/include/net/bluetooth/hci_core.h @@ -26,6 +26,8 @@ #define __HCI_CORE_H #include +#include + #include #include diff --git a/kernel/trace/trace_events_hist.c b/kernel/trace/trace_events_hist.c index f3a960ed75a1..1c21d0e2a145 100644 --- a/kernel/trace/trace_events_hist.c +++ b/kernel/trace/trace_events_hist.c @@ -19,6 +19,7 @@ #include #include #include +#include #include "tracing_map.h" #include "trace.h" diff --git a/kernel/trace/trace_events_trigger.c b/kernel/trace/trace_events_trigger.c index 6721a1e89f39..f2ac9d44f6c4 100644 --- a/kernel/trace/trace_events_trigger.c +++ b/kernel/trace/trace_events_trigger.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "trace.h" diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index eadd96ef772f..5f688cc724f0 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -20,6 +20,7 @@ #include #include +#include #include "trace_probe.h" diff --git a/kernel/trace/trace_uprobe.c b/kernel/trace/trace_uprobe.c index f4379e772171..a7581fec9681 100644 --- a/kernel/trace/trace_uprobe.c +++ b/kernel/trace/trace_uprobe.c @@ -24,6 +24,7 @@ #include #include #include +#include #include "trace_probe.h" diff --git a/lib/bug.c b/lib/bug.c index bc3656e944d2..06edbbef0623 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -45,6 +45,7 @@ #include #include #include +#include extern const struct bug_entry __start___bug_table[], __stop___bug_table[]; diff --git a/lib/rhashtable.c b/lib/rhashtable.c index c5b9b9351cec..f8635fd57442 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c index fcba70e57073..953d71e784a9 100644 --- a/net/mac80211/mesh_plink.c +++ b/net/mac80211/mesh_plink.c @@ -9,6 +9,8 @@ #include #include #include +#include + #include "ieee80211_i.h" #include "rate.h" #include "mesh.h" diff --git a/net/mac802154/llsec.c b/net/mac802154/llsec.c index 6a3e1c2181d3..1e1c9b20bab7 100644 --- a/net/mac802154/llsec.c +++ b/net/mac802154/llsec.c @@ -18,6 +18,8 @@ #include #include #include +#include + #include #include diff --git a/security/apparmor/policy.c b/security/apparmor/policy.c index 462c5d36b871..def1fbd6bdfd 100644 --- a/security/apparmor/policy.c +++ b/security/apparmor/policy.c @@ -77,6 +77,7 @@ #include #include #include +#include #include #include "include/apparmor.h" diff --git a/security/tomoyo/domain.c b/security/tomoyo/domain.c index 838ffa78cfda..00d223e9fb37 100644 --- a/security/tomoyo/domain.c +++ b/security/tomoyo/domain.c @@ -5,8 +5,10 @@ */ #include "common.h" + #include #include +#include /* Variables definitions.*/ diff --git a/security/tomoyo/group.c b/security/tomoyo/group.c index 50092534ec54..944ad77d8fba 100644 --- a/security/tomoyo/group.c +++ b/security/tomoyo/group.c @@ -5,6 +5,8 @@ */ #include +#include + #include "common.h" /** diff --git a/security/tomoyo/util.c b/security/tomoyo/util.c index 5fe3679137ae..848317fea704 100644 --- a/security/tomoyo/util.c +++ b/security/tomoyo/util.c @@ -5,6 +5,8 @@ */ #include +#include + #include "common.h" /* Lock for protecting policy. */ -- cgit v1.2.3 From 32ef5517c298042ed58408545f475df43afe1f24 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 11:48:36 +0100 Subject: sched/headers: Prepare to move cputime functionality from into Introduce a trivial, mostly empty header to prepare for the moving of cputime functionality out of sched.h. Update all code that relies on these facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/osf_sys.c | 1 + arch/ia64/kernel/time.c | 2 +- arch/powerpc/kernel/time.c | 2 +- arch/s390/kernel/idle.c | 2 +- arch/s390/kernel/vtime.c | 2 +- arch/x86/kernel/apm_32.c | 1 + arch/x86/kvm/hyperv.c | 2 ++ drivers/isdn/mISDN/stack.c | 1 + drivers/s390/cio/cio.c | 2 +- fs/binfmt_elf.c | 1 + fs/binfmt_elf_fdpic.c | 1 + fs/proc/array.c | 1 + fs/proc/stat.c | 2 +- include/linux/sched/cputime.h | 7 +++++++ kernel/acct.c | 2 ++ kernel/delayacct.c | 1 + kernel/exit.c | 1 + kernel/fork.c | 1 + kernel/sched/cputime.c | 2 +- kernel/sched/sched.h | 1 + kernel/signal.c | 1 + kernel/sys.c | 1 + kernel/time/itimer.c | 1 + kernel/time/posix-cpu-timers.c | 1 + kernel/tsacct.c | 1 + 25 files changed, 33 insertions(+), 7 deletions(-) create mode 100644 include/linux/sched/cputime.h (limited to 'arch/x86') diff --git a/arch/alpha/kernel/osf_sys.c b/arch/alpha/kernel/osf_sys.c index 73446baa632e..0b961093ca5c 100644 --- a/arch/alpha/kernel/osf_sys.c +++ b/arch/alpha/kernel/osf_sys.c @@ -14,6 +14,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/ia64/kernel/time.c b/arch/ia64/kernel/time.c index 144f9db7a876..aa7be020a904 100644 --- a/arch/ia64/kernel/time.c +++ b/arch/ia64/kernel/time.c @@ -22,7 +22,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/powerpc/kernel/time.c b/arch/powerpc/kernel/time.c index 37833c5dc274..07b90725855e 100644 --- a/arch/powerpc/kernel/time.c +++ b/arch/powerpc/kernel/time.c @@ -58,7 +58,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index fb07a70820af..9340b2a07935 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include "entry.h" diff --git a/arch/s390/kernel/vtime.c b/arch/s390/kernel/vtime.c index 31bd96e81167..c14fc9029912 100644 --- a/arch/s390/kernel/vtime.c +++ b/arch/s390/kernel/vtime.c @@ -6,7 +6,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/apm_32.c b/arch/x86/kernel/apm_32.c index dc04b30cbd60..5a414545e8a3 100644 --- a/arch/x86/kernel/apm_32.c +++ b/arch/x86/kernel/apm_32.c @@ -219,6 +219,7 @@ #include #include #include +#include #include #include #include diff --git a/arch/x86/kvm/hyperv.c b/arch/x86/kvm/hyperv.c index f701d4430727..ebae57ac5902 100644 --- a/arch/x86/kvm/hyperv.c +++ b/arch/x86/kvm/hyperv.c @@ -28,6 +28,8 @@ #include #include +#include + #include #include diff --git a/drivers/isdn/mISDN/stack.c b/drivers/isdn/mISDN/stack.c index 696f22fd5ab4..8b7faea2ddf8 100644 --- a/drivers/isdn/mISDN/stack.c +++ b/drivers/isdn/mISDN/stack.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include "core.h" diff --git a/drivers/s390/cio/cio.c b/drivers/s390/cio/cio.c index de6fccc13124..1b350665c823 100644 --- a/drivers/s390/cio/cio.c +++ b/drivers/s390/cio/cio.c @@ -29,7 +29,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/fs/binfmt_elf.c b/fs/binfmt_elf.c index 92c00a13b28b..5075fd5c62c8 100644 --- a/fs/binfmt_elf.c +++ b/fs/binfmt_elf.c @@ -37,6 +37,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/binfmt_elf_fdpic.c b/fs/binfmt_elf_fdpic.c index 6103a8149ccd..cf93a4fad012 100644 --- a/fs/binfmt_elf_fdpic.c +++ b/fs/binfmt_elf_fdpic.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/array.c b/fs/proc/array.c index f3169b58af38..88c355574aa0 100644 --- a/fs/proc/array.c +++ b/fs/proc/array.c @@ -63,6 +63,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/proc/stat.c b/fs/proc/stat.c index b95556e036bb..bd4e55f4aa20 100644 --- a/fs/proc/stat.c +++ b/fs/proc/stat.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #ifndef arch_irq_stat_cpu diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h new file mode 100644 index 000000000000..6ed4fe43de28 --- /dev/null +++ b/include/linux/sched/cputime.h @@ -0,0 +1,7 @@ +#ifndef _LINUX_SCHED_CPUTIME_H +#define _LINUX_SCHED_CPUTIME_H + +#include +#include + +#endif /* _LINUX_SCHED_CPUTIME_H */ diff --git a/kernel/acct.c b/kernel/acct.c index ca9cb55b5855..5b1284370367 100644 --- a/kernel/acct.c +++ b/kernel/acct.c @@ -56,6 +56,8 @@ #include #include #include +#include + #include #include /* sector_div */ #include diff --git a/kernel/delayacct.c b/kernel/delayacct.c index c94135fc2698..4a1c33416b6a 100644 --- a/kernel/delayacct.c +++ b/kernel/delayacct.c @@ -15,6 +15,7 @@ #include #include +#include #include #include #include diff --git a/kernel/exit.c b/kernel/exit.c index 771c33fc9952..e126ebf2400c 100644 --- a/kernel/exit.c +++ b/kernel/exit.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/fork.c b/kernel/fork.c index c87b6f03c710..916e78004b8f 100644 --- a/kernel/fork.c +++ b/kernel/fork.c @@ -20,6 +20,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sched/cputime.c b/kernel/sched/cputime.c index 54031fa681e2..f3778e2b46c8 100644 --- a/kernel/sched/cputime.c +++ b/kernel/sched/cputime.c @@ -4,7 +4,7 @@ #include #include #include -#include +#include #include "sched.h" #ifdef CONFIG_IRQ_TIME_ACCOUNTING diff --git a/kernel/sched/sched.h b/kernel/sched/sched.h index b1f1c8443837..76e3af3f39f4 100644 --- a/kernel/sched/sched.h +++ b/kernel/sched/sched.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include diff --git a/kernel/signal.c b/kernel/signal.c index 2008aa999976..7e59ebc2c25e 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -18,6 +18,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/sys.c b/kernel/sys.c index 7f1ecd2e41c1..7ff6d1b10cec 100644 --- a/kernel/sys.c +++ b/kernel/sys.c @@ -55,6 +55,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/itimer.c b/kernel/time/itimer.c index f6b961c5e58c..087d6a1279b8 100644 --- a/kernel/time/itimer.c +++ b/kernel/time/itimer.c @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/kernel/time/posix-cpu-timers.c b/kernel/time/posix-cpu-timers.c index a2475a9f57d8..4513ad16a253 100644 --- a/kernel/time/posix-cpu-timers.c +++ b/kernel/time/posix-cpu-timers.c @@ -3,6 +3,7 @@ */ #include +#include #include #include #include diff --git a/kernel/tsacct.c b/kernel/tsacct.c index d9a03b80b75d..370724b45391 100644 --- a/kernel/tsacct.c +++ b/kernel/tsacct.c @@ -19,6 +19,7 @@ #include #include #include +#include #include #include #include -- cgit v1.2.3 From 3905f9ad455e0fd2ddb557566c5561b4a3027c07 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sun, 5 Feb 2017 12:07:04 +0100 Subject: sched/headers: Prepare to move sched_info_on() and force_schedstat_enabled() from to But first update usage sites with the new header dependency. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/kvm/cpuid.c | 2 ++ arch/x86/kvm/x86.c | 2 ++ fs/proc/base.c | 1 + kernel/latencytop.c | 1 + kernel/profile.c | 2 ++ 5 files changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/cpuid.c b/arch/x86/kvm/cpuid.c index 1d155cc56629..efde6cc50875 100644 --- a/arch/x86/kvm/cpuid.c +++ b/arch/x86/kvm/cpuid.c @@ -16,6 +16,8 @@ #include #include #include +#include + #include #include #include diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index b2a4b11274b0..1faf620a6fdc 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -54,6 +54,8 @@ #include #include #include +#include + #include #include diff --git a/fs/proc/base.c b/fs/proc/base.c index 5914fed3712d..2dae60075f6e 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -89,6 +89,7 @@ #include #include #include +#include #include #include #ifdef CONFIG_HARDWALL diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 545839b838d6..96b4179cee6a 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -56,6 +56,7 @@ #include #include #include +#include #include #include diff --git a/kernel/profile.c b/kernel/profile.c index f67ce0aa6bc4..9aa2a4445b0d 100644 --- a/kernel/profile.c +++ b/kernel/profile.c @@ -25,6 +25,8 @@ #include #include #include +#include + #include #include #include -- cgit v1.2.3 From f94c8d116997597fc00f0812b0ab9256e7b0c58f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Mar 2017 15:53:38 +0100 Subject: sched/clock, x86/tsc: Rework the x86 'unstable' sched_clock() interface Wanpeng Li reported that since the following commit: acb04058de49 ("sched/clock: Fix hotplug crash") ... KVM always runs with unstable sched-clock even though KVM's kvm_clock _is_ stable. The problem is that we've tied clear_sched_clock_stable() to the TSC state, and overlooked that sched_clock() is a paravirt function. Solve this by doing two things: - tie the sched_clock() stable state more clearly to the TSC stable state for the normal (!paravirt) case. - only call clear_sched_clock_stable() when we mark TSC unstable when we use native_sched_clock(). The first means we can actually run with stable sched_clock in more situations then before, which is good. And since commit: 12907fbb1a69 ("sched/clock, clocksource: Add optional cs::mark_unstable() method") ... this should be reliable. Since any detection of TSC fail now results in marking the TSC unstable. Reported-by: Wanpeng Li Signed-off-by: Peter Zijlstra (Intel) Cc: Borislav Petkov Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: acb04058de49 ("sched/clock: Fix hotplug crash") Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/amd.c | 4 ---- arch/x86/kernel/cpu/centaur.c | 2 -- arch/x86/kernel/cpu/common.c | 3 --- arch/x86/kernel/cpu/cyrix.c | 1 - arch/x86/kernel/cpu/intel.c | 4 ---- arch/x86/kernel/cpu/transmeta.c | 2 -- arch/x86/kernel/tsc.c | 35 +++++++++++++++++++++++------------ 7 files changed, 23 insertions(+), 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c index 4e95b2e0d95f..30d924ae5c34 100644 --- a/arch/x86/kernel/cpu/amd.c +++ b/arch/x86/kernel/cpu/amd.c @@ -555,10 +555,6 @@ static void early_init_amd(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Bit 12 of 8000_0007 edx is accumulated power mechanism. */ diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c index 2c234a6d94c4..bad8ff078a21 100644 --- a/arch/x86/kernel/cpu/centaur.c +++ b/arch/x86/kernel/cpu/centaur.c @@ -104,8 +104,6 @@ static void early_init_centaur(struct cpuinfo_x86 *c) #ifdef CONFIG_X86_64 set_cpu_cap(c, X86_FEATURE_SYSENTER32); #endif - - clear_sched_clock_stable(); } static void init_centaur(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index c64ca5929cb5..9d98e2e15d54 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -86,7 +86,6 @@ static void default_init(struct cpuinfo_x86 *c) strcpy(c->x86_model_id, "386"); } #endif - clear_sched_clock_stable(); } static const struct cpu_dev default_cpu = { @@ -1075,8 +1074,6 @@ static void identify_cpu(struct cpuinfo_x86 *c) */ if (this_cpu->c_init) this_cpu->c_init(c); - else - clear_sched_clock_stable(); /* Disable the PN if appropriate */ squash_the_stupid_serial_number(c); diff --git a/arch/x86/kernel/cpu/cyrix.c b/arch/x86/kernel/cpu/cyrix.c index 47416f959a48..31e679238e8d 100644 --- a/arch/x86/kernel/cpu/cyrix.c +++ b/arch/x86/kernel/cpu/cyrix.c @@ -184,7 +184,6 @@ static void early_init_cyrix(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_CYRIX_ARR); break; } - clear_sched_clock_stable(); } static void init_cyrix(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 017ecd3bb553..2388bafe5c37 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -161,10 +161,6 @@ static void early_init_intel(struct cpuinfo_x86 *c) if (c->x86_power & (1 << 8)) { set_cpu_cap(c, X86_FEATURE_CONSTANT_TSC); set_cpu_cap(c, X86_FEATURE_NONSTOP_TSC); - if (check_tsc_unstable()) - clear_sched_clock_stable(); - } else { - clear_sched_clock_stable(); } /* Penwell and Cloverview have the TSC which doesn't sleep on S3 */ diff --git a/arch/x86/kernel/cpu/transmeta.c b/arch/x86/kernel/cpu/transmeta.c index c1ea5b999839..6a9ad73a2c54 100644 --- a/arch/x86/kernel/cpu/transmeta.c +++ b/arch/x86/kernel/cpu/transmeta.c @@ -15,8 +15,6 @@ static void early_init_transmeta(struct cpuinfo_x86 *c) if (xlvl >= 0x80860001) c->x86_capability[CPUID_8086_0001_EDX] = cpuid_edx(0x80860001); } - - clear_sched_clock_stable(); } static void init_transmeta(struct cpuinfo_x86 *c) diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 2724dc82f992..911129fda2f9 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -326,9 +326,16 @@ unsigned long long sched_clock(void) { return paravirt_sched_clock(); } + +static inline bool using_native_sched_clock(void) +{ + return pv_time_ops.sched_clock == native_sched_clock; +} #else unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); + +static inline bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) @@ -1111,8 +1118,10 @@ static void tsc_cs_mark_unstable(struct clocksource *cs) { if (tsc_unstable) return; + tsc_unstable = 1; - clear_sched_clock_stable(); + if (using_native_sched_clock()) + clear_sched_clock_stable(); disable_sched_clock_irqtime(); pr_info("Marking TSC unstable due to clocksource watchdog\n"); } @@ -1134,18 +1143,20 @@ static struct clocksource clocksource_tsc = { void mark_tsc_unstable(char *reason) { - if (!tsc_unstable) { - tsc_unstable = 1; + if (tsc_unstable) + return; + + tsc_unstable = 1; + if (using_native_sched_clock()) clear_sched_clock_stable(); - disable_sched_clock_irqtime(); - pr_info("Marking TSC unstable due to %s\n", reason); - /* Change only the rating, when not registered */ - if (clocksource_tsc.mult) - clocksource_mark_unstable(&clocksource_tsc); - else { - clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; - clocksource_tsc.rating = 0; - } + disable_sched_clock_irqtime(); + pr_info("Marking TSC unstable due to %s\n", reason); + /* Change only the rating, when not registered */ + if (clocksource_tsc.mult) { + clocksource_mark_unstable(&clocksource_tsc); + } else { + clocksource_tsc.flags |= CLOCK_SOURCE_UNSTABLE; + clocksource_tsc.rating = 0; } } -- cgit v1.2.3 From bb1a2c26165640ba2cbcfe06c81e9f9d6db4e643 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 1 Mar 2017 21:10:17 +0100 Subject: x86/hpet: Prevent might sleep splat on resume Sergey reported a might sleep warning triggered from the hpet resume path. It's caused by the call to disable_irq() from interrupt disabled context. The problem with the low level resume code is that it is not accounted as a special system_state like we do during the boot process. Calling the same code during system boot would not trigger the warning. That's inconsistent at best. In this particular case it's trivial to replace the disable_irq() with disable_hardirq() because this particular code path is solely used from system resume and the involved hpet interrupts can never be force threaded. Reported-and-tested-by: Sergey Senozhatsky Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: "Rafael J. Wysocki" Cc: Sergey Senozhatsky Cc: Borislav Petkov Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703012108460.3684@nanos Signed-off-by: Thomas Gleixner --- arch/x86/kernel/hpet.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hpet.c b/arch/x86/kernel/hpet.c index dc6ba5bda9fc..89ff7af2de50 100644 --- a/arch/x86/kernel/hpet.c +++ b/arch/x86/kernel/hpet.c @@ -354,7 +354,7 @@ static int hpet_resume(struct clock_event_device *evt, int timer) irq_domain_deactivate_irq(irq_get_irq_data(hdev->irq)); irq_domain_activate_irq(irq_get_irq_data(hdev->irq)); - disable_irq(hdev->irq); + disable_hardirq(hdev->irq); irq_set_affinity(hdev->irq, cpumask_of(hdev->cpu)); enable_irq(hdev->irq); } -- cgit v1.2.3 From 68e21be2916b359fd8afb536c1911dc014cfd03e Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 1 Feb 2017 19:08:20 +0100 Subject: sched/headers: Move task->mm handling methods to Move the following task->mm helper APIs into a new header file, , to further reduce the size and complexity of . Here are how the APIs are used in various kernel files: # mm_alloc(): arch/arm/mach-rpc/ecard.c fs/exec.c include/linux/sched/mm.h kernel/fork.c # __mmdrop(): arch/arc/include/asm/mmu_context.h include/linux/sched/mm.h kernel/fork.c # mmdrop(): arch/arm/mach-rpc/ecard.c arch/m68k/sun3/mmu_emu.c arch/x86/mm/tlb.c drivers/gpu/drm/amd/amdkfd/kfd_process.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/hw/hfi1/file_ops.c drivers/vfio/vfio_iommu_spapr_tce.c fs/exec.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/mmu_notifier.h include/linux/sched/mm.h kernel/fork.c kernel/futex.c kernel/sched/core.c mm/khugepaged.c mm/ksm.c mm/mmu_context.c mm/mmu_notifier.c mm/oom_kill.c virt/kvm/kvm_main.c # mmdrop_async_fn(): include/linux/sched/mm.h # mmdrop_async(): include/linux/sched/mm.h kernel/fork.c # mmget_not_zero(): fs/userfaultfd.c include/linux/sched/mm.h mm/oom_kill.c # mmput(): arch/arc/include/asm/mmu_context.h arch/arc/kernel/troubleshoot.c arch/frv/mm/mmu-context.c arch/powerpc/platforms/cell/spufs/context.c arch/sparc/include/asm/mmu_context_32.h drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/gpu/drm/i915/i915_gem_userptr.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/core/uverbs_main.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/exec.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c fs/proc/task_nommu.c fs/userfaultfd.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/events/uprobes.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/oom_kill.c mm/process_vm_access.c mm/rmap.c mm/swapfile.c mm/util.c virt/kvm/async_pf.c # mmput_async(): include/linux/sched/mm.h kernel/fork.c mm/oom_kill.c # get_task_mm(): arch/arc/kernel/troubleshoot.c arch/powerpc/platforms/cell/spufs/context.c drivers/android/binder.c drivers/gpu/drm/etnaviv/etnaviv_gem.c drivers/infiniband/core/umem.c drivers/infiniband/core/umem_odp.c drivers/infiniband/hw/mlx4/main.c drivers/infiniband/hw/mlx5/main.c drivers/infiniband/hw/usnic/usnic_uiom.c drivers/iommu/amd_iommu_v2.c drivers/iommu/intel-svm.c drivers/lguest/lguest_user.c drivers/misc/cxl/fault.c drivers/misc/mic/scif/scif_rma.c drivers/oprofile/buffer_sync.c drivers/vfio/vfio_iommu_type1.c drivers/vhost/vhost.c drivers/xen/gntdev.c fs/proc/array.c fs/proc/base.c fs/proc/task_mmu.c include/linux/sched/mm.h kernel/cpuset.c kernel/events/core.c kernel/exit.c kernel/fork.c kernel/ptrace.c kernel/sys.c kernel/trace/trace_output.c kernel/tsacct.c mm/memcontrol.c mm/memory.c mm/mempolicy.c mm/migrate.c mm/mmu_notifier.c mm/nommu.c mm/util.c # mm_access(): fs/proc/base.c include/linux/sched/mm.h kernel/fork.c mm/process_vm_access.c # mm_release(): arch/arc/include/asm/mmu_context.h fs/exec.c include/linux/sched/mm.h include/uapi/linux/sched.h kernel/exit.c kernel/fork.c Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/alpha/kernel/smp.c | 2 +- arch/arc/kernel/smp.c | 2 +- arch/arm/kernel/smp.c | 2 +- arch/arm64/kernel/smp.c | 2 +- arch/blackfin/mach-common/smp.c | 2 +- arch/hexagon/kernel/smp.c | 2 +- arch/ia64/kernel/setup.c | 2 +- arch/m32r/kernel/setup.c | 2 +- arch/metag/kernel/smp.c | 2 +- arch/mips/kernel/traps.c | 2 +- arch/parisc/kernel/smp.c | 2 +- arch/powerpc/kernel/smp.c | 2 +- arch/s390/kernel/processor.c | 2 + arch/score/kernel/traps.c | 1 + arch/sh/kernel/smp.c | 2 +- arch/sparc/kernel/leon_smp.c | 2 +- arch/sparc/kernel/smp_64.c | 2 +- arch/sparc/kernel/sun4d_smp.c | 2 +- arch/sparc/kernel/sun4m_smp.c | 2 +- arch/sparc/kernel/traps_32.c | 2 +- arch/sparc/kernel/traps_64.c | 2 +- arch/tile/kernel/smpboot.c | 2 +- arch/x86/kernel/cpu/common.c | 2 +- arch/xtensa/kernel/smp.c | 1 + include/linux/sched.h | 95 ----------------------------------------- include/linux/sched/mm.h | 95 +++++++++++++++++++++++++++++++++++++++++ 26 files changed, 120 insertions(+), 116 deletions(-) (limited to 'arch/x86') diff --git a/arch/alpha/kernel/smp.c b/arch/alpha/kernel/smp.c index acb4b146a607..9fc560459ebd 100644 --- a/arch/alpha/kernel/smp.c +++ b/arch/alpha/kernel/smp.c @@ -14,7 +14,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arc/kernel/smp.c b/arch/arc/kernel/smp.c index b8e8d3944481..f46267153ec2 100644 --- a/arch/arc/kernel/smp.c +++ b/arch/arc/kernel/smp.c @@ -13,7 +13,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/arm/kernel/smp.c b/arch/arm/kernel/smp.c index b724cff7ad60..572a8df1b766 100644 --- a/arch/arm/kernel/smp.c +++ b/arch/arm/kernel/smp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/arm64/kernel/smp.c b/arch/arm64/kernel/smp.c index 83c0a839a6ad..ef1caae02110 100644 --- a/arch/arm64/kernel/smp.c +++ b/arch/arm64/kernel/smp.c @@ -21,7 +21,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/blackfin/mach-common/smp.c b/arch/blackfin/mach-common/smp.c index 3ac98252d299..b32ddab7966c 100644 --- a/arch/blackfin/mach-common/smp.c +++ b/arch/blackfin/mach-common/smp.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/hexagon/kernel/smp.c b/arch/hexagon/kernel/smp.c index 1f63e91a353b..5dbc15549e01 100644 --- a/arch/hexagon/kernel/smp.c +++ b/arch/hexagon/kernel/smp.c @@ -25,7 +25,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/ia64/kernel/setup.c b/arch/ia64/kernel/setup.c index 63bef6fc0f3e..23e3fd61e335 100644 --- a/arch/ia64/kernel/setup.c +++ b/arch/ia64/kernel/setup.c @@ -32,7 +32,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/m32r/kernel/setup.c b/arch/m32r/kernel/setup.c index b18bc0bd6544..1a9e977287e6 100644 --- a/arch/m32r/kernel/setup.c +++ b/arch/m32r/kernel/setup.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/metag/kernel/smp.c b/arch/metag/kernel/smp.c index cab2aa64ca82..232a12bf3f99 100644 --- a/arch/metag/kernel/smp.c +++ b/arch/metag/kernel/smp.c @@ -12,7 +12,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/mips/kernel/traps.c b/arch/mips/kernel/traps.c index 1a9366a157cb..c7d17cfb32f6 100644 --- a/arch/mips/kernel/traps.c +++ b/arch/mips/kernel/traps.c @@ -23,7 +23,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/parisc/kernel/smp.c b/arch/parisc/kernel/smp.c index 67b452b41ff6..63365106ea19 100644 --- a/arch/parisc/kernel/smp.c +++ b/arch/parisc/kernel/smp.c @@ -21,7 +21,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/powerpc/kernel/smp.c b/arch/powerpc/kernel/smp.c index fce17789c675..46f89e66a273 100644 --- a/arch/powerpc/kernel/smp.c +++ b/arch/powerpc/kernel/smp.c @@ -19,7 +19,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/s390/kernel/processor.c b/arch/s390/kernel/processor.c index bf7854523831..928b929a6261 100644 --- a/arch/s390/kernel/processor.c +++ b/arch/s390/kernel/processor.c @@ -8,11 +8,13 @@ #include #include +#include #include #include #include #include #include + #include #include #include diff --git a/arch/score/kernel/traps.c b/arch/score/kernel/traps.c index fe68de6c746c..e359ec675869 100644 --- a/arch/score/kernel/traps.c +++ b/arch/score/kernel/traps.c @@ -24,6 +24,7 @@ */ #include +#include #include #include #include diff --git a/arch/sh/kernel/smp.c b/arch/sh/kernel/smp.c index 4abf119c129c..c483422ea4d0 100644 --- a/arch/sh/kernel/smp.c +++ b/arch/sh/kernel/smp.c @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/leon_smp.c b/arch/sparc/kernel/leon_smp.c index b99d33797e1d..db7acf27bea2 100644 --- a/arch/sparc/kernel/leon_smp.c +++ b/arch/sparc/kernel/leon_smp.c @@ -9,7 +9,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/smp_64.c b/arch/sparc/kernel/smp_64.c index 15052d364e04..b3bc0ac757cc 100644 --- a/arch/sparc/kernel/smp_64.c +++ b/arch/sparc/kernel/smp_64.c @@ -5,7 +5,7 @@ #include #include -#include +#include #include #include #include diff --git a/arch/sparc/kernel/sun4d_smp.c b/arch/sparc/kernel/sun4d_smp.c index 7b55c50eabe5..af93b50e3ce4 100644 --- a/arch/sparc/kernel/sun4d_smp.c +++ b/arch/sparc/kernel/sun4d_smp.c @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/sun4m_smp.c b/arch/sparc/kernel/sun4m_smp.c index 633c4cf6fdb0..5547fcb1d72d 100644 --- a/arch/sparc/kernel/sun4m_smp.c +++ b/arch/sparc/kernel/sun4m_smp.c @@ -8,7 +8,7 @@ #include #include #include -#include +#include #include #include diff --git a/arch/sparc/kernel/traps_32.c b/arch/sparc/kernel/traps_32.c index 2de72a49308a..466d4aed06c7 100644 --- a/arch/sparc/kernel/traps_32.c +++ b/arch/sparc/kernel/traps_32.c @@ -9,7 +9,7 @@ * I hate traps on the sparc, grrr... */ -#include /* for jiffies */ +#include #include #include #include diff --git a/arch/sparc/kernel/traps_64.c b/arch/sparc/kernel/traps_64.c index 4ff4c35f76b2..196ee5eb4d48 100644 --- a/arch/sparc/kernel/traps_64.c +++ b/arch/sparc/kernel/traps_64.c @@ -9,7 +9,7 @@ */ #include -#include +#include #include #include #include diff --git a/arch/tile/kernel/smpboot.c b/arch/tile/kernel/smpboot.c index f3fdd0c39b12..869c22e57561 100644 --- a/arch/tile/kernel/smpboot.c +++ b/arch/tile/kernel/smpboot.c @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index f2fd8fefc589..b11b38c3b0bd 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -7,7 +7,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/arch/xtensa/kernel/smp.c b/arch/xtensa/kernel/smp.c index fd894eaa63f3..932d64689bac 100644 --- a/arch/xtensa/kernel/smp.c +++ b/arch/xtensa/kernel/smp.c @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/sched.h b/include/linux/sched.h index 042620729230..d29bbe0ee41f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2379,101 +2379,6 @@ static inline unsigned long sigsp(unsigned long sp, struct ksignal *ksig) return sp; } -/* - * Routines for handling mm_structs - */ -extern struct mm_struct * mm_alloc(void); - -/** - * mmgrab() - Pin a &struct mm_struct. - * @mm: The &struct mm_struct to pin. - * - * Make sure that @mm will not get freed even after the owning task - * exits. This doesn't guarantee that the associated address space - * will still exist later on and mmget_not_zero() has to be used before - * accessing it. - * - * This is a preferred way to to pin @mm for a longer/unbounded amount - * of time. - * - * Use mmdrop() to release the reference acquired by mmgrab(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmgrab(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_count); -} - -/* mmdrop drops the mm and the page tables */ -extern void __mmdrop(struct mm_struct *); -static inline void mmdrop(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) - __mmdrop(mm); -} - -static inline void mmdrop_async_fn(struct work_struct *work) -{ - struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); - __mmdrop(mm); -} - -static inline void mmdrop_async(struct mm_struct *mm) -{ - if (unlikely(atomic_dec_and_test(&mm->mm_count))) { - INIT_WORK(&mm->async_put_work, mmdrop_async_fn); - schedule_work(&mm->async_put_work); - } -} - -/** - * mmget() - Pin the address space associated with a &struct mm_struct. - * @mm: The address space to pin. - * - * Make sure that the address space of the given &struct mm_struct doesn't - * go away. This does not protect against parts of the address space being - * modified or freed, however. - * - * Never use this function to pin this address space for an - * unbounded/indefinite amount of time. - * - * Use mmput() to release the reference acquired by mmget(). - * - * See also for an in-depth explanation - * of &mm_struct.mm_count vs &mm_struct.mm_users. - */ -static inline void mmget(struct mm_struct *mm) -{ - atomic_inc(&mm->mm_users); -} - -static inline bool mmget_not_zero(struct mm_struct *mm) -{ - return atomic_inc_not_zero(&mm->mm_users); -} - -/* mmput gets rid of the mappings and all user-space */ -extern void mmput(struct mm_struct *); -#ifdef CONFIG_MMU -/* same as above but performs the slow path from the async context. Can - * be called from the atomic context as well - */ -extern void mmput_async(struct mm_struct *); -#endif - -/* Grab a reference to a task's mm, if it is not already going away */ -extern struct mm_struct *get_task_mm(struct task_struct *task); -/* - * Grab a reference to a task's mm, if it is not already going away - * and ptrace_may_access with the mode parameter passed to it - * succeeds. - */ -extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); -/* Remove the current tasks stale references to the old mm_struct */ -extern void mm_release(struct task_struct *, struct mm_struct *); - #ifdef CONFIG_HAVE_COPY_THREAD_TLS extern int copy_thread_tls(unsigned long, unsigned long, unsigned long, struct task_struct *, unsigned long); diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index d32e3932b2e3..be1ae55f5ab9 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -5,4 +5,99 @@ #include #include +/* + * Routines for handling mm_structs + */ +extern struct mm_struct * mm_alloc(void); + +/** + * mmgrab() - Pin a &struct mm_struct. + * @mm: The &struct mm_struct to pin. + * + * Make sure that @mm will not get freed even after the owning task + * exits. This doesn't guarantee that the associated address space + * will still exist later on and mmget_not_zero() has to be used before + * accessing it. + * + * This is a preferred way to to pin @mm for a longer/unbounded amount + * of time. + * + * Use mmdrop() to release the reference acquired by mmgrab(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmgrab(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_count); +} + +/* mmdrop drops the mm and the page tables */ +extern void __mmdrop(struct mm_struct *); +static inline void mmdrop(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) + __mmdrop(mm); +} + +static inline void mmdrop_async_fn(struct work_struct *work) +{ + struct mm_struct *mm = container_of(work, struct mm_struct, async_put_work); + __mmdrop(mm); +} + +static inline void mmdrop_async(struct mm_struct *mm) +{ + if (unlikely(atomic_dec_and_test(&mm->mm_count))) { + INIT_WORK(&mm->async_put_work, mmdrop_async_fn); + schedule_work(&mm->async_put_work); + } +} + +/** + * mmget() - Pin the address space associated with a &struct mm_struct. + * @mm: The address space to pin. + * + * Make sure that the address space of the given &struct mm_struct doesn't + * go away. This does not protect against parts of the address space being + * modified or freed, however. + * + * Never use this function to pin this address space for an + * unbounded/indefinite amount of time. + * + * Use mmput() to release the reference acquired by mmget(). + * + * See also for an in-depth explanation + * of &mm_struct.mm_count vs &mm_struct.mm_users. + */ +static inline void mmget(struct mm_struct *mm) +{ + atomic_inc(&mm->mm_users); +} + +static inline bool mmget_not_zero(struct mm_struct *mm) +{ + return atomic_inc_not_zero(&mm->mm_users); +} + +/* mmput gets rid of the mappings and all user-space */ +extern void mmput(struct mm_struct *); +#ifdef CONFIG_MMU +/* same as above but performs the slow path from the async context. Can + * be called from the atomic context as well + */ +extern void mmput_async(struct mm_struct *); +#endif + +/* Grab a reference to a task's mm, if it is not already going away */ +extern struct mm_struct *get_task_mm(struct task_struct *task); +/* + * Grab a reference to a task's mm, if it is not already going away + * and ptrace_may_access with the mode parameter passed to it + * succeeds. + */ +extern struct mm_struct *mm_access(struct task_struct *task, unsigned int mode); +/* Remove the current tasks stale references to the old mm_struct */ +extern void mm_release(struct task_struct *, struct mm_struct *); + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 1a79a72c65b8f1b4f44e166d1fb5cad560461b26 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 6 Feb 2017 09:58:35 +0100 Subject: sched/headers, x86/apic: Remove the header inclusion from We want to simplify 's header dependencies, but one roadblock to that is 's inclusion of pm.h, which brings in other, problematic headers. Remove it, as it appears to be entirely spurious, apic.h does not actually make use of any PM facilities. Acked-by: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index eff8e36aaf72..730ef65e8393 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -2,7 +2,6 @@ #define _ASM_X86_APIC_H #include -#include #include #include -- cgit v1.2.3 From a528d35e8bfcc521d7cb70aaf03e1bd296c8493f Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 31 Jan 2017 16:46:22 +0000 Subject: statx: Add a system call to make enhanced file info available Add a system call to make extended file information available, including file creation and some attribute flags where available through the underlying filesystem. The getattr inode operation is altered to take two additional arguments: a u32 request_mask and an unsigned int flags that indicate the synchronisation mode. This change is propagated to the vfs_getattr*() function. Functions like vfs_stat() are now inline wrappers around new functions vfs_statx() and vfs_statx_fd() to reduce stack usage. ======== OVERVIEW ======== The idea was initially proposed as a set of xattrs that could be retrieved with getxattr(), but the general preference proved to be for a new syscall with an extended stat structure. A number of requests were gathered for features to be included. The following have been included: (1) Make the fields a consistent size on all arches and make them large. (2) Spare space, request flags and information flags are provided for future expansion. (3) Better support for the y2038 problem [Arnd Bergmann] (tv_sec is an __s64). (4) Creation time: The SMB protocol carries the creation time, which could be exported by Samba, which will in turn help CIFS make use of FS-Cache as that can be used for coherency data (stx_btime). This is also specified in NFSv4 as a recommended attribute and could be exported by NFSD [Steve French]. (5) Lightweight stat: Ask for just those details of interest, and allow a netfs (such as NFS) to approximate anything not of interest, possibly without going to the server [Trond Myklebust, Ulrich Drepper, Andreas Dilger] (AT_STATX_DONT_SYNC). (6) Heavyweight stat: Force a netfs to go to the server, even if it thinks its cached attributes are up to date [Trond Myklebust] (AT_STATX_FORCE_SYNC). And the following have been left out for future extension: (7) Data version number: Could be used by userspace NFS servers [Aneesh Kumar]. Can also be used to modify fill_post_wcc() in NFSD which retrieves i_version directly, but has just called vfs_getattr(). It could get it from the kstat struct if it used vfs_xgetattr() instead. (There's disagreement on the exact semantics of a single field, since not all filesystems do this the same way). (8) BSD stat compatibility: Including more fields from the BSD stat such as creation time (st_btime) and inode generation number (st_gen) [Jeremy Allison, Bernd Schubert]. (9) Inode generation number: Useful for FUSE and userspace NFS servers [Bernd Schubert]. (This was asked for but later deemed unnecessary with the open-by-handle capability available and caused disagreement as to whether it's a security hole or not). (10) Extra coherency data may be useful in making backups [Andreas Dilger]. (No particular data were offered, but things like last backup timestamp, the data version number and the DOS archive bit would come into this category). (11) Allow the filesystem to indicate what it can/cannot provide: A filesystem can now say it doesn't support a standard stat feature if that isn't available, so if, for instance, inode numbers or UIDs don't exist or are fabricated locally... (This requires a separate system call - I have an fsinfo() call idea for this). (12) Store a 16-byte volume ID in the superblock that can be returned in struct xstat [Steve French]. (Deferred to fsinfo). (13) Include granularity fields in the time data to indicate the granularity of each of the times (NFSv4 time_delta) [Steve French]. (Deferred to fsinfo). (14) FS_IOC_GETFLAGS value. These could be translated to BSD's st_flags. Note that the Linux IOC flags are a mess and filesystems such as Ext4 define flags that aren't in linux/fs.h, so translation in the kernel may be a necessity (or, possibly, we provide the filesystem type too). (Some attributes are made available in stx_attributes, but the general feeling was that the IOC flags were to ext[234]-specific and shouldn't be exposed through statx this way). (15) Mask of features available on file (eg: ACLs, seclabel) [Brad Boyer, Michael Kerrisk]. (Deferred, probably to fsinfo. Finding out if there's an ACL or seclabal might require extra filesystem operations). (16) Femtosecond-resolution timestamps [Dave Chinner]. (A __reserved field has been left in the statx_timestamp struct for this - if there proves to be a need). (17) A set multiple attributes syscall to go with this. =============== NEW SYSTEM CALL =============== The new system call is: int ret = statx(int dfd, const char *filename, unsigned int flags, unsigned int mask, struct statx *buffer); The dfd, filename and flags parameters indicate the file to query, in a similar way to fstatat(). There is no equivalent of lstat() as that can be emulated with statx() by passing AT_SYMLINK_NOFOLLOW in flags. There is also no equivalent of fstat() as that can be emulated by passing a NULL filename to statx() with the fd of interest in dfd. Whether or not statx() synchronises the attributes with the backing store can be controlled by OR'ing a value into the flags argument (this typically only affects network filesystems): (1) AT_STATX_SYNC_AS_STAT tells statx() to behave as stat() does in this respect. (2) AT_STATX_FORCE_SYNC will require a network filesystem to synchronise its attributes with the server - which might require data writeback to occur to get the timestamps correct. (3) AT_STATX_DONT_SYNC will suppress synchronisation with the server in a network filesystem. The resulting values should be considered approximate. mask is a bitmask indicating the fields in struct statx that are of interest to the caller. The user should set this to STATX_BASIC_STATS to get the basic set returned by stat(). It should be noted that asking for more information may entail extra I/O operations. buffer points to the destination for the data. This must be 256 bytes in size. ====================== MAIN ATTRIBUTES RECORD ====================== The following structures are defined in which to return the main attribute set: struct statx_timestamp { __s64 tv_sec; __s32 tv_nsec; __s32 __reserved; }; struct statx { __u32 stx_mask; __u32 stx_blksize; __u64 stx_attributes; __u32 stx_nlink; __u32 stx_uid; __u32 stx_gid; __u16 stx_mode; __u16 __spare0[1]; __u64 stx_ino; __u64 stx_size; __u64 stx_blocks; __u64 __spare1[1]; struct statx_timestamp stx_atime; struct statx_timestamp stx_btime; struct statx_timestamp stx_ctime; struct statx_timestamp stx_mtime; __u32 stx_rdev_major; __u32 stx_rdev_minor; __u32 stx_dev_major; __u32 stx_dev_minor; __u64 __spare2[14]; }; The defined bits in request_mask and stx_mask are: STATX_TYPE Want/got stx_mode & S_IFMT STATX_MODE Want/got stx_mode & ~S_IFMT STATX_NLINK Want/got stx_nlink STATX_UID Want/got stx_uid STATX_GID Want/got stx_gid STATX_ATIME Want/got stx_atime{,_ns} STATX_MTIME Want/got stx_mtime{,_ns} STATX_CTIME Want/got stx_ctime{,_ns} STATX_INO Want/got stx_ino STATX_SIZE Want/got stx_size STATX_BLOCKS Want/got stx_blocks STATX_BASIC_STATS [The stuff in the normal stat struct] STATX_BTIME Want/got stx_btime{,_ns} STATX_ALL [All currently available stuff] stx_btime is the file creation time, stx_mask is a bitmask indicating the data provided and __spares*[] are where as-yet undefined fields can be placed. Time fields are structures with separate seconds and nanoseconds fields plus a reserved field in case we want to add even finer resolution. Note that times will be negative if before 1970; in such a case, the nanosecond fields will also be negative if not zero. The bits defined in the stx_attributes field convey information about a file, how it is accessed, where it is and what it does. The following attributes map to FS_*_FL flags and are the same numerical value: STATX_ATTR_COMPRESSED File is compressed by the fs STATX_ATTR_IMMUTABLE File is marked immutable STATX_ATTR_APPEND File is append-only STATX_ATTR_NODUMP File is not to be dumped STATX_ATTR_ENCRYPTED File requires key to decrypt in fs Within the kernel, the supported flags are listed by: KSTAT_ATTR_FS_IOC_FLAGS [Are any other IOC flags of sufficient general interest to be exposed through this interface?] New flags include: STATX_ATTR_AUTOMOUNT Object is an automount trigger These are for the use of GUI tools that might want to mark files specially, depending on what they are. Fields in struct statx come in a number of classes: (0) stx_dev_*, stx_blksize. These are local system information and are always available. (1) stx_mode, stx_nlinks, stx_uid, stx_gid, stx_[amc]time, stx_ino, stx_size, stx_blocks. These will be returned whether the caller asks for them or not. The corresponding bits in stx_mask will be set to indicate whether they actually have valid values. If the caller didn't ask for them, then they may be approximated. For example, NFS won't waste any time updating them from the server, unless as a byproduct of updating something requested. If the values don't actually exist for the underlying object (such as UID or GID on a DOS file), then the bit won't be set in the stx_mask, even if the caller asked for the value. In such a case, the returned value will be a fabrication. Note that there are instances where the type might not be valid, for instance Windows reparse points. (2) stx_rdev_*. This will be set only if stx_mode indicates we're looking at a blockdev or a chardev, otherwise will be 0. (3) stx_btime. Similar to (1), except this will be set to 0 if it doesn't exist. ======= TESTING ======= The following test program can be used to test the statx system call: samples/statx/test-statx.c Just compile and run, passing it paths to the files you want to examine. The file is built automatically if CONFIG_SAMPLES is enabled. Here's some example output. Firstly, an NFS directory that crosses to another FSID. Note that the AUTOMOUNT attribute is set because transiting this directory will cause d_automount to be invoked by the VFS. [root@andromeda ~]# /tmp/test-statx -A /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:26 Inode: 1703937 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Attributes: 0000000000001000 (-------- -------- -------- -------- -------- -------- ---m---- --------) Secondly, the result of automounting on that directory. [root@andromeda ~]# /tmp/test-statx /warthog/data statx(/warthog/data) = 0 results=7ff Size: 4096 Blocks: 8 IO Block: 1048576 directory Device: 00:27 Inode: 2 Links: 125 Access: (3777/drwxrwxrwx) Uid: 0 Gid: 4041 Access: 2016-11-24 09:02:12.219699527+0000 Modify: 2016-11-17 10:44:36.225653653+0000 Change: 2016-11-17 10:44:36.225653653+0000 Signed-off-by: David Howells Signed-off-by: Al Viro --- Documentation/filesystems/Locking | 3 +- Documentation/filesystems/vfs.txt | 3 +- arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + drivers/base/devtmpfs.c | 3 +- drivers/block/loop.c | 3 +- drivers/mtd/ubi/build.c | 2 +- drivers/mtd/ubi/kapi.c | 2 +- drivers/staging/lustre/lustre/llite/file.c | 9 +- .../staging/lustre/lustre/llite/llite_internal.h | 3 +- fs/9p/vfs_inode.c | 10 +- fs/9p/vfs_inode_dotl.c | 5 +- fs/afs/inode.c | 8 +- fs/afs/internal.h | 2 +- fs/bad_inode.c | 4 +- fs/btrfs/inode.c | 6 +- fs/ceph/inode.c | 6 +- fs/ceph/super.h | 4 +- fs/cifs/cifsfs.h | 2 +- fs/cifs/inode.c | 5 +- fs/coda/coda_linux.h | 2 +- fs/coda/inode.c | 7 +- fs/ecryptfs/inode.c | 13 +- fs/exportfs/expfs.c | 3 +- fs/ext4/ext4.h | 3 +- fs/ext4/inode.c | 6 +- fs/f2fs/f2fs.h | 4 +- fs/f2fs/file.c | 6 +- fs/fat/fat.h | 4 +- fs/fat/file.c | 5 +- fs/fuse/dir.c | 6 +- fs/gfs2/inode.c | 11 +- fs/kernfs/inode.c | 8 +- fs/kernfs/kernfs-internal.h | 4 +- fs/libfs.c | 12 +- fs/minix/inode.c | 11 +- fs/minix/minix.h | 2 +- fs/nfs/inode.c | 13 +- fs/nfs/namespace.c | 9 +- fs/nfsd/nfs4xdr.c | 4 +- fs/nfsd/vfs.h | 3 +- fs/ocfs2/file.c | 11 +- fs/ocfs2/file.h | 4 +- fs/orangefs/inode.c | 13 +- fs/orangefs/orangefs-kernel.h | 5 +- fs/overlayfs/copy_up.c | 6 +- fs/overlayfs/dir.c | 10 +- fs/overlayfs/inode.c | 7 +- fs/proc/base.c | 12 +- fs/proc/generic.c | 6 +- fs/proc/internal.h | 2 +- fs/proc/proc_net.c | 6 +- fs/proc/proc_sysctl.c | 5 +- fs/proc/root.c | 6 +- fs/stat.c | 214 ++++++++++++++--- fs/sysv/itree.c | 7 +- fs/sysv/sysv.h | 2 +- fs/ubifs/dir.c | 6 +- fs/ubifs/ubifs.h | 4 +- fs/udf/symlink.c | 5 +- fs/xfs/xfs_iops.c | 9 +- include/linux/fs.h | 35 ++- include/linux/nfs_fs.h | 2 +- include/linux/stat.h | 24 +- include/linux/syscalls.h | 3 + include/uapi/linux/fcntl.h | 5 + include/uapi/linux/stat.h | 131 +++++++++++ mm/shmem.c | 6 +- samples/Kconfig | 6 + samples/Makefile | 2 +- samples/statx/Makefile | 10 + samples/statx/test-statx.c | 254 +++++++++++++++++++++ 72 files changed, 822 insertions(+), 214 deletions(-) create mode 100644 samples/statx/Makefile create mode 100644 samples/statx/test-statx.c (limited to 'arch/x86') diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking index ace63cd7af8c..fdcfdd79682a 100644 --- a/Documentation/filesystems/Locking +++ b/Documentation/filesystems/Locking @@ -58,7 +58,8 @@ prototypes: int (*permission) (struct inode *, int, unsigned int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct dentry *, struct kstat *, + u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); void (*update_time)(struct inode *, struct timespec *, int); diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt index b968084eeac1..569211703721 100644 --- a/Documentation/filesystems/vfs.txt +++ b/Documentation/filesystems/vfs.txt @@ -382,7 +382,8 @@ struct inode_operations { int (*permission) (struct inode *, int); int (*get_acl)(struct inode *, int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct dentry *, struct kstat *, + u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); void (*update_time)(struct inode *, struct timespec *, int); int (*atomic_open)(struct inode *, struct dentry *, struct file *, diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2b3618542544..9ba050fe47f3 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -389,3 +389,4 @@ 380 i386 pkey_mprotect sys_pkey_mprotect 381 i386 pkey_alloc sys_pkey_alloc 382 i386 pkey_free sys_pkey_free +383 i386 statx sys_statx diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index e93ef0b38db8..5aef183e2f85 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -338,6 +338,7 @@ 329 common pkey_mprotect sys_pkey_mprotect 330 common pkey_alloc sys_pkey_alloc 331 common pkey_free sys_pkey_free +332 common statx sys_statx # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/drivers/base/devtmpfs.c b/drivers/base/devtmpfs.c index 44a74cf1372c..d2fb9c8ed205 100644 --- a/drivers/base/devtmpfs.c +++ b/drivers/base/devtmpfs.c @@ -309,7 +309,8 @@ static int handle_remove(const char *nodename, struct device *dev) if (d_really_is_positive(dentry)) { struct kstat stat; struct path p = {.mnt = parent.mnt, .dentry = dentry}; - err = vfs_getattr(&p, &stat); + err = vfs_getattr(&p, &stat, STATX_TYPE | STATX_MODE, + AT_STATX_SYNC_AS_STAT); if (!err && dev_mynode(dev, d_inode(dentry), &stat)) { struct iattr newattrs; /* diff --git a/drivers/block/loop.c b/drivers/block/loop.c index eeb1db73f44e..8f4051999741 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1175,7 +1175,8 @@ loop_get_status(struct loop_device *lo, struct loop_info64 *info) if (lo->lo_state != Lo_bound) return -ENXIO; - error = vfs_getattr(&file->f_path, &stat); + error = vfs_getattr(&file->f_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; memset(info, 0, sizeof(*info)); diff --git a/drivers/mtd/ubi/build.c b/drivers/mtd/ubi/build.c index 85d54f37e28f..77513195f50e 100644 --- a/drivers/mtd/ubi/build.c +++ b/drivers/mtd/ubi/build.c @@ -1159,7 +1159,7 @@ static struct mtd_info * __init open_mtd_by_chdev(const char *mtd_dev) if (err) return ERR_PTR(err); - err = vfs_getattr(&path, &stat); + err = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (err) return ERR_PTR(err); diff --git a/drivers/mtd/ubi/kapi.c b/drivers/mtd/ubi/kapi.c index 88b1897aeb40..d4b2e8744498 100644 --- a/drivers/mtd/ubi/kapi.c +++ b/drivers/mtd/ubi/kapi.c @@ -314,7 +314,7 @@ struct ubi_volume_desc *ubi_open_volume_path(const char *pathname, int mode) if (error) return ERR_PTR(error); - error = vfs_getattr(&path, &stat); + error = vfs_getattr(&path, &stat, STATX_TYPE, AT_STATX_SYNC_AS_STAT); path_put(&path); if (error) return ERR_PTR(error); diff --git a/drivers/staging/lustre/lustre/llite/file.c b/drivers/staging/lustre/lustre/llite/file.c index 10adfcdd7035..481c0d01d4c6 100644 --- a/drivers/staging/lustre/lustre/llite/file.c +++ b/drivers/staging/lustre/lustre/llite/file.c @@ -2952,15 +2952,16 @@ static int ll_inode_revalidate(struct dentry *dentry, __u64 ibits) return rc; } -int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat) +int ll_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(de); + struct inode *inode = d_inode(path->dentry); struct ll_sb_info *sbi = ll_i2sbi(inode); struct ll_inode_info *lli = ll_i2info(inode); int res; - res = ll_inode_revalidate(de, MDS_INODELOCK_UPDATE | - MDS_INODELOCK_LOOKUP); + res = ll_inode_revalidate(path->dentry, + MDS_INODELOCK_UPDATE | MDS_INODELOCK_LOOKUP); ll_stats_ops_tally(sbi, LPROC_LL_GETATTR, 1); if (res) diff --git a/drivers/staging/lustre/lustre/llite/llite_internal.h b/drivers/staging/lustre/lustre/llite/llite_internal.h index ecdfd0c29b7f..55f68acd85d1 100644 --- a/drivers/staging/lustre/lustre/llite/llite_internal.h +++ b/drivers/staging/lustre/lustre/llite/llite_internal.h @@ -750,7 +750,8 @@ int ll_file_open(struct inode *inode, struct file *file); int ll_file_release(struct inode *inode, struct file *file); int ll_release_openhandle(struct inode *, struct lookup_intent *); int ll_md_real_close(struct inode *inode, fmode_t fmode); -int ll_getattr(struct vfsmount *mnt, struct dentry *de, struct kstat *stat); +int ll_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); struct posix_acl *ll_get_acl(struct inode *inode, int type); int ll_migrate(struct inode *parent, struct file *file, int mdtidx, const char *name, int namelen); diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f4f4450119e4..f1d96233670c 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -1047,16 +1047,18 @@ done: /** * v9fs_vfs_getattr - retrieve file metadata - * @mnt: mount information - * @dentry: file to get attributes on + * @path: Object to query * @stat: metadata structure to populate + * @request_mask: Mask of STATX_xxx flags indicating the caller's interests + * @flags: AT_STATX_xxx setting * */ static int -v9fs_vfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +v9fs_vfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_wstat *st; diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 5999bd050678..570e63ee5b71 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -468,9 +468,10 @@ error: } static int -v9fs_vfs_getattr_dotl(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +v9fs_vfs_getattr_dotl(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct v9fs_session_info *v9ses; struct p9_fid *fid; struct p9_stat_dotl *st; diff --git a/fs/afs/inode.c b/fs/afs/inode.c index 86cc7264c21c..1e4897a048d2 100644 --- a/fs/afs/inode.c +++ b/fs/afs/inode.c @@ -375,12 +375,10 @@ error_unlock: /* * read the attributes of an inode */ -int afs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int afs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode; - - inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); _enter("{ ino=%lu v=%u }", inode->i_ino, inode->i_generation); diff --git a/fs/afs/internal.h b/fs/afs/internal.h index 8acf3670e756..5dfa56903a2d 100644 --- a/fs/afs/internal.h +++ b/fs/afs/internal.h @@ -533,7 +533,7 @@ extern struct inode *afs_iget(struct super_block *, struct key *, struct afs_callback *); extern void afs_zap_data(struct afs_vnode *); extern int afs_validate(struct afs_vnode *, struct key *); -extern int afs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int afs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int afs_setattr(struct dentry *, struct iattr *); extern void afs_evict_inode(struct inode *); extern int afs_drop_inode(struct inode *); diff --git a/fs/bad_inode.c b/fs/bad_inode.c index 5f685c819298..bb53728c7a31 100644 --- a/fs/bad_inode.c +++ b/fs/bad_inode.c @@ -89,8 +89,8 @@ static int bad_inode_permission(struct inode *inode, int mask) return -EIO; } -static int bad_inode_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int bad_inode_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { return -EIO; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index ee6978d80491..c40060cc481f 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -9413,11 +9413,11 @@ fail: return -ENOMEM; } -static int btrfs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) +static int btrfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { u64 delalloc_bytes; - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); u32 blocksize = inode->i_sb->s_blocksize; generic_fillattr(inode, stat); diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c index fd8f771f99b7..d449e1c03cbd 100644 --- a/fs/ceph/inode.c +++ b/fs/ceph/inode.c @@ -2187,10 +2187,10 @@ int ceph_permission(struct inode *inode, int mask) * Get all attributes. Hopefully somedata we'll have a statlite() * and can limit the fields we require to be accurate. */ -int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ceph_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ceph_inode_info *ci = ceph_inode(inode); int err; diff --git a/fs/ceph/super.h b/fs/ceph/super.h index e9410bcf4113..fe6b9cfc4013 100644 --- a/fs/ceph/super.h +++ b/fs/ceph/super.h @@ -784,8 +784,8 @@ static inline int ceph_do_getattr(struct inode *inode, int mask, bool force) extern int ceph_permission(struct inode *inode, int mask); extern int __ceph_setattr(struct inode *inode, struct iattr *attr); extern int ceph_setattr(struct dentry *dentry, struct iattr *attr); -extern int ceph_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int ceph_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); /* xattr.c */ int __ceph_setxattr(struct inode *, const char *, const void *, size_t, int); diff --git a/fs/cifs/cifsfs.h b/fs/cifs/cifsfs.h index c9c00a862036..da717fee3026 100644 --- a/fs/cifs/cifsfs.h +++ b/fs/cifs/cifsfs.h @@ -83,7 +83,7 @@ extern int cifs_revalidate_dentry(struct dentry *); extern int cifs_invalidate_mapping(struct inode *inode); extern int cifs_revalidate_mapping(struct inode *inode); extern int cifs_zap_mapping(struct inode *inode); -extern int cifs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int cifs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int cifs_setattr(struct dentry *, struct iattr *); extern const struct inode_operations cifs_file_inode_ops; diff --git a/fs/cifs/inode.c b/fs/cifs/inode.c index 7ab5be7944aa..1363fff460b9 100644 --- a/fs/cifs/inode.c +++ b/fs/cifs/inode.c @@ -1990,9 +1990,10 @@ int cifs_revalidate_dentry(struct dentry *dentry) return cifs_revalidate_mapping(inode); } -int cifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int cifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct cifs_sb_info *cifs_sb = CIFS_SB(dentry->d_sb); struct cifs_tcon *tcon = cifs_sb_master_tcon(cifs_sb); struct inode *inode = d_inode(dentry); diff --git a/fs/coda/coda_linux.h b/fs/coda/coda_linux.h index 5104d84c4f64..d3c361883c28 100644 --- a/fs/coda/coda_linux.h +++ b/fs/coda/coda_linux.h @@ -47,7 +47,7 @@ int coda_open(struct inode *i, struct file *f); int coda_release(struct inode *i, struct file *f); int coda_permission(struct inode *inode, int mask); int coda_revalidate_inode(struct inode *); -int coda_getattr(struct vfsmount *, struct dentry *, struct kstat *); +int coda_getattr(const struct path *, struct kstat *, u32, unsigned int); int coda_setattr(struct dentry *, struct iattr *); /* this file: heloers */ diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 71dbe7e287ce..2dea594da199 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -255,11 +255,12 @@ static void coda_evict_inode(struct inode *inode) coda_cache_clear_inode(inode); } -int coda_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int coda_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - int err = coda_revalidate_inode(d_inode(dentry)); + int err = coda_revalidate_inode(d_inode(path->dentry)); if (!err) - generic_fillattr(d_inode(dentry), stat); + generic_fillattr(d_inode(path->dentry), stat); return err; } diff --git a/fs/ecryptfs/inode.c b/fs/ecryptfs/inode.c index e7413f82d27b..efc2db42d175 100644 --- a/fs/ecryptfs/inode.c +++ b/fs/ecryptfs/inode.c @@ -959,9 +959,10 @@ out: return rc; } -static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr_link(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct ecryptfs_mount_crypt_stat *mount_crypt_stat; int rc = 0; @@ -983,13 +984,15 @@ static int ecryptfs_getattr_link(struct vfsmount *mnt, struct dentry *dentry, return rc; } -static int ecryptfs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ecryptfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct kstat lower_stat; int rc; - rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat); + rc = vfs_getattr(ecryptfs_dentry_to_lower_path(dentry), &lower_stat, + request_mask, flags); if (!rc) { fsstack_copy_attr_all(d_inode(dentry), ecryptfs_inode_to_lower(d_inode(dentry))); diff --git a/fs/exportfs/expfs.c b/fs/exportfs/expfs.c index a4b531be9168..f2d24bb8d745 100644 --- a/fs/exportfs/expfs.c +++ b/fs/exportfs/expfs.c @@ -299,7 +299,8 @@ static int get_name(const struct path *path, char *name, struct dentry *child) * filesystem supports 64-bit inode numbers. So we need to * actually call ->getattr, not just read i_ino: */ - error = vfs_getattr_nosec(&child_path, &stat); + error = vfs_getattr_nosec(&child_path, &stat, + STATX_INO, AT_STATX_SYNC_AS_STAT); if (error) return error; buffer.ino = stat.ino; diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 2fd17e8e4984..025d2e85f454 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -2462,8 +2462,7 @@ extern struct inode *ext4_iget(struct super_block *, unsigned long); extern struct inode *ext4_iget_normal(struct super_block *, unsigned long); extern int ext4_write_inode(struct inode *, struct writeback_control *); extern int ext4_setattr(struct dentry *, struct iattr *); -extern int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int ext4_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void ext4_evict_inode(struct inode *); extern void ext4_clear_inode(struct inode *); extern int ext4_sync_inode(handle_t *, struct inode *); diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 971f66342080..7385e6a6b6cb 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -5387,13 +5387,13 @@ err_out: return error; } -int ext4_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ext4_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct inode *inode; unsigned long long delalloc_blocks; - inode = d_inode(dentry); + inode = d_inode(path->dentry); generic_fillattr(inode, stat); /* diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index d1483136fed6..e849f83d6114 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2040,8 +2040,8 @@ int f2fs_sync_file(struct file *file, loff_t start, loff_t end, int datasync); void truncate_data_blocks(struct dnode_of_data *dn); int truncate_blocks(struct inode *inode, u64 from, bool lock); int f2fs_truncate(struct inode *inode); -int f2fs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int f2fs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int f2fs_setattr(struct dentry *dentry, struct iattr *attr); int truncate_hole(struct inode *inode, pgoff_t pg_start, pgoff_t pg_end); int truncate_data_blocks_range(struct dnode_of_data *dn, int count); diff --git a/fs/f2fs/file.c b/fs/f2fs/file.c index 78e65288f2b2..5f7317875a67 100644 --- a/fs/f2fs/file.c +++ b/fs/f2fs/file.c @@ -633,10 +633,10 @@ int f2fs_truncate(struct inode *inode) return 0; } -int f2fs_getattr(struct vfsmount *mnt, - struct dentry *dentry, struct kstat *stat) +int f2fs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blocks <<= 3; return 0; diff --git a/fs/fat/fat.h b/fs/fat/fat.h index e6b764a17a9c..051dac1ce3be 100644 --- a/fs/fat/fat.h +++ b/fs/fat/fat.h @@ -364,8 +364,8 @@ extern const struct file_operations fat_file_operations; extern const struct inode_operations fat_file_inode_operations; extern int fat_setattr(struct dentry *dentry, struct iattr *attr); extern void fat_truncate_blocks(struct inode *inode, loff_t offset); -extern int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +extern int fat_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); extern int fat_file_fsync(struct file *file, loff_t start, loff_t end, int datasync); diff --git a/fs/fat/file.c b/fs/fat/file.c index 3d04b124bce0..4724cc9ad650 100644 --- a/fs/fat/file.c +++ b/fs/fat/file.c @@ -365,9 +365,10 @@ void fat_truncate_blocks(struct inode *inode, loff_t offset) fat_flush_inodes(inode->i_sb, inode, NULL); } -int fat_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int fat_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blksize = MSDOS_SB(inode->i_sb)->cluster_size; diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index 811fd8929a18..beb3d64f16e2 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1777,10 +1777,10 @@ static int fuse_setattr(struct dentry *entry, struct iattr *attr) return ret; } -static int fuse_getattr(struct vfsmount *mnt, struct dentry *entry, - struct kstat *stat) +static int fuse_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(entry); + struct inode *inode = d_inode(path->dentry); struct fuse_conn *fc = get_fuse_conn(inode); if (!fuse_allow_current_process(fc)) diff --git a/fs/gfs2/inode.c b/fs/gfs2/inode.c index eb7724b8578a..288c15f385bd 100644 --- a/fs/gfs2/inode.c +++ b/fs/gfs2/inode.c @@ -1959,9 +1959,10 @@ out: /** * gfs2_getattr - Read out an inode's attributes - * @mnt: The vfsmount the inode is being accessed from - * @dentry: The dentry to stat + * @path: Object to query * @stat: The inode's stats + * @request_mask: Mask of STATX_xxx flags indicating the caller's interests + * @flags: AT_STATX_xxx setting * * This may be called from the VFS directly, or from within GFS2 with the * inode locked, so we look to see if the glock is already locked and only @@ -1972,10 +1973,10 @@ out: * Returns: errno */ -static int gfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int gfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct gfs2_inode *ip = GFS2_I(inode); struct gfs2_holder gh; int error; diff --git a/fs/kernfs/inode.c b/fs/kernfs/inode.c index ac9e108ce1ea..fb4b4a79a0d6 100644 --- a/fs/kernfs/inode.c +++ b/fs/kernfs/inode.c @@ -200,11 +200,11 @@ static void kernfs_refresh_inode(struct kernfs_node *kn, struct inode *inode) set_nlink(inode, kn->dir.subdirs + 2); } -int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int kernfs_iop_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct kernfs_node *kn = dentry->d_fsdata; - struct inode *inode = d_inode(dentry); + struct kernfs_node *kn = path->dentry->d_fsdata; + struct inode *inode = d_inode(path->dentry); mutex_lock(&kernfs_mutex); kernfs_refresh_inode(kn, inode); diff --git a/fs/kernfs/kernfs-internal.h b/fs/kernfs/kernfs-internal.h index 3100987cf8ba..2d5144ab4251 100644 --- a/fs/kernfs/kernfs-internal.h +++ b/fs/kernfs/kernfs-internal.h @@ -80,8 +80,8 @@ extern const struct xattr_handler *kernfs_xattr_handlers[]; void kernfs_evict_inode(struct inode *inode); int kernfs_iop_permission(struct inode *inode, int mask); int kernfs_iop_setattr(struct dentry *dentry, struct iattr *iattr); -int kernfs_iop_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int kernfs_iop_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags); ssize_t kernfs_iop_listxattr(struct dentry *dentry, char *buf, size_t size); /* diff --git a/fs/libfs.c b/fs/libfs.c index 28d6f35feed6..1dfaf8f606c0 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -20,10 +20,10 @@ #include "internal.h" -int simple_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int simple_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); stat->blocks = inode->i_mapping->nrpages << (PAGE_SHIFT - 9); return 0; @@ -1143,10 +1143,10 @@ static struct dentry *empty_dir_lookup(struct inode *dir, struct dentry *dentry, return ERR_PTR(-ENOENT); } -static int empty_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int empty_dir_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); generic_fillattr(inode, stat); return 0; } diff --git a/fs/minix/inode.c b/fs/minix/inode.c index e7d9bf86d975..6ac76b0434e9 100644 --- a/fs/minix/inode.c +++ b/fs/minix/inode.c @@ -622,11 +622,14 @@ static int minix_write_inode(struct inode *inode, struct writeback_control *wbc) return err; } -int minix_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int minix_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct super_block *sb = dentry->d_sb; - generic_fillattr(d_inode(dentry), stat); - if (INODE_VERSION(d_inode(dentry)) == MINIX_V1) + struct super_block *sb = path->dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + + generic_fillattr(inode, stat); + if (INODE_VERSION(inode) == MINIX_V1) stat->blocks = (BLOCK_SIZE / 512) * V1_minix_blocks(stat->size, sb); else stat->blocks = (sb->s_blocksize / 512) * V2_minix_blocks(stat->size, sb); diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 01ad81dcacc5..663d66138d06 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -51,7 +51,7 @@ extern unsigned long minix_count_free_inodes(struct super_block *sb); extern int minix_new_block(struct inode * inode); extern void minix_free_block(struct inode *inode, unsigned long block); extern unsigned long minix_count_free_blocks(struct super_block *sb); -extern int minix_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int minix_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int minix_prepare_chunk(struct page *page, loff_t pos, unsigned len); extern void V1_minix_truncate(struct inode *); diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 5ca4d96b1942..b5425315adcc 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -703,9 +703,10 @@ static bool nfs_need_revalidate_inode(struct inode *inode) return false; } -int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int nfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); int need_atime = NFS_I(inode)->cache_validity & NFS_INO_INVALID_ATIME; int err = 0; @@ -726,17 +727,17 @@ int nfs_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) * - NFS never sets MS_NOATIME or MS_NODIRATIME so there is * no point in checking those. */ - if ((mnt->mnt_flags & MNT_NOATIME) || - ((mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) + if ((path->mnt->mnt_flags & MNT_NOATIME) || + ((path->mnt->mnt_flags & MNT_NODIRATIME) && S_ISDIR(inode->i_mode))) need_atime = 0; if (need_atime || nfs_need_revalidate_inode(inode)) { struct nfs_server *server = NFS_SERVER(inode); - nfs_readdirplus_parent_cache_miss(dentry); + nfs_readdirplus_parent_cache_miss(path->dentry); err = __nfs_revalidate_inode(server, inode); } else - nfs_readdirplus_parent_cache_hit(dentry); + nfs_readdirplus_parent_cache_hit(path->dentry); if (!err) { generic_fillattr(inode, stat); stat->ino = nfs_compat_user_ino64(NFS_FILEID(inode)); diff --git a/fs/nfs/namespace.c b/fs/nfs/namespace.c index e49d831c4e85..786f17580582 100644 --- a/fs/nfs/namespace.c +++ b/fs/nfs/namespace.c @@ -178,11 +178,12 @@ out_nofree: } static int -nfs_namespace_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +nfs_namespace_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - if (NFS_FH(d_inode(dentry))->size != 0) - return nfs_getattr(mnt, dentry, stat); - generic_fillattr(d_inode(dentry), stat); + if (NFS_FH(d_inode(path->dentry))->size != 0) + return nfs_getattr(path, stat, request_mask, query_flags); + generic_fillattr(d_inode(path->dentry), stat); return 0; } diff --git a/fs/nfsd/nfs4xdr.c b/fs/nfsd/nfs4xdr.c index 382c1fd05b4c..33017d652b1d 100644 --- a/fs/nfsd/nfs4xdr.c +++ b/fs/nfsd/nfs4xdr.c @@ -2301,7 +2301,7 @@ static int get_parent_attributes(struct svc_export *exp, struct kstat *stat) if (path.dentry != path.mnt->mnt_root) break; } - err = vfs_getattr(&path, stat); + err = vfs_getattr(&path, stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); path_put(&path); return err; } @@ -2385,7 +2385,7 @@ nfsd4_encode_fattr(struct xdr_stream *xdr, struct svc_fh *fhp, goto out; } - err = vfs_getattr(&path, &stat); + err = vfs_getattr(&path, &stat, STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_nfserr; if ((bmval0 & (FATTR4_WORD0_FILES_AVAIL | FATTR4_WORD0_FILES_FREE | diff --git a/fs/nfsd/vfs.h b/fs/nfsd/vfs.h index db98c48c735a..1bbdccecbf3d 100644 --- a/fs/nfsd/vfs.h +++ b/fs/nfsd/vfs.h @@ -135,7 +135,8 @@ static inline __be32 fh_getattr(struct svc_fh *fh, struct kstat *stat) { struct path p = {.mnt = fh->fh_export->ex_path.mnt, .dentry = fh->fh_dentry}; - return nfserrno(vfs_getattr(&p, stat)); + return nfserrno(vfs_getattr(&p, stat, STATX_BASIC_STATS, + AT_STATX_SYNC_AS_STAT)); } static inline int nfsd_create_is_exclusive(int createmode) diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 8836305eb378..bfeb647459d9 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1306,16 +1306,15 @@ bail: return status; } -int ocfs2_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct inode *inode = d_inode(dentry); - struct super_block *sb = dentry->d_sb; + struct inode *inode = d_inode(path->dentry); + struct super_block *sb = path->dentry->d_sb; struct ocfs2_super *osb = sb->s_fs_info; int err; - err = ocfs2_inode_revalidate(dentry); + err = ocfs2_inode_revalidate(path->dentry); if (err) { if (err != -ENOENT) mlog_errno(err); diff --git a/fs/ocfs2/file.h b/fs/ocfs2/file.h index 897fd9a2e51d..1fdc9839cd93 100644 --- a/fs/ocfs2/file.h +++ b/fs/ocfs2/file.h @@ -68,8 +68,8 @@ int ocfs2_zero_extend(struct inode *inode, struct buffer_head *di_bh, int ocfs2_extend_allocation(struct inode *inode, u32 logical_start, u32 clusters_to_add, int mark_unwritten); int ocfs2_setattr(struct dentry *dentry, struct iattr *attr); -int ocfs2_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ocfs2_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ocfs2_permission(struct inode *inode, int mask); int ocfs2_should_update_atime(struct inode *inode, diff --git a/fs/orangefs/inode.c b/fs/orangefs/inode.c index 5cd617980fbf..a304bf34b212 100644 --- a/fs/orangefs/inode.c +++ b/fs/orangefs/inode.c @@ -245,25 +245,24 @@ out: /* * Obtain attributes of an object given a dentry */ -int orangefs_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *kstat) +int orangefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { int ret = -ENOENT; - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct orangefs_inode_s *orangefs_inode = NULL; gossip_debug(GOSSIP_INODE_DEBUG, "orangefs_getattr: called on %pd\n", - dentry); + path->dentry); ret = orangefs_inode_getattr(inode, 0, 0); if (ret == 0) { - generic_fillattr(inode, kstat); + generic_fillattr(inode, stat); /* override block size reported to stat */ orangefs_inode = ORANGEFS_I(inode); - kstat->blksize = orangefs_inode->blksize; + stat->blksize = orangefs_inode->blksize; } return ret; } diff --git a/fs/orangefs/orangefs-kernel.h b/fs/orangefs/orangefs-kernel.h index 70355a9a2596..0c4f03c22ce0 100644 --- a/fs/orangefs/orangefs-kernel.h +++ b/fs/orangefs/orangefs-kernel.h @@ -439,9 +439,8 @@ struct inode *orangefs_new_inode(struct super_block *sb, int orangefs_setattr(struct dentry *dentry, struct iattr *iattr); -int orangefs_getattr(struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *kstat); +int orangefs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int orangefs_permission(struct inode *inode, int mask); diff --git a/fs/overlayfs/copy_up.c b/fs/overlayfs/copy_up.c index f57043dace62..a6f9ca621e0b 100644 --- a/fs/overlayfs/copy_up.c +++ b/fs/overlayfs/copy_up.c @@ -346,7 +346,8 @@ static int ovl_copy_up_one(struct dentry *parent, struct dentry *dentry, ovl_path_upper(parent, &parentpath); upperdir = parentpath.dentry; - err = vfs_getattr(&parentpath, &pstat); + err = vfs_getattr(&parentpath, &pstat, + STATX_ATIME | STATX_MTIME, AT_STATX_SYNC_AS_STAT); if (err) return err; @@ -409,7 +410,8 @@ int ovl_copy_up_flags(struct dentry *dentry, int flags) } ovl_path_lower(next, &lowerpath); - err = vfs_getattr(&lowerpath, &stat); + err = vfs_getattr(&lowerpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); /* maybe truncate regular file. this has no effect on dirs */ if (flags & O_TRUNC) stat.size = 0; diff --git a/fs/overlayfs/dir.c b/fs/overlayfs/dir.c index 16e06dd89457..6515796460df 100644 --- a/fs/overlayfs/dir.c +++ b/fs/overlayfs/dir.c @@ -138,9 +138,10 @@ static int ovl_set_opaque(struct dentry *dentry, struct dentry *upperdentry) return err; } -static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_dir_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; int err; enum ovl_path_type type; struct path realpath; @@ -148,7 +149,7 @@ static int ovl_dir_getattr(struct vfsmount *mnt, struct dentry *dentry, type = ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); revert_creds(old_cred); if (err) return err; @@ -264,7 +265,8 @@ static struct dentry *ovl_clear_empty(struct dentry *dentry, goto out; ovl_path_upper(dentry, &upperpath); - err = vfs_getattr(&upperpath, &stat); + err = vfs_getattr(&upperpath, &stat, + STATX_BASIC_STATS, AT_STATX_SYNC_AS_STAT); if (err) goto out_unlock; diff --git a/fs/overlayfs/inode.c b/fs/overlayfs/inode.c index 08643ac44a02..d4bb54f7b6b4 100644 --- a/fs/overlayfs/inode.c +++ b/fs/overlayfs/inode.c @@ -56,16 +56,17 @@ out: return err; } -static int ovl_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int ovl_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct path realpath; const struct cred *old_cred; int err; ovl_path_real(dentry, &realpath); old_cred = ovl_override_creds(dentry->d_sb); - err = vfs_getattr(&realpath, stat); + err = vfs_getattr(&realpath, stat, request_mask, flags); revert_creds(old_cred); return err; } diff --git a/fs/proc/base.c b/fs/proc/base.c index 1e1e182d571b..3b5e6aa2a326 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -1724,11 +1724,12 @@ out_unlock: return NULL; } -int pid_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int pid_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *task; - struct pid_namespace *pid = dentry->d_sb->s_fs_info; + struct pid_namespace *pid = path->dentry->d_sb->s_fs_info; generic_fillattr(inode, stat); @@ -3511,9 +3512,10 @@ static int proc_task_readdir(struct file *file, struct dir_context *ctx) return 0; } -static int proc_task_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_task_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct task_struct *p = get_proc_task(inode); generic_fillattr(inode, stat); diff --git a/fs/proc/generic.c b/fs/proc/generic.c index 06c73904d497..ee27feb34cf4 100644 --- a/fs/proc/generic.c +++ b/fs/proc/generic.c @@ -118,10 +118,10 @@ static int proc_notify_change(struct dentry *dentry, struct iattr *iattr) return 0; } -static int proc_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct proc_dir_entry *de = PDE(inode); if (de && de->nlink) set_nlink(inode, de->nlink); diff --git a/fs/proc/internal.h b/fs/proc/internal.h index 5d6960f5f1c0..e93cdc6ddb31 100644 --- a/fs/proc/internal.h +++ b/fs/proc/internal.h @@ -149,7 +149,7 @@ extern int proc_pid_statm(struct seq_file *, struct pid_namespace *, * base.c */ extern const struct dentry_operations pid_dentry_operations; -extern int pid_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int pid_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int proc_setattr(struct dentry *, struct iattr *); extern struct inode *proc_pid_make_inode(struct super_block *, struct task_struct *, umode_t); extern int pid_revalidate(struct dentry *, unsigned int); diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c index ffd72a6c6e04..9db1df2537fc 100644 --- a/fs/proc/proc_net.c +++ b/fs/proc/proc_net.c @@ -140,10 +140,10 @@ static struct dentry *proc_tgid_net_lookup(struct inode *dir, return de; } -static int proc_tgid_net_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int proc_tgid_net_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct net *net; net = get_proc_task_net(inode); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index 3e64c6502dc8..3d8726445ad1 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -801,9 +801,10 @@ static int proc_sys_setattr(struct dentry *dentry, struct iattr *attr) return 0; } -static int proc_sys_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +static int proc_sys_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; diff --git a/fs/proc/root.c b/fs/proc/root.c index b90da888b81a..fb1955c82274 100644 --- a/fs/proc/root.c +++ b/fs/proc/root.c @@ -149,10 +149,10 @@ void __init proc_root_init(void) proc_sys_init(); } -static int proc_root_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat -) +static int proc_root_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - generic_fillattr(d_inode(dentry), stat); + generic_fillattr(d_inode(path->dentry), stat); stat->nlink = proc_root.nlink + nr_processes(); return 0; } diff --git a/fs/stat.c b/fs/stat.c index 3f14d1ef0868..a3804feadade 100644 --- a/fs/stat.c +++ b/fs/stat.c @@ -18,6 +18,15 @@ #include #include +/** + * generic_fillattr - Fill in the basic attributes from the inode struct + * @inode: Inode to use as the source + * @stat: Where to fill in the attributes + * + * Fill in the basic attributes in the kstat structure from data that's to be + * found on the VFS inode structure. This is the default if no getattr inode + * operation is supplied. + */ void generic_fillattr(struct inode *inode, struct kstat *stat) { stat->dev = inode->i_sb->s_dev; @@ -33,81 +42,147 @@ void generic_fillattr(struct inode *inode, struct kstat *stat) stat->ctime = inode->i_ctime; stat->blksize = i_blocksize(inode); stat->blocks = inode->i_blocks; -} + if (IS_NOATIME(inode)) + stat->result_mask &= ~STATX_ATIME; + if (IS_AUTOMOUNT(inode)) + stat->attributes |= STATX_ATTR_AUTOMOUNT; +} EXPORT_SYMBOL(generic_fillattr); /** * vfs_getattr_nosec - getattr without security checks * @path: file to get attributes from * @stat: structure to return attributes in + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) * * Get attributes without calling security_inode_getattr. * * Currently the only caller other than vfs_getattr is internal to the - * filehandle lookup code, which uses only the inode number and returns - * no attributes to any user. Any other code probably wants - * vfs_getattr. + * filehandle lookup code, which uses only the inode number and returns no + * attributes to any user. Any other code probably wants vfs_getattr. */ -int vfs_getattr_nosec(struct path *path, struct kstat *stat) +int vfs_getattr_nosec(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct inode *inode = d_backing_inode(path->dentry); + memset(stat, 0, sizeof(*stat)); + stat->result_mask |= STATX_BASIC_STATS; + request_mask &= STATX_ALL; + query_flags &= KSTAT_QUERY_FLAGS; if (inode->i_op->getattr) - return inode->i_op->getattr(path->mnt, path->dentry, stat); + return inode->i_op->getattr(path, stat, request_mask, + query_flags); generic_fillattr(inode, stat); return 0; } - EXPORT_SYMBOL(vfs_getattr_nosec); -int vfs_getattr(struct path *path, struct kstat *stat) +/* + * vfs_getattr - Get the enhanced basic attributes of a file + * @path: The file of interest + * @stat: Where to return the statistics + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) + * + * Ask the filesystem for a file's attributes. The caller must indicate in + * request_mask and query_flags to indicate what they want. + * + * If the file is remote, the filesystem can be forced to update the attributes + * from the backing store by passing AT_STATX_FORCE_SYNC in query_flags or can + * suppress the update by passing AT_STATX_DONT_SYNC. + * + * Bits must have been set in request_mask to indicate which attributes the + * caller wants retrieving. Any such attribute not requested may be returned + * anyway, but the value may be approximate, and, if remote, may not have been + * synchronised with the server. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. + */ +int vfs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { int retval; retval = security_inode_getattr(path); if (retval) return retval; - return vfs_getattr_nosec(path, stat); + return vfs_getattr_nosec(path, stat, request_mask, query_flags); } - EXPORT_SYMBOL(vfs_getattr); -int vfs_fstat(unsigned int fd, struct kstat *stat) +/** + * vfs_statx_fd - Get the enhanced basic attributes by file descriptor + * @fd: The file descriptor referring to the file of interest + * @stat: The result structure to fill in. + * @request_mask: STATX_xxx flags indicating what the caller wants + * @query_flags: Query mode (KSTAT_QUERY_FLAGS) + * + * This function is a wrapper around vfs_getattr(). The main difference is + * that it uses a file descriptor to determine the file location. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. + */ +int vfs_statx_fd(unsigned int fd, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { struct fd f = fdget_raw(fd); int error = -EBADF; if (f.file) { - error = vfs_getattr(&f.file->f_path, stat); + error = vfs_getattr(&f.file->f_path, stat, + request_mask, query_flags); fdput(f); } return error; } -EXPORT_SYMBOL(vfs_fstat); +EXPORT_SYMBOL(vfs_statx_fd); -int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, - int flag) +/** + * vfs_statx - Get basic and extra attributes by filename + * @dfd: A file descriptor representing the base dir for a relative filename + * @filename: The name of the file of interest + * @flags: Flags to control the query + * @stat: The result structure to fill in. + * @request_mask: STATX_xxx flags indicating what the caller wants + * + * This function is a wrapper around vfs_getattr(). The main difference is + * that it uses a filename and base directory to determine the file location. + * Additionally, the use of AT_SYMLINK_NOFOLLOW in flags will prevent a symlink + * at the given name from being referenced. + * + * The caller must have preset stat->request_mask as for vfs_getattr(). The + * flags are also used to load up stat->query_flags. + * + * 0 will be returned on success, and a -ve error code if unsuccessful. + */ +int vfs_statx(int dfd, const char __user *filename, int flags, + struct kstat *stat, u32 request_mask) { struct path path; int error = -EINVAL; - unsigned int lookup_flags = 0; + unsigned int lookup_flags = LOOKUP_FOLLOW | LOOKUP_AUTOMOUNT; - if ((flag & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | - AT_EMPTY_PATH)) != 0) - goto out; + if ((flags & ~(AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT | + AT_EMPTY_PATH | KSTAT_QUERY_FLAGS)) != 0) + return -EINVAL; - if (!(flag & AT_SYMLINK_NOFOLLOW)) - lookup_flags |= LOOKUP_FOLLOW; - if (flag & AT_EMPTY_PATH) + if (flags & AT_SYMLINK_NOFOLLOW) + lookup_flags &= ~LOOKUP_FOLLOW; + if (flags & AT_NO_AUTOMOUNT) + lookup_flags &= ~LOOKUP_AUTOMOUNT; + if (flags & AT_EMPTY_PATH) lookup_flags |= LOOKUP_EMPTY; + retry: error = user_path_at(dfd, filename, lookup_flags, &path); if (error) goto out; - error = vfs_getattr(&path, stat); + error = vfs_getattr(&path, stat, request_mask, flags); path_put(&path); if (retry_estale(error, lookup_flags)) { lookup_flags |= LOOKUP_REVAL; @@ -116,19 +191,7 @@ retry: out: return error; } -EXPORT_SYMBOL(vfs_fstatat); - -int vfs_stat(const char __user *name, struct kstat *stat) -{ - return vfs_fstatat(AT_FDCWD, name, stat, 0); -} -EXPORT_SYMBOL(vfs_stat); - -int vfs_lstat(const char __user *name, struct kstat *stat) -{ - return vfs_fstatat(AT_FDCWD, name, stat, AT_SYMLINK_NOFOLLOW); -} -EXPORT_SYMBOL(vfs_lstat); +EXPORT_SYMBOL(vfs_statx); #ifdef __ARCH_WANT_OLD_STAT @@ -141,7 +204,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta { static int warncount = 5; struct __old_kernel_stat tmp; - + if (warncount > 0) { warncount--; printk(KERN_WARNING "VFS: Warning: %s using old stat() call. Recompile your binary.\n", @@ -166,7 +229,7 @@ static int cp_old_stat(struct kstat *stat, struct __old_kernel_stat __user * sta #if BITS_PER_LONG == 32 if (stat->size > MAX_NON_LFS) return -EOVERFLOW; -#endif +#endif tmp.st_size = stat->size; tmp.st_atime = stat->atime.tv_sec; tmp.st_mtime = stat->mtime.tv_sec; @@ -445,6 +508,81 @@ SYSCALL_DEFINE4(fstatat64, int, dfd, const char __user *, filename, } #endif /* __ARCH_WANT_STAT64 || __ARCH_WANT_COMPAT_STAT64 */ +static inline int __put_timestamp(struct timespec *kts, + struct statx_timestamp __user *uts) +{ + return (__put_user(kts->tv_sec, &uts->tv_sec ) || + __put_user(kts->tv_nsec, &uts->tv_nsec ) || + __put_user(0, &uts->__reserved )); +} + +/* + * Set the statx results. + */ +static long statx_set_result(struct kstat *stat, struct statx __user *buffer) +{ + uid_t uid = from_kuid_munged(current_user_ns(), stat->uid); + gid_t gid = from_kgid_munged(current_user_ns(), stat->gid); + + if (__put_user(stat->result_mask, &buffer->stx_mask ) || + __put_user(stat->mode, &buffer->stx_mode ) || + __clear_user(&buffer->__spare0, sizeof(buffer->__spare0)) || + __put_user(stat->nlink, &buffer->stx_nlink ) || + __put_user(uid, &buffer->stx_uid ) || + __put_user(gid, &buffer->stx_gid ) || + __put_user(stat->attributes, &buffer->stx_attributes ) || + __put_user(stat->blksize, &buffer->stx_blksize ) || + __put_user(MAJOR(stat->rdev), &buffer->stx_rdev_major ) || + __put_user(MINOR(stat->rdev), &buffer->stx_rdev_minor ) || + __put_user(MAJOR(stat->dev), &buffer->stx_dev_major ) || + __put_user(MINOR(stat->dev), &buffer->stx_dev_minor ) || + __put_timestamp(&stat->atime, &buffer->stx_atime ) || + __put_timestamp(&stat->btime, &buffer->stx_btime ) || + __put_timestamp(&stat->ctime, &buffer->stx_ctime ) || + __put_timestamp(&stat->mtime, &buffer->stx_mtime ) || + __put_user(stat->ino, &buffer->stx_ino ) || + __put_user(stat->size, &buffer->stx_size ) || + __put_user(stat->blocks, &buffer->stx_blocks ) || + __clear_user(&buffer->__spare1, sizeof(buffer->__spare1)) || + __clear_user(&buffer->__spare2, sizeof(buffer->__spare2))) + return -EFAULT; + + return 0; +} + +/** + * sys_statx - System call to get enhanced stats + * @dfd: Base directory to pathwalk from *or* fd to stat. + * @filename: File to stat *or* NULL. + * @flags: AT_* flags to control pathwalk. + * @mask: Parts of statx struct actually required. + * @buffer: Result buffer. + * + * Note that if filename is NULL, then it does the equivalent of fstat() using + * dfd to indicate the file of interest. + */ +SYSCALL_DEFINE5(statx, + int, dfd, const char __user *, filename, unsigned, flags, + unsigned int, mask, + struct statx __user *, buffer) +{ + struct kstat stat; + int error; + + if ((flags & AT_STATX_SYNC_TYPE) == AT_STATX_SYNC_TYPE) + return -EINVAL; + if (!access_ok(VERIFY_WRITE, buffer, sizeof(*buffer))) + return -EFAULT; + + if (filename) + error = vfs_statx(dfd, filename, flags, &stat, mask); + else + error = vfs_statx_fd(dfd, &stat, mask, flags); + if (error) + return error; + return statx_set_result(&stat, buffer); +} + /* Caller is here responsible for sufficient locking (ie. inode->i_lock) */ void __inode_add_bytes(struct inode *inode, loff_t bytes) { diff --git a/fs/sysv/itree.c b/fs/sysv/itree.c index 08d3e630b49c..83809f5b5eca 100644 --- a/fs/sysv/itree.c +++ b/fs/sysv/itree.c @@ -440,10 +440,11 @@ static unsigned sysv_nblocks(struct super_block *s, loff_t size) return blocks; } -int sysv_getattr(struct vfsmount *mnt, struct dentry *dentry, struct kstat *stat) +int sysv_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { - struct super_block *s = dentry->d_sb; - generic_fillattr(d_inode(dentry), stat); + struct super_block *s = path->dentry->d_sb; + generic_fillattr(d_inode(path->dentry), stat); stat->blocks = (s->s_blocksize / 512) * sysv_nblocks(s, stat->size); stat->blksize = s->s_blocksize; return 0; diff --git a/fs/sysv/sysv.h b/fs/sysv/sysv.h index 6c212288adcb..1e7e27c729af 100644 --- a/fs/sysv/sysv.h +++ b/fs/sysv/sysv.h @@ -142,7 +142,7 @@ extern struct inode *sysv_iget(struct super_block *, unsigned int); extern int sysv_write_inode(struct inode *, struct writeback_control *wbc); extern int sysv_sync_inode(struct inode *); extern void sysv_set_inode(struct inode *, dev_t); -extern int sysv_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int sysv_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int sysv_init_icache(void); extern void sysv_destroy_icache(void); diff --git a/fs/ubifs/dir.c b/fs/ubifs/dir.c index 528369f3e472..30825d882aa9 100644 --- a/fs/ubifs/dir.c +++ b/fs/ubifs/dir.c @@ -1622,11 +1622,11 @@ static int ubifs_rename(struct inode *old_dir, struct dentry *old_dentry, return do_rename(old_dir, old_dentry, new_dir, new_dentry, flags); } -int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +int ubifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { loff_t size; - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct ubifs_inode *ui = ubifs_inode(inode); mutex_lock(&ui->ui_mutex); diff --git a/fs/ubifs/ubifs.h b/fs/ubifs/ubifs.h index f0c86f076535..4d57e488038e 100644 --- a/fs/ubifs/ubifs.h +++ b/fs/ubifs/ubifs.h @@ -1749,8 +1749,8 @@ int ubifs_update_time(struct inode *inode, struct timespec *time, int flags); /* dir.c */ struct inode *ubifs_new_inode(struct ubifs_info *c, struct inode *dir, umode_t mode); -int ubifs_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat); +int ubifs_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags); int ubifs_check_dir_empty(struct inode *dir); /* xattr.c */ diff --git a/fs/udf/symlink.c b/fs/udf/symlink.c index f7dfef53f739..6023c97c6da2 100644 --- a/fs/udf/symlink.c +++ b/fs/udf/symlink.c @@ -152,9 +152,10 @@ out_unmap: return err; } -static int udf_symlink_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int udf_symlink_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int flags) { + struct dentry *dentry = path->dentry; struct inode *inode = d_backing_inode(dentry); struct page *page; diff --git a/fs/xfs/xfs_iops.c b/fs/xfs/xfs_iops.c index 22c16155f1b4..229cc6a6d8ef 100644 --- a/fs/xfs/xfs_iops.c +++ b/fs/xfs/xfs_iops.c @@ -489,11 +489,12 @@ xfs_vn_get_link_inline( STATIC int xfs_vn_getattr( - struct vfsmount *mnt, - struct dentry *dentry, - struct kstat *stat) + const struct path *path, + struct kstat *stat, + u32 request_mask, + unsigned int query_flags) { - struct inode *inode = d_inode(dentry); + struct inode *inode = d_inode(path->dentry); struct xfs_inode *ip = XFS_I(inode); struct xfs_mount *mp = ip->i_mount; diff --git a/include/linux/fs.h b/include/linux/fs.h index 52350947c670..aad3fd0ff5f8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1709,7 +1709,7 @@ struct inode_operations { int (*rename) (struct inode *, struct dentry *, struct inode *, struct dentry *, unsigned int); int (*setattr) (struct dentry *, struct iattr *); - int (*getattr) (struct vfsmount *mnt, struct dentry *, struct kstat *); + int (*getattr) (const struct path *, struct kstat *, u32, unsigned int); ssize_t (*listxattr) (struct dentry *, char *, size_t); int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len); @@ -2902,8 +2902,8 @@ extern int page_symlink(struct inode *inode, const char *symname, int len); extern const struct inode_operations page_symlink_inode_operations; extern void kfree_link(void *); extern void generic_fillattr(struct inode *, struct kstat *); -int vfs_getattr_nosec(struct path *path, struct kstat *stat); -extern int vfs_getattr(struct path *, struct kstat *); +extern int vfs_getattr_nosec(const struct path *, struct kstat *, u32, unsigned int); +extern int vfs_getattr(const struct path *, struct kstat *, u32, unsigned int); void __inode_add_bytes(struct inode *inode, loff_t bytes); void inode_add_bytes(struct inode *inode, loff_t bytes); void __inode_sub_bytes(struct inode *inode, loff_t bytes); @@ -2916,10 +2916,29 @@ extern const struct inode_operations simple_symlink_inode_operations; extern int iterate_dir(struct file *, struct dir_context *); -extern int vfs_stat(const char __user *, struct kstat *); -extern int vfs_lstat(const char __user *, struct kstat *); -extern int vfs_fstat(unsigned int, struct kstat *); -extern int vfs_fstatat(int , const char __user *, struct kstat *, int); +extern int vfs_statx(int, const char __user *, int, struct kstat *, u32); +extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); + +static inline int vfs_stat(const char __user *filename, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); +} +static inline int vfs_lstat(const char __user *name, struct kstat *stat) +{ + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + stat, STATX_BASIC_STATS); +} +static inline int vfs_fstatat(int dfd, const char __user *filename, + struct kstat *stat, int flags) +{ + return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); +} +static inline int vfs_fstat(int fd, struct kstat *stat) +{ + return vfs_statx_fd(fd, stat, STATX_BASIC_STATS, 0); +} + + extern const char *vfs_get_link(struct dentry *, struct delayed_call *); extern int vfs_readlink(struct dentry *, char __user *, int); @@ -2949,7 +2968,7 @@ extern int dcache_dir_close(struct inode *, struct file *); extern loff_t dcache_dir_lseek(struct file *, loff_t, int); extern int dcache_readdir(struct file *, struct dir_context *); extern int simple_setattr(struct dentry *, struct iattr *); -extern int simple_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int simple_getattr(const struct path *, struct kstat *, u32, unsigned int); extern int simple_statfs(struct dentry *, struct kstatfs *); extern int simple_open(struct inode *inode, struct file *file); extern int simple_link(struct dentry *, struct inode *, struct dentry *); diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f1da8c8dd473..287f34161086 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -335,7 +335,7 @@ extern int nfs_refresh_inode(struct inode *, struct nfs_fattr *); extern int nfs_post_op_update_inode(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc(struct inode *inode, struct nfs_fattr *fattr); extern int nfs_post_op_update_inode_force_wcc_locked(struct inode *inode, struct nfs_fattr *fattr); -extern int nfs_getattr(struct vfsmount *, struct dentry *, struct kstat *); +extern int nfs_getattr(const struct path *, struct kstat *, u32, unsigned int); extern void nfs_access_add_cache(struct inode *, struct nfs_access_entry *); extern void nfs_access_set_mask(struct nfs_access_entry *, u32); extern int nfs_permission(struct inode *, int); diff --git a/include/linux/stat.h b/include/linux/stat.h index 075cb0c7eb2a..c76e524fb34b 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -18,20 +18,32 @@ #include #include +#define KSTAT_QUERY_FLAGS (AT_STATX_SYNC_TYPE) + struct kstat { - u64 ino; - dev_t dev; + u32 result_mask; /* What fields the user got */ umode_t mode; unsigned int nlink; + uint32_t blksize; /* Preferred I/O size */ + u64 attributes; +#define KSTAT_ATTR_FS_IOC_FLAGS \ + (STATX_ATTR_COMPRESSED | \ + STATX_ATTR_IMMUTABLE | \ + STATX_ATTR_APPEND | \ + STATX_ATTR_NODUMP | \ + STATX_ATTR_ENCRYPTED \ + )/* Attrs corresponding to FS_*_FL flags */ + u64 ino; + dev_t dev; + dev_t rdev; kuid_t uid; kgid_t gid; - dev_t rdev; loff_t size; - struct timespec atime; + struct timespec atime; struct timespec mtime; struct timespec ctime; - unsigned long blksize; - unsigned long long blocks; + struct timespec btime; /* File creation time */ + u64 blocks; }; #endif diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 91a740f6b884..980c3c9b06f8 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -48,6 +48,7 @@ struct stat; struct stat64; struct statfs; struct statfs64; +struct statx; struct __sysctl_args; struct sysinfo; struct timespec; @@ -902,5 +903,7 @@ asmlinkage long sys_pkey_mprotect(unsigned long start, size_t len, unsigned long prot, int pkey); asmlinkage long sys_pkey_alloc(unsigned long flags, unsigned long init_val); asmlinkage long sys_pkey_free(int pkey); +asmlinkage long sys_statx(int dfd, const char __user *path, unsigned flags, + unsigned mask, struct statx __user *buffer); #endif diff --git a/include/uapi/linux/fcntl.h b/include/uapi/linux/fcntl.h index beed138bd359..813afd6eee71 100644 --- a/include/uapi/linux/fcntl.h +++ b/include/uapi/linux/fcntl.h @@ -63,5 +63,10 @@ #define AT_NO_AUTOMOUNT 0x800 /* Suppress terminal automount traversal */ #define AT_EMPTY_PATH 0x1000 /* Allow empty relative pathname */ +#define AT_STATX_SYNC_TYPE 0x6000 /* Type of synchronisation required from statx() */ +#define AT_STATX_SYNC_AS_STAT 0x0000 /* - Do whatever stat() does */ +#define AT_STATX_FORCE_SYNC 0x2000 /* - Force the attributes to be sync'd with the server */ +#define AT_STATX_DONT_SYNC 0x4000 /* - Don't sync attributes with the server */ + #endif /* _UAPI_LINUX_FCNTL_H */ diff --git a/include/uapi/linux/stat.h b/include/uapi/linux/stat.h index 7fec7e36d921..51a6b86e3700 100644 --- a/include/uapi/linux/stat.h +++ b/include/uapi/linux/stat.h @@ -1,6 +1,7 @@ #ifndef _UAPI_LINUX_STAT_H #define _UAPI_LINUX_STAT_H +#include #if defined(__KERNEL__) || !defined(__GLIBC__) || (__GLIBC__ < 2) @@ -41,5 +42,135 @@ #endif +/* + * Timestamp structure for the timestamps in struct statx. + * + * tv_sec holds the number of seconds before (negative) or after (positive) + * 00:00:00 1st January 1970 UTC. + * + * tv_nsec holds a number of nanoseconds before (0..-999,999,999 if tv_sec is + * negative) or after (0..999,999,999 if tv_sec is positive) the tv_sec time. + * + * Note that if both tv_sec and tv_nsec are non-zero, then the two values must + * either be both positive or both negative. + * + * __reserved is held in case we need a yet finer resolution. + */ +struct statx_timestamp { + __s64 tv_sec; + __s32 tv_nsec; + __s32 __reserved; +}; + +/* + * Structures for the extended file attribute retrieval system call + * (statx()). + * + * The caller passes a mask of what they're specifically interested in as a + * parameter to statx(). What statx() actually got will be indicated in + * st_mask upon return. + * + * For each bit in the mask argument: + * + * - if the datum is not supported: + * + * - the bit will be cleared, and + * + * - the datum will be set to an appropriate fabricated value if one is + * available (eg. CIFS can take a default uid and gid), otherwise + * + * - the field will be cleared; + * + * - otherwise, if explicitly requested: + * + * - the datum will be synchronised to the server if AT_STATX_FORCE_SYNC is + * set or if the datum is considered out of date, and + * + * - the field will be filled in and the bit will be set; + * + * - otherwise, if not requested, but available in approximate form without any + * effort, it will be filled in anyway, and the bit will be set upon return + * (it might not be up to date, however, and no attempt will be made to + * synchronise the internal state first); + * + * - otherwise the field and the bit will be cleared before returning. + * + * Items in STATX_BASIC_STATS may be marked unavailable on return, but they + * will have values installed for compatibility purposes so that stat() and + * co. can be emulated in userspace. + */ +struct statx { + /* 0x00 */ + __u32 stx_mask; /* What results were written [uncond] */ + __u32 stx_blksize; /* Preferred general I/O size [uncond] */ + __u64 stx_attributes; /* Flags conveying information about the file [uncond] */ + /* 0x10 */ + __u32 stx_nlink; /* Number of hard links */ + __u32 stx_uid; /* User ID of owner */ + __u32 stx_gid; /* Group ID of owner */ + __u16 stx_mode; /* File mode */ + __u16 __spare0[1]; + /* 0x20 */ + __u64 stx_ino; /* Inode number */ + __u64 stx_size; /* File size */ + __u64 stx_blocks; /* Number of 512-byte blocks allocated */ + __u64 __spare1[1]; + /* 0x40 */ + struct statx_timestamp stx_atime; /* Last access time */ + struct statx_timestamp stx_btime; /* File creation time */ + struct statx_timestamp stx_ctime; /* Last attribute change time */ + struct statx_timestamp stx_mtime; /* Last data modification time */ + /* 0x80 */ + __u32 stx_rdev_major; /* Device ID of special file [if bdev/cdev] */ + __u32 stx_rdev_minor; + __u32 stx_dev_major; /* ID of device containing file [uncond] */ + __u32 stx_dev_minor; + /* 0x90 */ + __u64 __spare2[14]; /* Spare space for future expansion */ + /* 0x100 */ +}; + +/* + * Flags to be stx_mask + * + * Query request/result mask for statx() and struct statx::stx_mask. + * + * These bits should be set in the mask argument of statx() to request + * particular items when calling statx(). + */ +#define STATX_TYPE 0x00000001U /* Want/got stx_mode & S_IFMT */ +#define STATX_MODE 0x00000002U /* Want/got stx_mode & ~S_IFMT */ +#define STATX_NLINK 0x00000004U /* Want/got stx_nlink */ +#define STATX_UID 0x00000008U /* Want/got stx_uid */ +#define STATX_GID 0x00000010U /* Want/got stx_gid */ +#define STATX_ATIME 0x00000020U /* Want/got stx_atime */ +#define STATX_MTIME 0x00000040U /* Want/got stx_mtime */ +#define STATX_CTIME 0x00000080U /* Want/got stx_ctime */ +#define STATX_INO 0x00000100U /* Want/got stx_ino */ +#define STATX_SIZE 0x00000200U /* Want/got stx_size */ +#define STATX_BLOCKS 0x00000400U /* Want/got stx_blocks */ +#define STATX_BASIC_STATS 0x000007ffU /* The stuff in the normal stat struct */ +#define STATX_BTIME 0x00000800U /* Want/got stx_btime */ +#define STATX_ALL 0x00000fffU /* All currently supported flags */ + +/* + * Attributes to be found in stx_attributes + * + * These give information about the features or the state of a file that might + * be of use to ordinary userspace programs such as GUIs or ls rather than + * specialised tools. + * + * Note that the flags marked [I] correspond to generic FS_IOC_FLAGS + * semantically. Where possible, the numerical value is picked to correspond + * also. + */ +#define STATX_ATTR_COMPRESSED 0x00000004 /* [I] File is compressed by the fs */ +#define STATX_ATTR_IMMUTABLE 0x00000010 /* [I] File is marked immutable */ +#define STATX_ATTR_APPEND 0x00000020 /* [I] File is append-only */ +#define STATX_ATTR_NODUMP 0x00000040 /* [I] File is not to be dumped */ +#define STATX_ATTR_ENCRYPTED 0x00000800 /* [I] File requires key to decrypt in fs */ + +#define STATX_ATTR_AUTOMOUNT 0x00001000 /* Dir: Automount trigger */ + #endif /* _UAPI_LINUX_STAT_H */ diff --git a/mm/shmem.c b/mm/shmem.c index a26649a6633f..e07728f716b2 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -958,10 +958,10 @@ void shmem_truncate_range(struct inode *inode, loff_t lstart, loff_t lend) } EXPORT_SYMBOL_GPL(shmem_truncate_range); -static int shmem_getattr(struct vfsmount *mnt, struct dentry *dentry, - struct kstat *stat) +static int shmem_getattr(const struct path *path, struct kstat *stat, + u32 request_mask, unsigned int query_flags) { - struct inode *inode = dentry->d_inode; + struct inode *inode = path->dentry->d_inode; struct shmem_inode_info *info = SHMEM_I(inode); if (info->alloced - info->swapped != inode->i_mapping->nrpages) { diff --git a/samples/Kconfig b/samples/Kconfig index b124f62ed6cb..9cb63188d3ef 100644 --- a/samples/Kconfig +++ b/samples/Kconfig @@ -112,4 +112,10 @@ config SAMPLE_VFIO_MDEV_MTTY Build a virtual tty sample driver for use as a VFIO mediated device +config SAMPLE_STATX + bool "Build example extended-stat using code" + depends on BROKEN + help + Build example userspace program to use the new extended-stat syscall. + endif # SAMPLES diff --git a/samples/Makefile b/samples/Makefile index 86a137e451d9..db54e766ddb1 100644 --- a/samples/Makefile +++ b/samples/Makefile @@ -3,4 +3,4 @@ obj-$(CONFIG_SAMPLES) += kobject/ kprobes/ trace_events/ livepatch/ \ hw_breakpoint/ kfifo/ kdb/ hidraw/ rpmsg/ seccomp/ \ configfs/ connector/ v4l/ trace_printk/ blackfin/ \ - vfio-mdev/ + vfio-mdev/ statx/ diff --git a/samples/statx/Makefile b/samples/statx/Makefile new file mode 100644 index 000000000000..1f80a3d8cf45 --- /dev/null +++ b/samples/statx/Makefile @@ -0,0 +1,10 @@ +# kbuild trick to avoid linker error. Can be omitted if a module is built. +obj- := dummy.o + +# List of programs to build +hostprogs-$(CONFIG_SAMPLE_STATX) := test-statx + +# Tell kbuild to always build the programs +always := $(hostprogs-y) + +HOSTCFLAGS_test-statx.o += -I$(objtree)/usr/include diff --git a/samples/statx/test-statx.c b/samples/statx/test-statx.c new file mode 100644 index 000000000000..8571d766331d --- /dev/null +++ b/samples/statx/test-statx.c @@ -0,0 +1,254 @@ +/* Test the statx() system call. + * + * Note that the output of this program is intended to look like the output of + * /bin/stat where possible. + * + * Copyright (C) 2015 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#define _GNU_SOURCE +#define _ATFILE_SOURCE +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define AT_STATX_SYNC_TYPE 0x6000 +#define AT_STATX_SYNC_AS_STAT 0x0000 +#define AT_STATX_FORCE_SYNC 0x2000 +#define AT_STATX_DONT_SYNC 0x4000 + +static __attribute__((unused)) +ssize_t statx(int dfd, const char *filename, unsigned flags, + unsigned int mask, struct statx *buffer) +{ + return syscall(__NR_statx, dfd, filename, flags, mask, buffer); +} + +static void print_time(const char *field, struct statx_timestamp *ts) +{ + struct tm tm; + time_t tim; + char buffer[100]; + int len; + + tim = ts->tv_sec; + if (!localtime_r(&tim, &tm)) { + perror("localtime_r"); + exit(1); + } + len = strftime(buffer, 100, "%F %T", &tm); + if (len == 0) { + perror("strftime"); + exit(1); + } + printf("%s", field); + fwrite(buffer, 1, len, stdout); + printf(".%09u", ts->tv_nsec); + len = strftime(buffer, 100, "%z", &tm); + if (len == 0) { + perror("strftime2"); + exit(1); + } + fwrite(buffer, 1, len, stdout); + printf("\n"); +} + +static void dump_statx(struct statx *stx) +{ + char buffer[256], ft = '?'; + + printf("results=%x\n", stx->stx_mask); + + printf(" "); + if (stx->stx_mask & STATX_SIZE) + printf(" Size: %-15llu", (unsigned long long)stx->stx_size); + if (stx->stx_mask & STATX_BLOCKS) + printf(" Blocks: %-10llu", (unsigned long long)stx->stx_blocks); + printf(" IO Block: %-6llu", (unsigned long long)stx->stx_blksize); + if (stx->stx_mask & STATX_TYPE) { + switch (stx->stx_mode & S_IFMT) { + case S_IFIFO: printf(" FIFO\n"); ft = 'p'; break; + case S_IFCHR: printf(" character special file\n"); ft = 'c'; break; + case S_IFDIR: printf(" directory\n"); ft = 'd'; break; + case S_IFBLK: printf(" block special file\n"); ft = 'b'; break; + case S_IFREG: printf(" regular file\n"); ft = '-'; break; + case S_IFLNK: printf(" symbolic link\n"); ft = 'l'; break; + case S_IFSOCK: printf(" socket\n"); ft = 's'; break; + default: + printf(" unknown type (%o)\n", stx->stx_mode & S_IFMT); + break; + } + } else { + printf(" no type\n"); + } + + sprintf(buffer, "%02x:%02x", stx->stx_dev_major, stx->stx_dev_minor); + printf("Device: %-15s", buffer); + if (stx->stx_mask & STATX_INO) + printf(" Inode: %-11llu", (unsigned long long) stx->stx_ino); + if (stx->stx_mask & STATX_NLINK) + printf(" Links: %-5u", stx->stx_nlink); + if (stx->stx_mask & STATX_TYPE) { + switch (stx->stx_mode & S_IFMT) { + case S_IFBLK: + case S_IFCHR: + printf(" Device type: %u,%u", + stx->stx_rdev_major, stx->stx_rdev_minor); + break; + } + } + printf("\n"); + + if (stx->stx_mask & STATX_MODE) + printf("Access: (%04o/%c%c%c%c%c%c%c%c%c%c) ", + stx->stx_mode & 07777, + ft, + stx->stx_mode & S_IRUSR ? 'r' : '-', + stx->stx_mode & S_IWUSR ? 'w' : '-', + stx->stx_mode & S_IXUSR ? 'x' : '-', + stx->stx_mode & S_IRGRP ? 'r' : '-', + stx->stx_mode & S_IWGRP ? 'w' : '-', + stx->stx_mode & S_IXGRP ? 'x' : '-', + stx->stx_mode & S_IROTH ? 'r' : '-', + stx->stx_mode & S_IWOTH ? 'w' : '-', + stx->stx_mode & S_IXOTH ? 'x' : '-'); + if (stx->stx_mask & STATX_UID) + printf("Uid: %5d ", stx->stx_uid); + if (stx->stx_mask & STATX_GID) + printf("Gid: %5d\n", stx->stx_gid); + + if (stx->stx_mask & STATX_ATIME) + print_time("Access: ", &stx->stx_atime); + if (stx->stx_mask & STATX_MTIME) + print_time("Modify: ", &stx->stx_mtime); + if (stx->stx_mask & STATX_CTIME) + print_time("Change: ", &stx->stx_ctime); + if (stx->stx_mask & STATX_BTIME) + print_time(" Birth: ", &stx->stx_btime); + + if (stx->stx_attributes) { + unsigned char bits; + int loop, byte; + + static char attr_representation[64 + 1] = + /* STATX_ATTR_ flags: */ + "????????" /* 63-56 */ + "????????" /* 55-48 */ + "????????" /* 47-40 */ + "????????" /* 39-32 */ + "????????" /* 31-24 0x00000000-ff000000 */ + "????????" /* 23-16 0x00000000-00ff0000 */ + "???me???" /* 15- 8 0x00000000-0000ff00 */ + "?dai?c??" /* 7- 0 0x00000000-000000ff */ + ; + + printf("Attributes: %016llx (", stx->stx_attributes); + for (byte = 64 - 8; byte >= 0; byte -= 8) { + bits = stx->stx_attributes >> byte; + for (loop = 7; loop >= 0; loop--) { + int bit = byte + loop; + + if (bits & 0x80) + putchar(attr_representation[63 - bit]); + else + putchar('-'); + bits <<= 1; + } + if (byte) + putchar(' '); + } + printf(")\n"); + } +} + +static void dump_hex(unsigned long long *data, int from, int to) +{ + unsigned offset, print_offset = 1, col = 0; + + from /= 8; + to = (to + 7) / 8; + + for (offset = from; offset < to; offset++) { + if (print_offset) { + printf("%04x: ", offset * 8); + print_offset = 0; + } + printf("%016llx", data[offset]); + col++; + if ((col & 3) == 0) { + printf("\n"); + print_offset = 1; + } else { + printf(" "); + } + } + + if (!print_offset) + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct statx stx; + int ret, raw = 0, atflag = AT_SYMLINK_NOFOLLOW; + + unsigned int mask = STATX_ALL; + + for (argv++; *argv; argv++) { + if (strcmp(*argv, "-F") == 0) { + atflag &= ~AT_STATX_SYNC_TYPE; + atflag |= AT_STATX_FORCE_SYNC; + continue; + } + if (strcmp(*argv, "-D") == 0) { + atflag &= ~AT_STATX_SYNC_TYPE; + atflag |= AT_STATX_DONT_SYNC; + continue; + } + if (strcmp(*argv, "-L") == 0) { + atflag &= ~AT_SYMLINK_NOFOLLOW; + continue; + } + if (strcmp(*argv, "-O") == 0) { + mask &= ~STATX_BASIC_STATS; + continue; + } + if (strcmp(*argv, "-A") == 0) { + atflag |= AT_NO_AUTOMOUNT; + continue; + } + if (strcmp(*argv, "-R") == 0) { + raw = 1; + continue; + } + + memset(&stx, 0xbf, sizeof(stx)); + ret = statx(AT_FDCWD, *argv, atflag, mask, &stx); + printf("statx(%s) = %d\n", *argv, ret); + if (ret < 0) { + perror(*argv); + exit(1); + } + + if (raw) + dump_hex((unsigned long long *)&stx, 0, sizeof(stx)); + + dump_statx(&stx); + } + return 0; +} -- cgit v1.2.3 From 3b3e78552d3077ec70d2640e629e07e3ab416a6a Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Sun, 5 Mar 2017 19:16:44 +0100 Subject: x86/reboot/quirks: Add ASUS EeeBook X205TA/W reboot quirk Without the parameter reboot=a, ASUS EeeBook X205TA/W will hang when it should reboot. This adds the appropriate quirk, thus fixing the problem. Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1488737804-20681-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 01c9cda3e1b7..4194d6f9bb29 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -231,6 +231,14 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), }, }, + { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ + .callback = set_acpi_reboot, + .ident = "ASUS EeeBook X205TAW", + .matches = { + DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + }, + }, /* Certec */ { /* Handle problems with rebooting on Certec BPC600 */ -- cgit v1.2.3 From f2853308b6409b97799b0beceacd9da43a82fe43 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Mon, 6 Mar 2017 10:57:48 +0200 Subject: x86/build/x86_64_defconfig: Enable CONFIG_R8169 Very common PCIe ethernet card. Already enabled in i386_defconfig. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Cc: Konstantin Khlebnikov Link: http://lkml.kernel.org/r/20170306085748.85957-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/configs/x86_64_defconfig | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/configs/x86_64_defconfig b/arch/x86/configs/x86_64_defconfig index 7ef4a099defc..6205d3b81e6d 100644 --- a/arch/x86/configs/x86_64_defconfig +++ b/arch/x86/configs/x86_64_defconfig @@ -176,6 +176,7 @@ CONFIG_E1000E=y CONFIG_SKY2=y CONFIG_FORCEDETH=y CONFIG_8139TOO=y +CONFIG_R8169=y CONFIG_FDDI=y CONFIG_INPUT_POLLDEV=y # CONFIG_INPUT_MOUSEDEV_PSAUX is not set -- cgit v1.2.3 From 587d7e72aedca91cee80c0a56811649c3efab765 Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 2 Mar 2017 12:41:48 -0800 Subject: kvm: nVMX: VMCLEAR should not cause the vCPU to shut down MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit VMCLEAR should silently ignore a failure to clear the launch state of the VMCS referenced by the operand. Signed-off-by: Jim Mattson [Changed "kvm_write_guest(vcpu->kvm" to "kvm_vcpu_write_guest(vcpu".] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 283aa8601833..3b626d6dc3ac 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -7258,9 +7258,8 @@ static int handle_vmoff(struct kvm_vcpu *vcpu) static int handle_vmclear(struct kvm_vcpu *vcpu) { struct vcpu_vmx *vmx = to_vmx(vcpu); + u32 zero = 0; gpa_t vmptr; - struct vmcs12 *vmcs12; - struct page *page; if (!nested_vmx_check_permission(vcpu)) return 1; @@ -7271,22 +7270,9 @@ static int handle_vmclear(struct kvm_vcpu *vcpu) if (vmptr == vmx->nested.current_vmptr) nested_release_vmcs12(vmx); - page = nested_get_page(vcpu, vmptr); - if (page == NULL) { - /* - * For accurate processor emulation, VMCLEAR beyond available - * physical memory should do nothing at all. However, it is - * possible that a nested vmx bug, not a guest hypervisor bug, - * resulted in this case, so let's shut down before doing any - * more damage: - */ - kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu); - return 1; - } - vmcs12 = kmap(page); - vmcs12->launch_state = 0; - kunmap(page); - nested_release_page(page); + kvm_vcpu_write_guest(vcpu, + vmptr + offsetof(struct vmcs12, launch_state), + &zero, sizeof(zero)); nested_free_vmcs02(vmx, vmptr); -- cgit v1.2.3 From 2f707d97982286b307ef2a9b034e19aabc1abb56 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 6 Mar 2017 04:03:28 -0800 Subject: KVM: nVMX: reset nested_run_pending if the vCPU is going to be reset MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Reported by syzkaller: WARNING: CPU: 1 PID: 27742 at arch/x86/kvm/vmx.c:11029 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 CPU: 1 PID: 27742 Comm: a.out Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_vmexit+0x5c35/0x74d0 arch/x86/kvm/vmx.c:11029 vmx_leave_nested arch/x86/kvm/vmx.c:11136 [inline] vmx_set_msr+0x1565/0x1910 arch/x86/kvm/vmx.c:3324 kvm_set_msr+0xd4/0x170 arch/x86/kvm/x86.c:1099 do_set_msr+0x11e/0x190 arch/x86/kvm/x86.c:1128 __msr_io arch/x86/kvm/x86.c:2577 [inline] msr_io+0x24b/0x450 arch/x86/kvm/x86.c:2614 kvm_arch_vcpu_ioctl+0x35b/0x46a0 arch/x86/kvm/x86.c:3497 kvm_vcpu_ioctl+0x232/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2721 vfs_ioctl fs/ioctl.c:43 [inline] do_vfs_ioctl+0x1bf/0x1790 fs/ioctl.c:683 SYSC_ioctl fs/ioctl.c:698 [inline] SyS_ioctl+0x8f/0xc0 fs/ioctl.c:689 entry_SYSCALL_64_fastpath+0x1f/0xc2 The syzkaller folks reported a nested_run_pending warning during userspace clear VMX capability which is exposed to L1 before. The warning gets thrown while doing (*(uint32_t*)0x20aecfe8 = (uint32_t)0x1); (*(uint32_t*)0x20aecfec = (uint32_t)0x0); (*(uint32_t*)0x20aecff0 = (uint32_t)0x3a); (*(uint32_t*)0x20aecff4 = (uint32_t)0x0); (*(uint64_t*)0x20aecff8 = (uint64_t)0x0); r[29] = syscall(__NR_ioctl, r[4], 0x4008ae89ul, 0x20aecfe8ul, 0, 0, 0, 0, 0, 0); i.e. KVM_SET_MSR ioctl with struct kvm_msrs { .nmsrs = 1, .pad = 0, .entries = { {.index = MSR_IA32_FEATURE_CONTROL, .reserved = 0, .data = 0} } } The VMLANCH/VMRESUME emulation should be stopped since the CPU is going to reset here. This patch resets the nested_run_pending since the CPU is going to be reset hence there should be nothing pending. Reported-by: Dmitry Vyukov Suggested-by: Radim Krčmář Cc: Paolo Bonzini Cc: Radim Krčmář Cc: Dmitry Vyukov Cc: David Hildenbrand Signed-off-by: Wanpeng Li Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 3b626d6dc3ac..ab338581b3ec 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -11107,8 +11107,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, */ static void vmx_leave_nested(struct kvm_vcpu *vcpu) { - if (is_guest_mode(vcpu)) + if (is_guest_mode(vcpu)) { + to_vmx(vcpu)->nested.nested_run_pending = 0; nested_vmx_vmexit(vcpu, -1, 0, 0); + } free_nested(to_vmx(vcpu)); } -- cgit v1.2.3 From 05d8d34611139f8435af90ac54b65eb31e82e388 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Tue, 7 Mar 2017 17:51:49 +0100 Subject: KVM: nVMX: do not warn when MSR bitmap address is not backed MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Before trying to do nested_get_page() in nested_vmx_merge_msr_bitmap(), we have already checked that the MSR bitmap address is valid (4k aligned and within physical limits). SDM doesn't specify what happens if the there is no memory mapped at the valid address, but Intel CPUs treat the situation as if the bitmap was configured to trap all MSRs. KVM already does that by returning false and a correct handling doesn't need the guest-trigerrable warning that was reported by syzkaller: (The warning was originally there to catch some possible bugs in nVMX.) ------------[ cut here ]------------ WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] WARNING: CPU: 0 PID: 7832 at arch/x86/kvm/vmx.c:9709 nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 Kernel panic - not syncing: panic_on_warn set ... CPU: 0 PID: 7832 Comm: syz-executor1 Not tainted 4.10.0+ #229 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace: __dump_stack lib/dump_stack.c:15 [inline] dump_stack+0x2ee/0x3ef lib/dump_stack.c:51 panic+0x1fb/0x412 kernel/panic.c:179 __warn+0x1c4/0x1e0 kernel/panic.c:540 warn_slowpath_null+0x2c/0x40 kernel/panic.c:583 nested_vmx_merge_msr_bitmap arch/x86/kvm/vmx.c:9709 [inline] nested_get_vmcs12_pages+0xfb6/0x15c0 arch/x86/kvm/vmx.c:9640 enter_vmx_non_root_mode arch/x86/kvm/vmx.c:10471 [inline] nested_vmx_run+0x6186/0xaab0 arch/x86/kvm/vmx.c:10561 handle_vmlaunch+0x1a/0x20 arch/x86/kvm/vmx.c:7312 vmx_handle_exit+0xfc0/0x3f00 arch/x86/kvm/vmx.c:8526 vcpu_enter_guest arch/x86/kvm/x86.c:6982 [inline] vcpu_run arch/x86/kvm/x86.c:7044 [inline] kvm_arch_vcpu_ioctl_run+0x1418/0x4840 arch/x86/kvm/x86.c:7205 kvm_vcpu_ioctl+0x673/0x1120 arch/x86/kvm/../../../virt/kvm/kvm_main.c:2570 Reported-by: Dmitry Vyukov Reviewed-by: Jim Mattson [Jim Mattson explained the bare metal behavior: "I believe this behavior would be documented in the chipset data sheet rather than the SDM, since the chipset returns all 1s for an unclaimed read."] Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index ab338581b3ec..98e82ee1e699 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9680,10 +9680,8 @@ static inline bool nested_vmx_merge_msr_bitmap(struct kvm_vcpu *vcpu, return false; page = nested_get_page(vcpu, vmcs12->msr_bitmap); - if (!page) { - WARN_ON(1); + if (!page) return false; - } msr_bitmap_l1 = (unsigned long *)kmap(page); memset(msr_bitmap_l0, 0xff, PAGE_SIZE); -- cgit v1.2.3 From 6fb895692a034393d58679cd8d00c9e229719a5f Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:02 +0300 Subject: x86/cpufeature: Add 5-level paging detection Look for 'la57' in /proc/cpuinfo to see if your machine supports 5-level paging. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/x86/include/asm/cpufeatures.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/cpufeatures.h b/arch/x86/include/asm/cpufeatures.h index 4e7772387c6e..b04bb6dfed7f 100644 --- a/arch/x86/include/asm/cpufeatures.h +++ b/arch/x86/include/asm/cpufeatures.h @@ -289,7 +289,8 @@ #define X86_FEATURE_PKU (16*32+ 3) /* Protection Keys for Userspace */ #define X86_FEATURE_OSPKE (16*32+ 4) /* OS Protection Keys Enable */ #define X86_FEATURE_AVX512_VPOPCNTDQ (16*32+14) /* POPCNT for vectors of DW/QW */ -#define X86_FEATURE_RDPID (16*32+ 22) /* RDPID instruction */ +#define X86_FEATURE_LA57 (16*32+16) /* 5-level page tables */ +#define X86_FEATURE_RDPID (16*32+22) /* RDPID instruction */ /* AMD-defined CPU features, CPUID level 0x80000007 (ebx), word 17 */ #define X86_FEATURE_OVERFLOW_RECOV (17*32+0) /* MCA overflow recovery support */ -- cgit v1.2.3 From 9849a5697d3defb2087cb6b9be5573a142697889 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 9 Mar 2017 17:24:05 +0300 Subject: arch, mm: convert all architectures to use 5level-fixup.h If an architecture uses 4level-fixup.h we don't need to do anything as it includes 5level-fixup.h. If an architecture uses pgtable-nop*d.h, define __ARCH_USE_5LEVEL_HACK before inclusion of the header. It makes asm-generic code to use 5level-fixup.h. If an architecture has 4-level paging or folds levels on its own, include 5level-fixup.h directly. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- arch/arc/include/asm/hugepage.h | 1 + arch/arc/include/asm/pgtable.h | 1 + arch/arm/include/asm/pgtable.h | 1 + arch/arm64/include/asm/pgtable-types.h | 4 ++++ arch/avr32/include/asm/pgtable-2level.h | 1 + arch/cris/include/asm/pgtable.h | 1 + arch/frv/include/asm/pgtable.h | 1 + arch/h8300/include/asm/pgtable.h | 1 + arch/hexagon/include/asm/pgtable.h | 1 + arch/ia64/include/asm/pgtable.h | 2 ++ arch/metag/include/asm/pgtable.h | 1 + arch/microblaze/include/asm/page.h | 3 ++- arch/mips/include/asm/pgtable-32.h | 1 + arch/mips/include/asm/pgtable-64.h | 1 + arch/mn10300/include/asm/page.h | 1 + arch/nios2/include/asm/pgtable.h | 1 + arch/openrisc/include/asm/pgtable.h | 1 + arch/powerpc/include/asm/book3s/32/pgtable.h | 1 + arch/powerpc/include/asm/book3s/64/pgtable.h | 3 +++ arch/powerpc/include/asm/nohash/32/pgtable.h | 1 + arch/powerpc/include/asm/nohash/64/pgtable-4k.h | 3 +++ arch/powerpc/include/asm/nohash/64/pgtable-64k.h | 1 + arch/s390/include/asm/pgtable.h | 1 + arch/score/include/asm/pgtable.h | 1 + arch/sh/include/asm/pgtable-2level.h | 1 + arch/sh/include/asm/pgtable-3level.h | 1 + arch/sparc/include/asm/pgtable_64.h | 1 + arch/tile/include/asm/pgtable_32.h | 1 + arch/tile/include/asm/pgtable_64.h | 1 + arch/um/include/asm/pgtable-2level.h | 1 + arch/um/include/asm/pgtable-3level.h | 1 + arch/unicore32/include/asm/pgtable.h | 1 + arch/x86/include/asm/pgtable_types.h | 4 ++++ arch/xtensa/include/asm/pgtable.h | 1 + 34 files changed, 46 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/arc/include/asm/hugepage.h b/arch/arc/include/asm/hugepage.h index 317ff773e1ca..b18fcb606908 100644 --- a/arch/arc/include/asm/hugepage.h +++ b/arch/arc/include/asm/hugepage.h @@ -11,6 +11,7 @@ #define _ASM_ARC_HUGEPAGE_H #include +#define __ARCH_USE_5LEVEL_HACK #include static inline pte_t pmd_pte(pmd_t pmd) diff --git a/arch/arc/include/asm/pgtable.h b/arch/arc/include/asm/pgtable.h index e94ca72b974e..ee22d40afef4 100644 --- a/arch/arc/include/asm/pgtable.h +++ b/arch/arc/include/asm/pgtable.h @@ -37,6 +37,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/arm/include/asm/pgtable.h b/arch/arm/include/asm/pgtable.h index a8d656d9aec7..1c462381c225 100644 --- a/arch/arm/include/asm/pgtable.h +++ b/arch/arm/include/asm/pgtable.h @@ -20,6 +20,7 @@ #else +#define __ARCH_USE_5LEVEL_HACK #include #include #include diff --git a/arch/arm64/include/asm/pgtable-types.h b/arch/arm64/include/asm/pgtable-types.h index 69b2fd41503c..345a072b5856 100644 --- a/arch/arm64/include/asm/pgtable-types.h +++ b/arch/arm64/include/asm/pgtable-types.h @@ -55,9 +55,13 @@ typedef struct { pteval_t pgprot; } pgprot_t; #define __pgprot(x) ((pgprot_t) { (x) } ) #if CONFIG_PGTABLE_LEVELS == 2 +#define __ARCH_USE_5LEVEL_HACK #include #elif CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include +#elif CONFIG_PGTABLE_LEVELS == 4 +#include #endif #endif /* __ASM_PGTABLE_TYPES_H */ diff --git a/arch/avr32/include/asm/pgtable-2level.h b/arch/avr32/include/asm/pgtable-2level.h index 425dd567b5b9..d5b1c63993ec 100644 --- a/arch/avr32/include/asm/pgtable-2level.h +++ b/arch/avr32/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __ASM_AVR32_PGTABLE_2LEVEL_H #define __ASM_AVR32_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/cris/include/asm/pgtable.h b/arch/cris/include/asm/pgtable.h index 2a3210ba4c72..fa3a73004cc5 100644 --- a/arch/cris/include/asm/pgtable.h +++ b/arch/cris/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _CRIS_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/frv/include/asm/pgtable.h b/arch/frv/include/asm/pgtable.h index a0513d463a1f..ab6e7e961b54 100644 --- a/arch/frv/include/asm/pgtable.h +++ b/arch/frv/include/asm/pgtable.h @@ -16,6 +16,7 @@ #ifndef _ASM_PGTABLE_H #define _ASM_PGTABLE_H +#include #include #include #include diff --git a/arch/h8300/include/asm/pgtable.h b/arch/h8300/include/asm/pgtable.h index 8341db67821d..7d265d28ba5e 100644 --- a/arch/h8300/include/asm/pgtable.h +++ b/arch/h8300/include/asm/pgtable.h @@ -1,5 +1,6 @@ #ifndef _H8300_PGTABLE_H #define _H8300_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #define pgtable_cache_init() do { } while (0) diff --git a/arch/hexagon/include/asm/pgtable.h b/arch/hexagon/include/asm/pgtable.h index 49eab8136ec3..24a9177fb897 100644 --- a/arch/hexagon/include/asm/pgtable.h +++ b/arch/hexagon/include/asm/pgtable.h @@ -26,6 +26,7 @@ */ #include #include +#define __ARCH_USE_5LEVEL_HACK #include /* A handy thing to have if one has the RAM. Declared in head.S */ diff --git a/arch/ia64/include/asm/pgtable.h b/arch/ia64/include/asm/pgtable.h index 384794e665fc..6cc22c8d8923 100644 --- a/arch/ia64/include/asm/pgtable.h +++ b/arch/ia64/include/asm/pgtable.h @@ -587,8 +587,10 @@ extern struct page *zero_page_memmap_ptr; #if CONFIG_PGTABLE_LEVELS == 3 +#define __ARCH_USE_5LEVEL_HACK #include #endif +#include #include #endif /* _ASM_IA64_PGTABLE_H */ diff --git a/arch/metag/include/asm/pgtable.h b/arch/metag/include/asm/pgtable.h index ffa3a3a2ecad..0c151e5af079 100644 --- a/arch/metag/include/asm/pgtable.h +++ b/arch/metag/include/asm/pgtable.h @@ -6,6 +6,7 @@ #define _METAG_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include /* Invalid regions on Meta: 0x00000000-0x001FFFFF and 0xFFFF0000-0xFFFFFFFF */ diff --git a/arch/microblaze/include/asm/page.h b/arch/microblaze/include/asm/page.h index fd850879854d..d506bb0893f9 100644 --- a/arch/microblaze/include/asm/page.h +++ b/arch/microblaze/include/asm/page.h @@ -95,7 +95,8 @@ typedef struct { unsigned long pgd; } pgd_t; # else /* CONFIG_MMU */ typedef struct { unsigned long ste[64]; } pmd_t; typedef struct { pmd_t pue[1]; } pud_t; -typedef struct { pud_t pge[1]; } pgd_t; +typedef struct { pud_t p4e[1]; } p4d_t; +typedef struct { p4d_t pge[1]; } pgd_t; # endif /* CONFIG_MMU */ # define pte_val(x) ((x).pte) diff --git a/arch/mips/include/asm/pgtable-32.h b/arch/mips/include/asm/pgtable-32.h index d21f3da7bdb6..6f94bed571c4 100644 --- a/arch/mips/include/asm/pgtable-32.h +++ b/arch/mips/include/asm/pgtable-32.h @@ -16,6 +16,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include extern int temp_tlb_entry; diff --git a/arch/mips/include/asm/pgtable-64.h b/arch/mips/include/asm/pgtable-64.h index 514cbc0a6a67..130a2a6c1531 100644 --- a/arch/mips/include/asm/pgtable-64.h +++ b/arch/mips/include/asm/pgtable-64.h @@ -17,6 +17,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #if defined(CONFIG_PAGE_SIZE_64KB) && !defined(CONFIG_MIPS_VA_BITS_48) #include #else diff --git a/arch/mn10300/include/asm/page.h b/arch/mn10300/include/asm/page.h index 3810a6f740fd..dfe730a5ede0 100644 --- a/arch/mn10300/include/asm/page.h +++ b/arch/mn10300/include/asm/page.h @@ -57,6 +57,7 @@ typedef struct page *pgtable_t; #define __pgd(x) ((pgd_t) { (x) }) #define __pgprot(x) ((pgprot_t) { (x) }) +#define __ARCH_USE_5LEVEL_HACK #include #endif /* !__ASSEMBLY__ */ diff --git a/arch/nios2/include/asm/pgtable.h b/arch/nios2/include/asm/pgtable.h index 298393c3cb42..db4f7d179220 100644 --- a/arch/nios2/include/asm/pgtable.h +++ b/arch/nios2/include/asm/pgtable.h @@ -22,6 +22,7 @@ #include #include +#define __ARCH_USE_5LEVEL_HACK #include #define FIRST_USER_ADDRESS 0UL diff --git a/arch/openrisc/include/asm/pgtable.h b/arch/openrisc/include/asm/pgtable.h index 3567aa7be555..ff97374ca069 100644 --- a/arch/openrisc/include/asm/pgtable.h +++ b/arch/openrisc/include/asm/pgtable.h @@ -25,6 +25,7 @@ #ifndef __ASM_OPENRISC_PGTABLE_H #define __ASM_OPENRISC_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/book3s/32/pgtable.h b/arch/powerpc/include/asm/book3s/32/pgtable.h index 012223638815..26ed228d4dc6 100644 --- a/arch/powerpc/include/asm/book3s/32/pgtable.h +++ b/arch/powerpc/include/asm/book3s/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_BOOK3S_32_PGTABLE_H #define _ASM_POWERPC_BOOK3S_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/powerpc/include/asm/book3s/64/pgtable.h b/arch/powerpc/include/asm/book3s/64/pgtable.h index 1eeeb72c7015..13c39b6d5d64 100644 --- a/arch/powerpc/include/asm/book3s/64/pgtable.h +++ b/arch/powerpc/include/asm/book3s/64/pgtable.h @@ -1,9 +1,12 @@ #ifndef _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ #define _ASM_POWERPC_BOOK3S_64_PGTABLE_H_ +#include + #ifndef __ASSEMBLY__ #include #endif + /* * Common bits between hash and Radix page table */ diff --git a/arch/powerpc/include/asm/nohash/32/pgtable.h b/arch/powerpc/include/asm/nohash/32/pgtable.h index ba9921bf202e..5134ade2e850 100644 --- a/arch/powerpc/include/asm/nohash/32/pgtable.h +++ b/arch/powerpc/include/asm/nohash/32/pgtable.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_32_PGTABLE_H #define _ASM_POWERPC_NOHASH_32_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #ifndef __ASSEMBLY__ diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h index d0db98793dd8..9f4de0a1035e 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-4k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-4k.h @@ -1,5 +1,8 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_4K_H + +#include + /* * Entries per page directory level. The PTE level must use a 64b record * for each page table entry. The PMD and PGD level use a 32b record for diff --git a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h index 55b28ef3409a..1facb584dd29 100644 --- a/arch/powerpc/include/asm/nohash/64/pgtable-64k.h +++ b/arch/powerpc/include/asm/nohash/64/pgtable-64k.h @@ -1,6 +1,7 @@ #ifndef _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H #define _ASM_POWERPC_NOHASH_64_PGTABLE_64K_H +#define __ARCH_USE_5LEVEL_HACK #include diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 7ed1972b1920..93e37b12e882 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -24,6 +24,7 @@ * the S390 page table tree. */ #ifndef __ASSEMBLY__ +#include #include #include #include diff --git a/arch/score/include/asm/pgtable.h b/arch/score/include/asm/pgtable.h index 0553e5cd5985..46ff8fd678a7 100644 --- a/arch/score/include/asm/pgtable.h +++ b/arch/score/include/asm/pgtable.h @@ -2,6 +2,7 @@ #define _ASM_SCORE_PGTABLE_H #include +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/sh/include/asm/pgtable-2level.h b/arch/sh/include/asm/pgtable-2level.h index 19bd89db17e7..f75cf4387257 100644 --- a/arch/sh/include/asm/pgtable-2level.h +++ b/arch/sh/include/asm/pgtable-2level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_2LEVEL_H #define __ASM_SH_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sh/include/asm/pgtable-3level.h b/arch/sh/include/asm/pgtable-3level.h index 249a985d9648..9b1e776eca31 100644 --- a/arch/sh/include/asm/pgtable-3level.h +++ b/arch/sh/include/asm/pgtable-3level.h @@ -1,6 +1,7 @@ #ifndef __ASM_SH_PGTABLE_3LEVEL_H #define __ASM_SH_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/sparc/include/asm/pgtable_64.h b/arch/sparc/include/asm/pgtable_64.h index 56e49c8f770d..8a598528ec1f 100644 --- a/arch/sparc/include/asm/pgtable_64.h +++ b/arch/sparc/include/asm/pgtable_64.h @@ -12,6 +12,7 @@ * the SpitFire page tables. */ +#include #include #include #include diff --git a/arch/tile/include/asm/pgtable_32.h b/arch/tile/include/asm/pgtable_32.h index d26a42279036..5f8c615cb5e9 100644 --- a/arch/tile/include/asm/pgtable_32.h +++ b/arch/tile/include/asm/pgtable_32.h @@ -74,6 +74,7 @@ extern unsigned long VMALLOC_RESERVE /* = CONFIG_VMALLOC_RESERVE */; #define MAXMEM (_VMALLOC_START - PAGE_OFFSET) /* We have no pmd or pud since we are strictly a two-level page table */ +#define __ARCH_USE_5LEVEL_HACK #include static inline int pud_huge_page(pud_t pud) { return 0; } diff --git a/arch/tile/include/asm/pgtable_64.h b/arch/tile/include/asm/pgtable_64.h index e96cec52f6d8..96fe58b45118 100644 --- a/arch/tile/include/asm/pgtable_64.h +++ b/arch/tile/include/asm/pgtable_64.h @@ -59,6 +59,7 @@ #ifndef __ASSEMBLY__ /* We have no pud since we are a three-level page table. */ +#define __ARCH_USE_5LEVEL_HACK #include /* diff --git a/arch/um/include/asm/pgtable-2level.h b/arch/um/include/asm/pgtable-2level.h index cfbe59752469..179c0ea87a0c 100644 --- a/arch/um/include/asm/pgtable-2level.h +++ b/arch/um/include/asm/pgtable-2level.h @@ -8,6 +8,7 @@ #ifndef __UM_PGTABLE_2LEVEL_H #define __UM_PGTABLE_2LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/um/include/asm/pgtable-3level.h b/arch/um/include/asm/pgtable-3level.h index bae8523a162f..c4d876dfb9ac 100644 --- a/arch/um/include/asm/pgtable-3level.h +++ b/arch/um/include/asm/pgtable-3level.h @@ -7,6 +7,7 @@ #ifndef __UM_PGTABLE_3LEVEL_H #define __UM_PGTABLE_3LEVEL_H +#define __ARCH_USE_5LEVEL_HACK #include /* PGDIR_SHIFT determines what a third-level page table entry can map */ diff --git a/arch/unicore32/include/asm/pgtable.h b/arch/unicore32/include/asm/pgtable.h index 818d0f5598e3..a4f2bef37e70 100644 --- a/arch/unicore32/include/asm/pgtable.h +++ b/arch/unicore32/include/asm/pgtable.h @@ -12,6 +12,7 @@ #ifndef __UNICORE_PGTABLE_H__ #define __UNICORE_PGTABLE_H__ +#define __ARCH_USE_5LEVEL_HACK #include #include diff --git a/arch/x86/include/asm/pgtable_types.h b/arch/x86/include/asm/pgtable_types.h index 8b4de22d6429..62484333673d 100644 --- a/arch/x86/include/asm/pgtable_types.h +++ b/arch/x86/include/asm/pgtable_types.h @@ -273,6 +273,8 @@ static inline pgdval_t pgd_flags(pgd_t pgd) } #if CONFIG_PGTABLE_LEVELS > 3 +#include + typedef struct { pudval_t pud; } pud_t; static inline pud_t native_make_pud(pmdval_t val) @@ -285,6 +287,7 @@ static inline pudval_t native_pud_val(pud_t pud) return pud.pud; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pudval_t native_pud_val(pud_t pud) @@ -306,6 +309,7 @@ static inline pmdval_t native_pmd_val(pmd_t pmd) return pmd.pmd; } #else +#define __ARCH_USE_5LEVEL_HACK #include static inline pmdval_t native_pmd_val(pmd_t pmd) diff --git a/arch/xtensa/include/asm/pgtable.h b/arch/xtensa/include/asm/pgtable.h index 8aa0e0d9cbb2..30dd5b2e4ad5 100644 --- a/arch/xtensa/include/asm/pgtable.h +++ b/arch/xtensa/include/asm/pgtable.h @@ -11,6 +11,7 @@ #ifndef _XTENSA_PGTABLE_H #define _XTENSA_PGTABLE_H +#define __ARCH_USE_5LEVEL_HACK #include #include #include -- cgit v1.2.3 From 8a1115ff6b6d90cf1066ec3a0c4e51276553eebe Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 9 Mar 2017 16:16:31 -0800 Subject: scripts/spelling.txt: add "disble(d)" pattern and fix typo instances Fix typos and add the following to the scripts/spelling.txt: disble||disable disbled||disabled I kept the TSL2563_INT_DISBLED in /drivers/iio/light/tsl2563.c untouched. The macro is not referenced at all, but this commit is touching only comment blocks just in case. Link: http://lkml.kernel.org/r/1481573103-11329-20-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- Documentation/dev-tools/kcov.rst | 2 +- arch/cris/arch-v32/drivers/cryptocop.c | 2 +- arch/x86/kernel/ftrace.c | 2 +- drivers/crypto/ux500/cryp/cryp.c | 2 +- drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c | 2 +- drivers/hv/channel.c | 2 +- drivers/isdn/hisax/st5481_b.c | 2 +- drivers/mtd/spi-nor/spi-nor.c | 2 +- drivers/net/ethernet/qlogic/qlge/qlge.h | 2 +- drivers/scsi/aic7xxx/aic79xx_core.c | 2 +- drivers/usb/gadget/legacy/inode.c | 3 +-- drivers/usb/host/xhci.c | 4 ++-- include/linux/regulator/machine.h | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/events/core.c | 2 +- scripts/spelling.txt | 2 ++ sound/soc/amd/acp-pcm-dma.c | 2 +- 17 files changed, 19 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/Documentation/dev-tools/kcov.rst b/Documentation/dev-tools/kcov.rst index 2c41b713841f..44886c91e112 100644 --- a/Documentation/dev-tools/kcov.rst +++ b/Documentation/dev-tools/kcov.rst @@ -10,7 +10,7 @@ Note that kcov does not aim to collect as much coverage as possible. It aims to collect more or less stable coverage that is function of syscall inputs. To achieve this goal it does not collect coverage in soft/hard interrupts and instrumentation of some inherently non-deterministic parts of kernel is -disbled (e.g. scheduler, locking). +disabled (e.g. scheduler, locking). Usage ----- diff --git a/arch/cris/arch-v32/drivers/cryptocop.c b/arch/cris/arch-v32/drivers/cryptocop.c index ae6903d7fdbe..14970f11bbf2 100644 --- a/arch/cris/arch-v32/drivers/cryptocop.c +++ b/arch/cris/arch-v32/drivers/cryptocop.c @@ -2086,7 +2086,7 @@ static void cryptocop_job_queue_close(void) dma_in_cfg.en = regk_dma_no; REG_WR(dma, IN_DMA_INST, rw_cfg, dma_in_cfg); - /* Disble the cryptocop. */ + /* Disable the cryptocop. */ rw_cfg = REG_RD(strcop, regi_strcop, rw_cfg); rw_cfg.en = 0; REG_WR(strcop, regi_strcop, rw_cfg, rw_cfg); diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8639bb2ae058..8f3d9cf26ff9 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -535,7 +535,7 @@ static void run_sync(void) { int enable_irqs = irqs_disabled(); - /* We may be called with interrupts disbled (on bootup). */ + /* We may be called with interrupts disabled (on bootup). */ if (enable_irqs) local_irq_enable(); on_each_cpu(do_sync_core, NULL, 1); diff --git a/drivers/crypto/ux500/cryp/cryp.c b/drivers/crypto/ux500/cryp/cryp.c index 43a0c8a26ab0..00a16ab601cb 100644 --- a/drivers/crypto/ux500/cryp/cryp.c +++ b/drivers/crypto/ux500/cryp/cryp.c @@ -82,7 +82,7 @@ void cryp_activity(struct cryp_device_data *device_data, void cryp_flush_inoutfifo(struct cryp_device_data *device_data) { /* - * We always need to disble the hardware before trying to flush the + * We always need to disable the hardware before trying to flush the * FIFO. This is something that isn't written in the design * specification, but we have been informed by the hardware designers * that this must be done. diff --git a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c index 31375bdde6f1..011800f621c6 100644 --- a/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c +++ b/drivers/gpu/drm/amd/amdgpu/sdma_v3_0.c @@ -788,7 +788,7 @@ static int sdma_v3_0_start(struct amdgpu_device *adev) } } - /* disble sdma engine before programing it */ + /* disable sdma engine before programing it */ sdma_v3_0_ctx_switch_enable(adev, false); sdma_v3_0_enable(adev, false); diff --git a/drivers/hv/channel.c b/drivers/hv/channel.c index 81a80c82f1bd..bd0d1988feb2 100644 --- a/drivers/hv/channel.c +++ b/drivers/hv/channel.c @@ -543,7 +543,7 @@ static int vmbus_close_internal(struct vmbus_channel *channel) /* * In case a device driver's probe() fails (e.g., * util_probe() -> vmbus_open() returns -ENOMEM) and the device is - * rescinded later (e.g., we dynamically disble an Integrated Service + * rescinded later (e.g., we dynamically disable an Integrated Service * in Hyper-V Manager), the driver's remove() invokes vmbus_close(): * here we should skip most of the below cleanup work. */ diff --git a/drivers/isdn/hisax/st5481_b.c b/drivers/isdn/hisax/st5481_b.c index 409849165838..f64a36007800 100644 --- a/drivers/isdn/hisax/st5481_b.c +++ b/drivers/isdn/hisax/st5481_b.c @@ -239,7 +239,7 @@ static void st5481B_mode(struct st5481_bcs *bcs, int mode) } } } else { - // Disble B channel interrupts + // Disable B channel interrupts st5481_usb_device_ctrl_msg(adapter, FFMSK_B1+(bcs->channel * 2), 0, NULL, NULL); // Disable B channel FIFOs diff --git a/drivers/mtd/spi-nor/spi-nor.c b/drivers/mtd/spi-nor/spi-nor.c index 1ae872bfc3ba..747645c74134 100644 --- a/drivers/mtd/spi-nor/spi-nor.c +++ b/drivers/mtd/spi-nor/spi-nor.c @@ -186,7 +186,7 @@ static inline int write_enable(struct spi_nor *nor) } /* - * Send write disble instruction to the chip. + * Send write disable instruction to the chip. */ static inline int write_disable(struct spi_nor *nor) { diff --git a/drivers/net/ethernet/qlogic/qlge/qlge.h b/drivers/net/ethernet/qlogic/qlge/qlge.h index 6d31f92ef2b6..90b3b46f85cc 100644 --- a/drivers/net/ethernet/qlogic/qlge/qlge.h +++ b/drivers/net/ethernet/qlogic/qlge/qlge.h @@ -1163,7 +1163,7 @@ struct ib_mac_iocb_rsp { u8 opcode; /* 0x20 */ u8 flags1; #define IB_MAC_IOCB_RSP_OI 0x01 /* Overide intr delay */ -#define IB_MAC_IOCB_RSP_I 0x02 /* Disble Intr Generation */ +#define IB_MAC_IOCB_RSP_I 0x02 /* Disable Intr Generation */ #define IB_MAC_CSUM_ERR_MASK 0x1c /* A mask to use for csum errs */ #define IB_MAC_IOCB_RSP_TE 0x04 /* Checksum error */ #define IB_MAC_IOCB_RSP_NU 0x08 /* No checksum rcvd */ diff --git a/drivers/scsi/aic7xxx/aic79xx_core.c b/drivers/scsi/aic7xxx/aic79xx_core.c index 109e2c99e6c1..95d8f25cbcca 100644 --- a/drivers/scsi/aic7xxx/aic79xx_core.c +++ b/drivers/scsi/aic7xxx/aic79xx_core.c @@ -6278,7 +6278,7 @@ ahd_reset(struct ahd_softc *ahd, int reinit) * does not disable its parity logic prior to * the start of the reset. This may cause a * parity error to be detected and thus a - * spurious SERR or PERR assertion. Disble + * spurious SERR or PERR assertion. Disable * PERR and SERR responses during the CHIPRST. */ mod_cmd = cmd & ~(PCIM_CMD_PERRESPEN|PCIM_CMD_SERRESPEN); diff --git a/drivers/usb/gadget/legacy/inode.c b/drivers/usb/gadget/legacy/inode.c index a2615d64d07c..79a2d8fba6b6 100644 --- a/drivers/usb/gadget/legacy/inode.c +++ b/drivers/usb/gadget/legacy/inode.c @@ -84,8 +84,7 @@ static int ep_open(struct inode *, struct file *); /* /dev/gadget/$CHIP represents ep0 and the whole device */ enum ep0_state { - /* DISBLED is the initial state. - */ + /* DISABLED is the initial state. */ STATE_DEV_DISABLED = 0, /* Only one open() of /dev/gadget/$CHIP; only one file tracks diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 6d6c46000e56..50aee8b7718b 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -868,7 +868,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) spin_lock_irqsave(&xhci->lock, flags); - /* disble usb3 ports Wake bits*/ + /* disable usb3 ports Wake bits */ port_index = xhci->num_usb3_ports; port_array = xhci->usb3_ports; while (port_index--) { @@ -879,7 +879,7 @@ static void xhci_disable_port_wake_on_bits(struct xhci_hcd *xhci) writel(t2, port_array[port_index]); } - /* disble usb2 ports Wake bits*/ + /* disable usb2 ports Wake bits */ port_index = xhci->num_usb2_ports; port_array = xhci->usb2_ports; while (port_index--) { diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..c9f795e9a2ee 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -65,7 +65,7 @@ struct regulator_state { int uV; /* suspend voltage */ unsigned int mode; /* suspend regulator operating mode */ int enabled; /* is regulator enabled in this suspend state */ - int disabled; /* is the regulator disbled in this suspend state */ + int disabled; /* is the regulator disabled in this suspend state */ }; /** diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 0125589c7428..48851327a15e 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -2669,7 +2669,7 @@ static bool css_visible(struct cgroup_subsys_state *css) * * Returns 0 on success, -errno on failure. On failure, csses which have * been processed already aren't cleaned up. The caller is responsible for - * cleaning up with cgroup_apply_control_disble(). + * cleaning up with cgroup_apply_control_disable(). */ static int cgroup_apply_control_enable(struct cgroup *cgrp) { diff --git a/kernel/events/core.c b/kernel/events/core.c index 6f41548f2e32..a17ed56c8ce1 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -998,7 +998,7 @@ list_update_cgroup_event(struct perf_event *event, */ #define PERF_CPU_HRTIMER (1000 / HZ) /* - * function must be called with interrupts disbled + * function must be called with interrupts disabled */ static enum hrtimer_restart perf_mux_hrtimer_handler(struct hrtimer *hr) { diff --git a/scripts/spelling.txt b/scripts/spelling.txt index 0458b037c8a1..6dae4df472f6 100644 --- a/scripts/spelling.txt +++ b/scripts/spelling.txt @@ -372,6 +372,8 @@ disassocation||disassociation disapear||disappear disapeared||disappeared disappared||disappeared +disble||disable +disbled||disabled disconnet||disconnect discontinous||discontinuous dispertion||dispersion diff --git a/sound/soc/amd/acp-pcm-dma.c b/sound/soc/amd/acp-pcm-dma.c index ec1067a679da..08b1399d1da2 100644 --- a/sound/soc/amd/acp-pcm-dma.c +++ b/sound/soc/amd/acp-pcm-dma.c @@ -89,7 +89,7 @@ static void acp_reg_write(u32 val, void __iomem *acp_mmio, u32 reg) writel(val, acp_mmio + (reg * 4)); } -/* Configure a given dma channel parameters - enable/disble, +/* Configure a given dma channel parameters - enable/disable, * number of descriptors, priority */ static void config_acp_dma_channel(void __iomem *acp_mmio, u8 ch_num, -- cgit v1.2.3 From ef947b2529f918d9606533eb9c32b187ed6a5ede Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:42 -0800 Subject: x86, mm: fix gup_pte_range() vs DAX mappings gup_pte_range() fails to check pte_allows_gup() before translating a DAX pte entry, pte_devmap(), to a page. This allows writes to read-only mappings, and bypasses the DAX cacheline dirty tracking due to missed 'mkwrite' faults. The gup_huge_pmd() path and the gup_huge_pud() path correctly check pte_allows_gup() before checking for _devmap() entries. Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Link: http://lkml.kernel.org/r/148804251312.36605.12665024794196605053.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ross Zwisler Signed-off-by: Dan Williams Reported-by: Dave Hansen Reported-by: Ross Zwisler Cc: Xiong Zhou Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 99c7805a9693..9d32ee608807 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -120,6 +120,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, return 0; } + if (!pte_allows_gup(pte_val(pte), write)) { + pte_unmap(ptep); + return 0; + } + if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { @@ -127,8 +132,7 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, pte_unmap(ptep); return 0; } - } else if (!pte_allows_gup(pte_val(pte), write) || - pte_special(pte)) { + } else if (pte_special(pte)) { pte_unmap(ptep); return 0; } -- cgit v1.2.3 From b2e593e271b0760ebc8999e5f9dd068ae2b9d30a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 9 Mar 2017 16:16:45 -0800 Subject: x86, mm: unify exit paths in gup_pte_range() All exit paths from gup_pte_range() require pte_unmap() of the original pte page before returning. Refactor the code to have a single exit point to do the unmap. This mirrors the flow of the generic gup_pte_range() in mm/gup.c. Link: http://lkml.kernel.org/r/148804251828.36605.14910389618497006945.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Dan Williams Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Dave Hansen Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/mm/gup.c | 39 ++++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 19 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 9d32ee608807..1f3b6ef105cd 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -106,36 +106,35 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, unsigned long end, int write, struct page **pages, int *nr) { struct dev_pagemap *pgmap = NULL; - int nr_start = *nr; - pte_t *ptep; + int nr_start = *nr, ret = 0; + pte_t *ptep, *ptem; - ptep = pte_offset_map(&pmd, addr); + /* + * Keep the original mapped PTE value (ptem) around since we + * might increment ptep off the end of the page when finishing + * our loop iteration. + */ + ptem = ptep = pte_offset_map(&pmd, addr); do { pte_t pte = gup_get_pte(ptep); struct page *page; /* Similar to the PMD case, NUMA hinting must take slow path */ - if (pte_protnone(pte)) { - pte_unmap(ptep); - return 0; - } + if (pte_protnone(pte)) + break; - if (!pte_allows_gup(pte_val(pte), write)) { - pte_unmap(ptep); - return 0; - } + if (!pte_allows_gup(pte_val(pte), write)) + break; if (pte_devmap(pte)) { pgmap = get_dev_pagemap(pte_pfn(pte), pgmap); if (unlikely(!pgmap)) { undo_dev_pagemap(nr, nr_start, pages); - pte_unmap(ptep); - return 0; + break; } - } else if (pte_special(pte)) { - pte_unmap(ptep); - return 0; - } + } else if (pte_special(pte)) + break; + VM_BUG_ON(!pfn_valid(pte_pfn(pte))); page = pte_page(pte); get_page(page); @@ -145,9 +144,11 @@ static noinline int gup_pte_range(pmd_t pmd, unsigned long addr, (*nr)++; } while (ptep++, addr += PAGE_SIZE, addr != end); - pte_unmap(ptep - 1); + if (addr == end) + ret = 1; + pte_unmap(ptem); - return 1; + return ret; } static inline void get_head_page_multiple(struct page *page, int nr) -- cgit v1.2.3 From bba8376aea1dcbbe22bbda118c52abee317c7609 Mon Sep 17 00:00:00 2001 From: Matjaz Hegedic Date: Thu, 9 Mar 2017 14:00:17 +0100 Subject: x86/reboot/quirks: Fix typo in ASUS EeeBook X205TA reboot quirk The reboot quirk for ASUS EeeBook X205TA contains a typo in DMI_PRODUCT_NAME, improperly referring to X205TAW instead of X205TA, which prevents the quirk from being triggered. The model X205TAW already has a reboot quirk of its own. This fix simply removes the inappropriate final letter W. Fixes: 90b28ded88dd ("x86/reboot/quirks: Add ASUS EeeBook X205TA reboot quirk") Signed-off-by: Matjaz Hegedic Link: http://lkml.kernel.org/r/1489064417-7445-1-git-send-email-matjaz.hegedic@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/reboot.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/reboot.c b/arch/x86/kernel/reboot.c index 4194d6f9bb29..067f9813fd2c 100644 --- a/arch/x86/kernel/reboot.c +++ b/arch/x86/kernel/reboot.c @@ -228,7 +228,7 @@ static struct dmi_system_id __initdata reboot_dmi_table[] = { .ident = "ASUS EeeBook X205TA", .matches = { DMI_MATCH(DMI_SYS_VENDOR, "ASUSTeK COMPUTER INC."), - DMI_MATCH(DMI_PRODUCT_NAME, "X205TAW"), + DMI_MATCH(DMI_PRODUCT_NAME, "X205TA"), }, }, { /* Handle problems with rebooting on ASUS EeeBook X205TAW */ -- cgit v1.2.3 From 40c50c1fecdf012a3bf055ec813f0ef2eda2749c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 10 Mar 2017 13:17:18 +0100 Subject: kexec, x86/purgatory: Unbreak it and clean it up The purgatory code defines global variables which are referenced via a symbol lookup in the kexec code (core and arch). A recent commit addressing sparse warnings made these static and thereby broke kexec_file. Why did this happen? Simply because the whole machinery is undocumented and lacks any form of forward declarations. The variable names are unspecific and lack a prefix, so adding forward declarations creates shadow variables in the core code. Aside of that the code relies on magic constants and duplicate struct definitions with no way to ensure that these things stay in sync. The section placement of the purgatory variables happened by chance and not by design. Unbreak kexec and cleanup the mess: - Add proper forward declarations and document the usage - Use common struct definition - Use the proper common defines instead of magic constants - Add a purgatory_ prefix to have a proper name space - Use ARRAY_SIZE() instead of a homebrewn reimplementation - Add proper sections to the purgatory variables [ From Mike ] Fixes: 72042a8c7b01 ("x86/purgatory: Make functions and variables static") Reported-by: Mike Galbraith < Signed-off-by: Thomas Gleixner Cc: Nicholas Mc Guire Cc: Borislav Petkov Cc: Vivek Goyal Cc: "Tobin C. Harding" Link: http://lkml.kernel.org/r/alpine.DEB.2.20.1703101315140.3681@nanos Signed-off-by: Thomas Gleixner --- arch/powerpc/purgatory/trampoline.S | 12 ++++++------ arch/x86/include/asm/purgatory.h | 20 ++++++++++++++++++++ arch/x86/kernel/machine_kexec_64.c | 9 ++++++--- arch/x86/purgatory/purgatory.c | 35 +++++++++++++++++------------------ arch/x86/purgatory/purgatory.h | 8 -------- arch/x86/purgatory/setup-x86_64.S | 2 +- arch/x86/purgatory/sha256.h | 1 - include/linux/purgatory.h | 23 +++++++++++++++++++++++ kernel/kexec_file.c | 8 ++++---- kernel/kexec_internal.h | 6 +----- 10 files changed, 78 insertions(+), 46 deletions(-) create mode 100644 arch/x86/include/asm/purgatory.h delete mode 100644 arch/x86/purgatory/purgatory.h create mode 100644 include/linux/purgatory.h (limited to 'arch/x86') diff --git a/arch/powerpc/purgatory/trampoline.S b/arch/powerpc/purgatory/trampoline.S index f9760ccf4032..3696ea6c4826 100644 --- a/arch/powerpc/purgatory/trampoline.S +++ b/arch/powerpc/purgatory/trampoline.S @@ -116,13 +116,13 @@ dt_offset: .data .balign 8 -.globl sha256_digest -sha256_digest: +.globl purgatory_sha256_digest +purgatory_sha256_digest: .skip 32 - .size sha256_digest, . - sha256_digest + .size purgatory_sha256_digest, . - purgatory_sha256_digest .balign 8 -.globl sha_regions -sha_regions: +.globl purgatory_sha_regions +purgatory_sha_regions: .skip 8 * 2 * 16 - .size sha_regions, . - sha_regions + .size purgatory_sha_regions, . - purgatory_sha_regions diff --git a/arch/x86/include/asm/purgatory.h b/arch/x86/include/asm/purgatory.h new file mode 100644 index 000000000000..d7da2729903d --- /dev/null +++ b/arch/x86/include/asm/purgatory.h @@ -0,0 +1,20 @@ +#ifndef _ASM_X86_PURGATORY_H +#define _ASM_X86_PURGATORY_H + +#ifndef __ASSEMBLY__ +#include + +extern void purgatory(void); +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern unsigned long purgatory_backup_dest; +extern unsigned long purgatory_backup_src; +extern unsigned long purgatory_backup_sz; +#endif /* __ASSEMBLY__ */ + +#endif /* _ASM_PURGATORY_H */ diff --git a/arch/x86/kernel/machine_kexec_64.c b/arch/x86/kernel/machine_kexec_64.c index 307b1f4543de..857cdbd02867 100644 --- a/arch/x86/kernel/machine_kexec_64.c +++ b/arch/x86/kernel/machine_kexec_64.c @@ -194,19 +194,22 @@ static int arch_update_purgatory(struct kimage *image) /* Setup copying of backup region */ if (image->type == KEXEC_TYPE_CRASH) { - ret = kexec_purgatory_get_set_symbol(image, "backup_dest", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_dest", &image->arch.backup_load_addr, sizeof(image->arch.backup_load_addr), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_src", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_src", &image->arch.backup_src_start, sizeof(image->arch.backup_src_start), 0); if (ret) return ret; - ret = kexec_purgatory_get_set_symbol(image, "backup_sz", + ret = kexec_purgatory_get_set_symbol(image, + "purgatory_backup_sz", &image->arch.backup_src_sz, sizeof(image->arch.backup_src_sz), 0); if (ret) diff --git a/arch/x86/purgatory/purgatory.c b/arch/x86/purgatory/purgatory.c index b6d5c8946e66..470edad96bb9 100644 --- a/arch/x86/purgatory/purgatory.c +++ b/arch/x86/purgatory/purgatory.c @@ -10,22 +10,19 @@ * Version 2. See the file COPYING for more details. */ +#include +#include + #include "sha256.h" -#include "purgatory.h" #include "../boot/string.h" -struct sha_region { - unsigned long start; - unsigned long len; -}; - -static unsigned long backup_dest; -static unsigned long backup_src; -static unsigned long backup_sz; +unsigned long purgatory_backup_dest __section(.kexec-purgatory); +unsigned long purgatory_backup_src __section(.kexec-purgatory); +unsigned long purgatory_backup_sz __section(.kexec-purgatory); -static u8 sha256_digest[SHA256_DIGEST_SIZE] = { 0 }; +u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE] __section(.kexec-purgatory); -struct sha_region sha_regions[16] = {}; +struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX] __section(.kexec-purgatory); /* * On x86, second kernel requries first 640K of memory to boot. Copy @@ -34,26 +31,28 @@ struct sha_region sha_regions[16] = {}; */ static int copy_backup_region(void) { - if (backup_dest) - memcpy((void *)backup_dest, (void *)backup_src, backup_sz); - + if (purgatory_backup_dest) { + memcpy((void *)purgatory_backup_dest, + (void *)purgatory_backup_src, purgatory_backup_sz); + } return 0; } static int verify_sha256_digest(void) { - struct sha_region *ptr, *end; + struct kexec_sha_region *ptr, *end; u8 digest[SHA256_DIGEST_SIZE]; struct sha256_state sctx; sha256_init(&sctx); - end = &sha_regions[sizeof(sha_regions)/sizeof(sha_regions[0])]; - for (ptr = sha_regions; ptr < end; ptr++) + end = purgatory_sha_regions + ARRAY_SIZE(purgatory_sha_regions); + + for (ptr = purgatory_sha_regions; ptr < end; ptr++) sha256_update(&sctx, (uint8_t *)(ptr->start), ptr->len); sha256_final(&sctx, digest); - if (memcmp(digest, sha256_digest, sizeof(digest))) + if (memcmp(digest, purgatory_sha256_digest, sizeof(digest))) return 1; return 0; diff --git a/arch/x86/purgatory/purgatory.h b/arch/x86/purgatory/purgatory.h deleted file mode 100644 index e2e365a6c192..000000000000 --- a/arch/x86/purgatory/purgatory.h +++ /dev/null @@ -1,8 +0,0 @@ -#ifndef PURGATORY_H -#define PURGATORY_H - -#ifndef __ASSEMBLY__ -extern void purgatory(void); -#endif /* __ASSEMBLY__ */ - -#endif /* PURGATORY_H */ diff --git a/arch/x86/purgatory/setup-x86_64.S b/arch/x86/purgatory/setup-x86_64.S index f90e9dfa90bb..dfae9b9e60b5 100644 --- a/arch/x86/purgatory/setup-x86_64.S +++ b/arch/x86/purgatory/setup-x86_64.S @@ -9,7 +9,7 @@ * This source code is licensed under the GNU General Public License, * Version 2. See the file COPYING for more details. */ -#include "purgatory.h" +#include .text .globl purgatory_start diff --git a/arch/x86/purgatory/sha256.h b/arch/x86/purgatory/sha256.h index bd15a4127735..2867d9825a57 100644 --- a/arch/x86/purgatory/sha256.h +++ b/arch/x86/purgatory/sha256.h @@ -10,7 +10,6 @@ #ifndef SHA256_H #define SHA256_H - #include #include diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h new file mode 100644 index 000000000000..d60d4e278609 --- /dev/null +++ b/include/linux/purgatory.h @@ -0,0 +1,23 @@ +#ifndef _LINUX_PURGATORY_H +#define _LINUX_PURGATORY_H + +#include +#include +#include + +struct kexec_sha_region { + unsigned long start; + unsigned long len; +}; + +/* + * These forward declarations serve two purposes: + * + * 1) Make sparse happy when checking arch/purgatory + * 2) Document that these are required to be global so the symbol + * lookup in kexec works + */ +extern struct kexec_sha_region purgatory_sha_regions[KEXEC_SEGMENT_MAX]; +extern u8 purgatory_sha256_digest[SHA256_DIGEST_SIZE]; + +#endif diff --git a/kernel/kexec_file.c b/kernel/kexec_file.c index b56a558e406d..b118735fea9d 100644 --- a/kernel/kexec_file.c +++ b/kernel/kexec_file.c @@ -614,13 +614,13 @@ static int kexec_calculate_store_digests(struct kimage *image) ret = crypto_shash_final(desc, digest); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha_regions", - sha_regions, sha_region_sz, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha_regions", + sha_regions, sha_region_sz, 0); if (ret) goto out_free_digest; - ret = kexec_purgatory_get_set_symbol(image, "sha256_digest", - digest, SHA256_DIGEST_SIZE, 0); + ret = kexec_purgatory_get_set_symbol(image, "purgatory_sha256_digest", + digest, SHA256_DIGEST_SIZE, 0); if (ret) goto out_free_digest; } diff --git a/kernel/kexec_internal.h b/kernel/kexec_internal.h index 4cef7e4706b0..799a8a452187 100644 --- a/kernel/kexec_internal.h +++ b/kernel/kexec_internal.h @@ -15,11 +15,7 @@ int kimage_is_destination_range(struct kimage *image, extern struct mutex kexec_mutex; #ifdef CONFIG_KEXEC_FILE -struct kexec_sha_region { - unsigned long start; - unsigned long len; -}; - +#include void kimage_file_post_load_cleanup(struct kimage *image); #else /* CONFIG_KEXEC_FILE */ static inline void kimage_file_post_load_cleanup(struct kimage *image) { } -- cgit v1.2.3 From c962cff17dfa11f4a8227ac16de2b28aea3312e4 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:23 +0800 Subject: Revert "x86/acpi: Set persistent cpuid <-> nodeid mapping when booting" Revert: dc6db24d2476 ("x86/acpi: Set persistent cpuid <-> nodeid mapping when booting") The mapping of "cpuid <-> nodeid" is established at boot time via ACPI tables to keep associations of workqueues and other node related items consistent across cpu hotplug. But, ACPI tables are unreliable and failures with that boot time mapping have been reported on machines where the ACPI table and the physical information which is retrieved at actual hotplug is inconsistent. Revert the mapping implementation so it can be replaced with a less error prone approach. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-2-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 2 +- drivers/acpi/acpi_processor.c | 5 --- drivers/acpi/bus.c | 1 - drivers/acpi/processor_core.c | 73 ------------------------------------------- include/linux/acpi.h | 3 -- 5 files changed, 1 insertion(+), 83 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index ae32838cac5f..f6b0e87d2388 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -710,7 +710,7 @@ static void __init acpi_set_irq_model_ioapic(void) #ifdef CONFIG_ACPI_HOTPLUG_CPU #include -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) +static int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) { #ifdef CONFIG_ACPI_NUMA int nid; diff --git a/drivers/acpi/acpi_processor.c b/drivers/acpi/acpi_processor.c index 4467a8089ab8..5d208a99d0c9 100644 --- a/drivers/acpi/acpi_processor.c +++ b/drivers/acpi/acpi_processor.c @@ -182,11 +182,6 @@ int __weak arch_register_cpu(int cpu) void __weak arch_unregister_cpu(int cpu) {} -int __weak acpi_map_cpu2node(acpi_handle handle, int cpu, int physid) -{ - return -ENODEV; -} - static int acpi_processor_hotadd_init(struct acpi_processor *pr) { unsigned long long sta; diff --git a/drivers/acpi/bus.c b/drivers/acpi/bus.c index 80cb5eb75b63..34fbe027e73a 100644 --- a/drivers/acpi/bus.c +++ b/drivers/acpi/bus.c @@ -1249,7 +1249,6 @@ static int __init acpi_init(void) acpi_wakeup_device_init(); acpi_debugger_init(); acpi_setup_sb_notify_handler(); - acpi_set_processor_mapping(); return 0; } diff --git a/drivers/acpi/processor_core.c b/drivers/acpi/processor_core.c index 611a5585a902..a84386204659 100644 --- a/drivers/acpi/processor_core.c +++ b/drivers/acpi/processor_core.c @@ -278,79 +278,6 @@ int acpi_get_cpuid(acpi_handle handle, int type, u32 acpi_id) } EXPORT_SYMBOL_GPL(acpi_get_cpuid); -#ifdef CONFIG_ACPI_HOTPLUG_CPU -static bool __init -map_processor(acpi_handle handle, phys_cpuid_t *phys_id, int *cpuid) -{ - int type, id; - u32 acpi_id; - acpi_status status; - acpi_object_type acpi_type; - unsigned long long tmp; - union acpi_object object = { 0 }; - struct acpi_buffer buffer = { sizeof(union acpi_object), &object }; - - status = acpi_get_type(handle, &acpi_type); - if (ACPI_FAILURE(status)) - return false; - - switch (acpi_type) { - case ACPI_TYPE_PROCESSOR: - status = acpi_evaluate_object(handle, NULL, NULL, &buffer); - if (ACPI_FAILURE(status)) - return false; - acpi_id = object.processor.proc_id; - - /* validate the acpi_id */ - if(acpi_processor_validate_proc_id(acpi_id)) - return false; - break; - case ACPI_TYPE_DEVICE: - status = acpi_evaluate_integer(handle, "_UID", NULL, &tmp); - if (ACPI_FAILURE(status)) - return false; - acpi_id = tmp; - break; - default: - return false; - } - - type = (acpi_type == ACPI_TYPE_DEVICE) ? 1 : 0; - - *phys_id = __acpi_get_phys_id(handle, type, acpi_id, false); - id = acpi_map_cpuid(*phys_id, acpi_id); - - if (id < 0) - return false; - *cpuid = id; - return true; -} - -static acpi_status __init -set_processor_node_mapping(acpi_handle handle, u32 lvl, void *context, - void **rv) -{ - phys_cpuid_t phys_id; - int cpu_id; - - if (!map_processor(handle, &phys_id, &cpu_id)) - return AE_ERROR; - - acpi_map_cpu2node(handle, cpu_id, phys_id); - return AE_OK; -} - -void __init acpi_set_processor_mapping(void) -{ - /* Set persistent cpu <-> node mapping for all processors. */ - acpi_walk_namespace(ACPI_TYPE_PROCESSOR, ACPI_ROOT_OBJECT, - ACPI_UINT32_MAX, set_processor_node_mapping, - NULL, NULL, NULL); -} -#else -void __init acpi_set_processor_mapping(void) {} -#endif /* CONFIG_ACPI_HOTPLUG_CPU */ - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC static int get_ioapic_id(struct acpi_subtable_header *entry, u32 gsi_base, u64 *phys_addr, int *ioapic_id) diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af..63a7519b00cc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -294,11 +294,8 @@ bool acpi_processor_validate_proc_id(int proc_id); int acpi_map_cpu(acpi_handle handle, phys_cpuid_t physid, u32 acpi_id, int *pcpu); int acpi_unmap_cpu(int cpu); -int acpi_map_cpu2node(acpi_handle handle, int cpu, int physid); #endif /* CONFIG_ACPI_HOTPLUG_CPU */ -void acpi_set_processor_mapping(void); - #ifdef CONFIG_ACPI_HOTPLUG_IOAPIC int acpi_get_ioapic_id(acpi_handle handle, u32 gsi_base, u64 *phys_addr); #endif -- cgit v1.2.3 From 2b85b3d22920db7473e5fed5719e7955c0ec323e Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Fri, 3 Mar 2017 16:02:25 +0800 Subject: x86/acpi: Restore the order of CPU IDs The following commits: f7c28833c2 ("x86/acpi: Enable acpi to register all possible cpus at boot time") and 8f54969dc8 ("x86/acpi: Introduce persistent storage for cpuid <-> apicid mapping") ... registered all the possible CPUs at boot time via ACPI tables to make the mapping of cpuid <-> apicid fixed. Both enabled and disabled CPUs could have a logical CPU ID after boot time. But, ACPI tables are unreliable. the number amd order of Local APIC entries which depends on the firmware is often inconsistent with the physical devices. Even if they are consistent, The disabled CPUs which take up some logical CPU IDs will also make the order discontinuous. Revert the part of disabled CPUs registration, keep the allocation logic of logical CPU IDs and also keep some code location changes. Signed-off-by: Dou Liyang Tested-by: Xiaolong Ye Cc: rjw@rjwysocki.net Cc: linux-acpi@vger.kernel.org Cc: guzheng1@huawei.com Cc: izumi.taku@jp.fujitsu.com Cc: lenb@kernel.org Link: http://lkml.kernel.org/r/1488528147-2279-4-git-send-email-douly.fnst@cn.fujitsu.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/acpi/boot.c | 7 ++++++- arch/x86/kernel/apic/apic.c | 26 +++++++------------------- 2 files changed, 13 insertions(+), 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index f6b0e87d2388..b2879cc23db4 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -179,10 +179,15 @@ static int acpi_register_lapic(int id, u32 acpiid, u8 enabled) return -EINVAL; } + if (!enabled) { + ++disabled_cpus; + return -EINVAL; + } + if (boot_cpu_physical_apicid != -1U) ver = boot_cpu_apic_version; - cpu = __generic_processor_info(id, ver, enabled); + cpu = generic_processor_info(id, ver); if (cpu >= 0) early_per_cpu(x86_cpu_to_acpiid, cpu) = acpiid; diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index aee7deddabd0..8ccb7ef512e0 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -2063,7 +2063,7 @@ static int allocate_logical_cpuid(int apicid) return nr_logical_cpuids++; } -int __generic_processor_info(int apicid, int version, bool enabled) +int generic_processor_info(int apicid, int version) { int cpu, max = nr_cpu_ids; bool boot_cpu_detected = physid_isset(boot_cpu_physical_apicid, @@ -2121,11 +2121,9 @@ int __generic_processor_info(int apicid, int version, bool enabled) if (num_processors >= nr_cpu_ids) { int thiscpu = max + disabled_cpus; - if (enabled) { - pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " - "reached. Processor %d/0x%x ignored.\n", - max, thiscpu, apicid); - } + pr_warning("APIC: NR_CPUS/possible_cpus limit of %i " + "reached. Processor %d/0x%x ignored.\n", + max, thiscpu, apicid); disabled_cpus++; return -EINVAL; @@ -2177,23 +2175,13 @@ int __generic_processor_info(int apicid, int version, bool enabled) apic->x86_32_early_logical_apicid(cpu); #endif set_cpu_possible(cpu, true); - - if (enabled) { - num_processors++; - physid_set(apicid, phys_cpu_present_map); - set_cpu_present(cpu, true); - } else { - disabled_cpus++; - } + physid_set(apicid, phys_cpu_present_map); + set_cpu_present(cpu, true); + num_processors++; return cpu; } -int generic_processor_info(int apicid, int version) -{ - return __generic_processor_info(apicid, version, true); -} - int hard_smp_processor_id(void) { return read_apic_id(); -- cgit v1.2.3 From 2c4ea6e28dbf15ab93632c5c189f3948366b8885 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 11 Mar 2017 01:31:19 +0100 Subject: x86/tlb: Fix tlb flushing when lguest clears PGE Fengguang reported random corruptions from various locations on x86-32 after commits d2852a224050 ("arch: add ARCH_HAS_SET_MEMORY config") and 9d876e79df6a ("bpf: fix unlocking of jited image when module ronx not set") that uses the former. While x86-32 doesn't have a JIT like x86_64, the bpf_prog_lock_ro() and bpf_prog_unlock_ro() got enabled due to ARCH_HAS_SET_MEMORY, whereas Fengguang's test kernel doesn't have module support built in and therefore never had the DEBUG_SET_MODULE_RONX setting enabled. After investigating the crashes further, it turned out that using set_memory_ro() and set_memory_rw() didn't have the desired effect, for example, setting the pages as read-only on x86-32 would still let probe_kernel_write() succeed without error. This behavior would manifest itself in situations where the vmalloc'ed buffer was accessed prior to set_memory_*() such as in case of bpf_prog_alloc(). In cases where it wasn't, the page attribute changes seemed to have taken effect, leading to the conclusion that a TLB invalidate didn't happen. Moreover, it turned out that this issue reproduced with qemu in "-cpu kvm64" mode, but not for "-cpu host". When the issue occurs, change_page_attr_set_clr() did trigger a TLB flush as expected via __flush_tlb_all() through cpa_flush_range(), though. There are 3 variants for issuing a TLB flush: invpcid_flush_all() (depends on CPU feature bits X86_FEATURE_INVPCID, X86_FEATURE_PGE), cr4 based flush (depends on X86_FEATURE_PGE), and cr3 based flush. For "-cpu host" case in my setup, the flush used invpcid_flush_all() variant, whereas for "-cpu kvm64", the flush was cr4 based. Switching the kvm64 case to cr3 manually worked fine, and further investigating the cr4 one turned out that X86_CR4_PGE bit was not set in cr4 register, meaning the __native_flush_tlb_global_irq_disabled() wrote cr4 twice with the same value instead of clearing X86_CR4_PGE in the first write to trigger the flush. It turned out that X86_CR4_PGE was cleared from cr4 during init from lguest_arch_host_init() via adjust_pge(). The X86_FEATURE_PGE bit is also cleared from there due to concerns of using PGE in guest kernel that can lead to hard to trace bugs (see bff672e630a0 ("lguest: documentation V: Host") in init()). The CPU feature bits are cleared in dynamic boot_cpu_data, but they never propagated to __flush_tlb_all() as it uses static_cpu_has() instead of boot_cpu_has() for testing which variant of TLB flushing to use, meaning they still used the old setting of the host kernel. Clearing via setup_clear_cpu_cap(X86_FEATURE_PGE) so this would propagate to static_cpu_has() checks is too late at this point as sections have been patched already, so for now, it seems reasonable to switch back to boot_cpu_has(X86_FEATURE_PGE) as it was prior to commit c109bf95992b ("x86/cpufeature: Remove cpu_has_pge"). This lets the TLB flush trigger via cr3 as originally intended, properly makes the new page attributes visible and thus fixes the crashes seen by Fengguang. Fixes: c109bf95992b ("x86/cpufeature: Remove cpu_has_pge") Reported-by: Fengguang Wu Signed-off-by: Daniel Borkmann Cc: bp@suse.de Cc: Kees Cook Cc: "David S. Miller" Cc: netdev@vger.kernel.org Cc: Rusty Russell Cc: Alexei Starovoitov Cc: Linus Torvalds Cc: lkp@01.org Cc: Laura Abbott Cc: stable@vger.kernel.org Link: http://lkml.kernrl.org/r/20170301125426.l4nf65rx4wahohyl@wfg-t540p.sh.intel.com Link: http://lkml.kernel.org/r/25c41ad9eca164be4db9ad84f768965b7eb19d9e.1489191673.git.daniel@iogearbox.net Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/tlbflush.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/tlbflush.h b/arch/x86/include/asm/tlbflush.h index 6fa85944af83..fc5abff9b7fd 100644 --- a/arch/x86/include/asm/tlbflush.h +++ b/arch/x86/include/asm/tlbflush.h @@ -188,7 +188,7 @@ static inline void __native_flush_tlb_single(unsigned long addr) static inline void __flush_tlb_all(void) { - if (static_cpu_has(X86_FEATURE_PGE)) + if (boot_cpu_has(X86_FEATURE_PGE)) __flush_tlb_global(); else __flush_tlb(); -- cgit v1.2.3 From 80354c29025833acd72ddac1ffa21c6cb50128cd Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Sun, 12 Mar 2017 17:07:44 +0200 Subject: x86/platform/intel-mid: Correct MSI IRQ line for watchdog device The interrupt line used for the watchdog is 12, according to the official Intel Edison BSP code. And indeed after fixing it we start getting an interrupt and thus the watchdog starts working again: [ 191.699951] Kernel panic - not syncing: Kernel Watchdog Signed-off-by: Andy Shevchenko Cc: Borislav Petkov Cc: David Cohen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 78a3bb9e408b ("x86: intel-mid: add watchdog platform code for Merrifield") Link: http://lkml.kernel.org/r/20170312150744.45493-1-andriy.shevchenko@linux.intel.com Signed-off-by: Ingo Molnar --- arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c index 86edd1e941eb..9e304e2ea4f5 100644 --- a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_wdt.c @@ -19,7 +19,7 @@ #include #include -#define TANGIER_EXT_TIMER0_MSI 15 +#define TANGIER_EXT_TIMER0_MSI 12 static struct platform_device wdt_dev = { .name = "intel_mid_wdt", -- cgit v1.2.3 From 44fee88cea43d3c2cac962e0439cb10a3cabff6d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 13 Mar 2017 15:57:12 +0100 Subject: x86/tsc: Fix ART for TSC_KNOWN_FREQ Subhransu reported that convert_art_to_tsc() isn't working for him. The ART to TSC relation is only set up for systems which use the refined TSC calibration. Systems with known TSC frequency (available via CPUID 15) are not using the refined calibration and therefor the ART to TSC relation is never established. Add the setup to the known frequency init path which skips ART calibration. The init code needs to be duplicated as for systems which use refined calibration the ART setup must be delayed until calibration has been done. The problem has been there since the ART support was introdduced, but only detected now because Subhransu tested the first time on hardware which has TSC frequency enumerated via CPUID 15. Note for stable: The conditional has changed from TSC_RELIABLE to TSC_KNOWN_FREQUENCY. [ tglx: Rewrote changelog and identified the proper 'Fixes' commit ] Fixes: f9677e0f8308 ("x86/tsc: Always Running Timer (ART) correlated clocksource") Reported-by: "Prusty, Subhransu S" Signed-off-by: Peter Zijlstra (Intel) Cc: stable@vger.kernel.org Cc: christopher.s.hall@intel.com Cc: kevin.b.stanton@intel.com Cc: john.stultz@linaro.org Cc: akataria@vmware.com Link: http://lkml.kernel.org/r/20170313145712.GI3312@twins.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- arch/x86/kernel/tsc.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index 4f7a9833d8e5..c73a7f9e881a 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -1333,6 +1333,8 @@ static int __init init_tsc_clocksource(void) * the refined calibration and directly register it as a clocksource. */ if (boot_cpu_has(X86_FEATURE_TSC_KNOWN_FREQ)) { + if (boot_cpu_has(X86_FEATURE_ART)) + art_related_clocksource = &clocksource_tsc; clocksource_register_khz(&clocksource_tsc, tsc_khz); return 0; } -- cgit v1.2.3 From 0d443b70cc92d741cbc1dcbf1079897b3d8bc3cc Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 7 Mar 2017 15:08:42 -0600 Subject: x86/platform: Remove warning message for duplicate NMI handlers Remove the WARNING message associated with multiple NMI handlers as there are at least two that are legitimate. These are the KGDB and the UV handlers and both want to be called if the NMI has not been claimed by any other NMI handler. Use of the UNKNOWN NMI call chain dramatically lowers the NMI call rate when high frequency NMI tools are in use, notably the perf tools. It is required on systems that cannot sustain a high NMI call rate without adversely affecting the system operation. Signed-off-by: Mike Travis Reviewed-by: Dimitri Sivanich Cc: Don Zickus Cc: Peter Zijlstra Cc: Russ Anderson Cc: Frank Ramsay Cc: Tony Ernst Link: http://lkml.kernel.org/r/20170307210841.730959611@asylum.americas.sgi.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/nmi.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/nmi.c b/arch/x86/kernel/nmi.c index f088ea4c66e7..a723ae9440ab 100644 --- a/arch/x86/kernel/nmi.c +++ b/arch/x86/kernel/nmi.c @@ -166,11 +166,9 @@ int __register_nmi_handler(unsigned int type, struct nmiaction *action) spin_lock_irqsave(&desc->lock, flags); /* - * most handlers of type NMI_UNKNOWN never return because - * they just assume the NMI is theirs. Just a sanity check - * to manage expectations + * Indicate if there are multiple registrations on the + * internal NMI handler call chains (SERR and IO_CHECK). */ - WARN_ON_ONCE(type == NMI_UNKNOWN && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_SERR && !list_empty(&desc->head)); WARN_ON_ONCE(type == NMI_IO_CHECK && !list_empty(&desc->head)); -- cgit v1.2.3 From c836e5cf0d0880d6668966476c4ffe727f29f69a Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:21 +0200 Subject: x86/platform/intel-mid: Use common power off sequence Intel Medfield may use common for Intel MID devices power sequence. Remove unneded custom power off stub. While here, remove function forward declaration. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-1-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/mfld.c | 15 ++++----------- 1 file changed, 4 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/mfld.c b/arch/x86/platform/intel-mid/mfld.c index e793fe509971..e42978d4deaf 100644 --- a/arch/x86/platform/intel-mid/mfld.c +++ b/arch/x86/platform/intel-mid/mfld.c @@ -17,16 +17,6 @@ #include "intel_mid_weak_decls.h" -static void penwell_arch_setup(void); -/* penwell arch ops */ -static struct intel_mid_ops penwell_ops = { - .arch_setup = penwell_arch_setup, -}; - -static void mfld_power_off(void) -{ -} - static unsigned long __init mfld_calibrate_tsc(void) { unsigned long fast_calibrate; @@ -63,9 +53,12 @@ static unsigned long __init mfld_calibrate_tsc(void) static void __init penwell_arch_setup(void) { x86_platform.calibrate_tsc = mfld_calibrate_tsc; - pm_power_off = mfld_power_off; } +static struct intel_mid_ops penwell_ops = { + .arch_setup = penwell_arch_setup, +}; + void *get_penwell_ops(void) { return &penwell_ops; -- cgit v1.2.3 From 859bb6d59066b96341020e0991f191631cabe59d Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 8 Mar 2017 13:24:22 +0200 Subject: x86/platform/intel-mid: Add power button support for Merrifield Intel Merrifield platform has a Basin Cove PMIC to handle in particular power button events. Add necessary bits to enable it. Signed-off-by: Andy Shevchenko Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170308112422.67533-2-andriy.shevchenko@linux.intel.com Signed-off-by: Thomas Gleixner --- arch/x86/platform/intel-mid/device_libs/Makefile | 1 + .../device_libs/platform_mrfld_power_btn.c | 82 ++++++++++++++++++++++ 2 files changed, 83 insertions(+) create mode 100644 arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c (limited to 'arch/x86') diff --git a/arch/x86/platform/intel-mid/device_libs/Makefile b/arch/x86/platform/intel-mid/device_libs/Makefile index a7dbec4dce27..3dbde04febdc 100644 --- a/arch/x86/platform/intel-mid/device_libs/Makefile +++ b/arch/x86/platform/intel-mid/device_libs/Makefile @@ -26,5 +26,6 @@ obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_pcal9555a.o obj-$(subst m,y,$(CONFIG_GPIO_PCA953X)) += platform_tca6416.o # MISC Devices obj-$(subst m,y,$(CONFIG_KEYBOARD_GPIO)) += platform_gpio_keys.o +obj-$(subst m,y,$(CONFIG_INTEL_MID_POWER_BUTTON)) += platform_mrfld_power_btn.o obj-$(subst m,y,$(CONFIG_RTC_DRV_CMOS)) += platform_mrfld_rtc.o obj-$(subst m,y,$(CONFIG_INTEL_MID_WATCHDOG)) += platform_mrfld_wdt.o diff --git a/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c new file mode 100644 index 000000000000..a6c3705a28ad --- /dev/null +++ b/arch/x86/platform/intel-mid/device_libs/platform_mrfld_power_btn.c @@ -0,0 +1,82 @@ +/* + * Intel Merrifield power button support + * + * (C) Copyright 2017 Intel Corporation + * + * Author: Andy Shevchenko + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; version 2 + * of the License. + */ + +#include +#include +#include +#include + +#include +#include + +static struct resource mrfld_power_btn_resources[] = { + { + .flags = IORESOURCE_IRQ, + }, +}; + +static struct platform_device mrfld_power_btn_dev = { + .name = "msic_power_btn", + .id = PLATFORM_DEVID_NONE, + .num_resources = ARRAY_SIZE(mrfld_power_btn_resources), + .resource = mrfld_power_btn_resources, +}; + +static int mrfld_power_btn_scu_status_change(struct notifier_block *nb, + unsigned long code, void *data) +{ + if (code == SCU_DOWN) { + platform_device_unregister(&mrfld_power_btn_dev); + return 0; + } + + return platform_device_register(&mrfld_power_btn_dev); +} + +static struct notifier_block mrfld_power_btn_scu_notifier = { + .notifier_call = mrfld_power_btn_scu_status_change, +}; + +static int __init register_mrfld_power_btn(void) +{ + if (intel_mid_identify_cpu() != INTEL_MID_CPU_CHIP_TANGIER) + return -ENODEV; + + /* + * We need to be sure that the SCU IPC is ready before + * PMIC power button device can be registered: + */ + intel_scu_notifier_add(&mrfld_power_btn_scu_notifier); + + return 0; +} +arch_initcall(register_mrfld_power_btn); + +static void __init *mrfld_power_btn_platform_data(void *info) +{ + struct resource *res = mrfld_power_btn_resources; + struct sfi_device_table_entry *pentry = info; + + res->start = res->end = pentry->irq; + return NULL; +} + +static const struct devs_id mrfld_power_btn_dev_id __initconst = { + .name = "bcove_power_btn", + .type = SFI_DEV_TYPE_IPC, + .delay = 1, + .msic = 1, + .get_platform_data = &mrfld_power_btn_platform_data, +}; + +sfi_device(mrfld_power_btn_dev_id); -- cgit v1.2.3 From be3606ff739d1c1be36389f8737c577ad87e1f57 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 13 Mar 2017 19:33:37 +0300 Subject: x86/kasan: Fix boot with KASAN=y and PROFILE_ANNOTATED_BRANCHES=y The kernel doesn't boot with both PROFILE_ANNOTATED_BRANCHES=y and KASAN=y options selected. With branch profiling enabled we end up calling ftrace_likely_update() before kasan_early_init(). ftrace_likely_update() is built with KASAN instrumentation, so calling it before kasan has been initialized leads to crash. Use DISABLE_BRANCH_PROFILING define to make sure that we don't call ftrace_likely_update() from early code before kasan_early_init(). Fixes: ef7f0d6a6ca8 ("x86_64: add KASan support") Reported-by: Fengguang Wu Signed-off-by: Andrey Ryabinin Cc: kasan-dev@googlegroups.com Cc: Alexander Potapenko Cc: stable@vger.kernel.org Cc: Andrew Morton Cc: lkp@01.org Cc: Dmitry Vyukov Link: http://lkml.kernel.org/r/20170313163337.1704-1-aryabinin@virtuozzo.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/head64.c | 1 + arch/x86/mm/kasan_init_64.c | 1 + 2 files changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/head64.c b/arch/x86/kernel/head64.c index 54a2372f5dbb..b5785c197e53 100644 --- a/arch/x86/kernel/head64.c +++ b/arch/x86/kernel/head64.c @@ -4,6 +4,7 @@ * Copyright (C) 2000 Andrea Arcangeli SuSE */ +#define DISABLE_BRANCH_PROFILING #include #include #include diff --git a/arch/x86/mm/kasan_init_64.c b/arch/x86/mm/kasan_init_64.c index 8d63d7a104c3..4c90cfdc128b 100644 --- a/arch/x86/mm/kasan_init_64.c +++ b/arch/x86/mm/kasan_init_64.c @@ -1,3 +1,4 @@ +#define DISABLE_BRANCH_PROFILING #define pr_fmt(fmt) "kasan: " fmt #include #include -- cgit v1.2.3 From d434936e4cbb10181463622962f30b989d3e9e19 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Mar 2017 13:12:53 +0100 Subject: mm, x86: Fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Arnd Bergmann Acked-by: Dave Jiang Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Andrew Morton Cc: Borislav Petkov Cc: Thomas Garnier Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Thomas Gleixner --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif -- cgit v1.2.3 From 87a6b2975f0d340c75b7488d22d61d2f98fb8abf Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Mar 2017 23:27:47 -0500 Subject: x86/unwind: Fix last frame check for aligned function stacks Pavel Machek reported the following warning on x86-32: WARNING: kernel stack frame pointer at f50cdf98 in swapper/2:0 has bad value (null) The warning is caused by the unwinder not realizing that it reached the end of the stack, due to an unusual prologue which gcc sometimes generates for aligned stacks. The prologue is based on a gcc feature called the Dynamic Realign Argument Pointer (DRAP). It's almost always enabled for aligned stacks when -maccumulate-outgoing-args isn't set. This issue is similar to the one fixed by the following commit: 8023e0e2a48d ("x86/unwind: Adjust last frame check for aligned function stacks") ... but that fix was specific to x86-64. Make the fix more generic to cover x86-32 as well, and also ensure that the return address referred to by the frame pointer is a copy of the original return address. Fixes: acb4608ad186 ("x86/unwind: Create stack frames for saved syscall registers") Reported-by: Pavel Machek Signed-off-by: Josh Poimboeuf Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/50d4924db716c264b14f1633037385ec80bf89d2.1489465609.git.jpoimboe@redhat.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/unwind_frame.c | 36 ++++++++++++++++++++++++++++++------ 1 file changed, 30 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/unwind_frame.c b/arch/x86/kernel/unwind_frame.c index 478d15dbaee4..08339262b666 100644 --- a/arch/x86/kernel/unwind_frame.c +++ b/arch/x86/kernel/unwind_frame.c @@ -82,19 +82,43 @@ static size_t regs_size(struct pt_regs *regs) return sizeof(*regs); } +#ifdef CONFIG_X86_32 +#define GCC_REALIGN_WORDS 3 +#else +#define GCC_REALIGN_WORDS 1 +#endif + static bool is_last_task_frame(struct unwind_state *state) { - unsigned long bp = (unsigned long)state->bp; - unsigned long regs = (unsigned long)task_pt_regs(state->task); + unsigned long *last_bp = (unsigned long *)task_pt_regs(state->task) - 2; + unsigned long *aligned_bp = last_bp - GCC_REALIGN_WORDS; /* * We have to check for the last task frame at two different locations * because gcc can occasionally decide to realign the stack pointer and - * change the offset of the stack frame by a word in the prologue of a - * function called by head/entry code. + * change the offset of the stack frame in the prologue of a function + * called by head/entry code. Examples: + * + * : + * push %edi + * lea 0x8(%esp),%edi + * and $0xfffffff8,%esp + * pushl -0x4(%edi) + * push %ebp + * mov %esp,%ebp + * + * : + * lea 0x8(%rsp),%r10 + * and $0xfffffffffffffff0,%rsp + * pushq -0x8(%r10) + * push %rbp + * mov %rsp,%rbp + * + * Note that after aligning the stack, it pushes a duplicate copy of + * the return address before pushing the frame pointer. */ - return bp == regs - FRAME_HEADER_SIZE || - bp == regs - FRAME_HEADER_SIZE - sizeof(long); + return (state->bp == last_bp || + (state->bp == aligned_bp && *(aligned_bp+1) == *(last_bp+1))); } /* -- cgit v1.2.3 From 49ec8f5b6ae3ab60385492cad900ffc8a523c895 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 14 Mar 2017 15:20:53 +0100 Subject: x86/intel_rdt: Put group node in rdtgroup_kn_unlock The rdtgroup_kn_unlock waits for the last user to release and put its node. But it's calling kernfs_put on the node which calls the rdtgroup_kn_unlock, which might not be the group's directory node, but another group's file node. This race could be easily reproduced by running 2 instances of following script: mount -t resctrl resctrl /sys/fs/resctrl/ pushd /sys/fs/resctrl/ mkdir krava echo "krava" > krava/schemata rmdir krava popd umount /sys/fs/resctrl It triggers the slub debug error message with following command line config: slub_debug=,kernfs_node_cache. Call kernfs_put on the group's node to fix it. Fixes: 60cf5e101fd4 ("x86/intel_rdt: Add mkdir to resctrl file system") Signed-off-by: Jiri Olsa Cc: Fenghua Yu Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Shaohua Li Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1489501253-20248-1-git-send-email-jolsa@kernel.org Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/intel_rdt_rdtgroup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c index c05509d38b1f..9ac2a5cdd9c2 100644 --- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c +++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c @@ -727,7 +727,7 @@ void rdtgroup_kn_unlock(struct kernfs_node *kn) if (atomic_dec_and_test(&rdtgrp->waitcount) && (rdtgrp->flags & RDT_DELETED)) { kernfs_unbreak_active_protection(kn); - kernfs_put(kn); + kernfs_put(rdtgrp->kn); kfree(rdtgrp); } else { kernfs_unbreak_active_protection(kn); -- cgit v1.2.3 From 6bce725a78de1b171928ce66dec2bae4b569e5d1 Mon Sep 17 00:00:00 2001 From: Tobias Klauser Date: Wed, 8 Mar 2017 14:30:34 +0100 Subject: x86/mpx: Make unnecessarily global function static Make the function get_user_bd_entry() static as it is not used outside of arch/x86/mm/mpx.c This fixes a sparse warning. Signed-off-by: Tobias Klauser Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- arch/x86/mm/mpx.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/mpx.c b/arch/x86/mm/mpx.c index 5126dfd52b18..cd44ae727df7 100644 --- a/arch/x86/mm/mpx.c +++ b/arch/x86/mm/mpx.c @@ -590,7 +590,7 @@ static unsigned long mpx_bd_entry_to_bt_addr(struct mm_struct *mm, * we might run off the end of the bounds table if we are on * a 64-bit kernel and try to get 8 bytes. */ -int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, +static int get_user_bd_entry(struct mm_struct *mm, unsigned long *bd_entry_ret, long __user *bd_entry_ptr) { u32 bd_entry_32; -- cgit v1.2.3 From d0f33ac9ae7b2a727fb678235ae37baf1d0608d5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 16 Mar 2017 16:40:24 -0700 Subject: mm, x86: fix native_pud_clear build error We still get a build error in random configurations, after this has been modified a few times: In file included from include/linux/mm.h:68:0, from include/linux/suspend.h:8, from arch/x86/kernel/asm-offsets.c:12: arch/x86/include/asm/pgtable.h:66:26: error: redefinition of 'native_pud_clear' #define pud_clear(pud) native_pud_clear(pud) My interpretation is that the build error comes from a typo in __PAGETABLE_PUD_FOLDED, so fix that typo now, and remove the incorrect #ifdef around the native_pud_clear definition. Fixes: 3e761a42e19c ("mm, x86: fix HIGHMEM64 && PARAVIRT build config for native_pud_clear()") Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Link: http://lkml.kernel.org/r/20170314121330.182155-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Ackedy-by: Dave Jiang Cc: Matthew Wilcox Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Thomas Garnier Cc: Kees Cook Cc: Dave Hansen Cc: Hugh Dickins Cc: Borislav Petkov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/include/asm/pgtable-3level.h | 3 --- arch/x86/include/asm/pgtable.h | 2 +- 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/pgtable-3level.h b/arch/x86/include/asm/pgtable-3level.h index 72277b1028a5..50d35e3185f5 100644 --- a/arch/x86/include/asm/pgtable-3level.h +++ b/arch/x86/include/asm/pgtable-3level.h @@ -121,12 +121,9 @@ static inline void native_pmd_clear(pmd_t *pmd) *(tmp + 1) = 0; } -#if !defined(CONFIG_SMP) || (defined(CONFIG_HIGHMEM64G) && \ - defined(CONFIG_PARAVIRT)) static inline void native_pud_clear(pud_t *pudp) { } -#endif static inline void pud_clear(pud_t *pudp) { diff --git a/arch/x86/include/asm/pgtable.h b/arch/x86/include/asm/pgtable.h index 1cfb36b8c024..585ee0d42d18 100644 --- a/arch/x86/include/asm/pgtable.h +++ b/arch/x86/include/asm/pgtable.h @@ -62,7 +62,7 @@ extern struct mm_struct *pgd_page_get_mm(struct page *page); # define set_pud(pudp, pud) native_set_pud(pudp, pud) #endif -#ifndef __PAGETABLE_PMD_FOLDED +#ifndef __PAGETABLE_PUD_FOLDED #define pud_clear(pud) native_pud_clear(pud) #endif -- cgit v1.2.3 From 5dc855d44c2ad960a86f593c60461f1ae1566b6d Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:39 -0700 Subject: x86/perf: Fix CR4.PCE propagation to use active_mm instead of mm If one thread mmaps a perf event while another thread in the same mm is in some context where active_mm != mm (which can happen in the scheduler, for example), refresh_pce() would write the wrong value to CR4.PCE. This broke some PAPI tests. Reported-and-tested-by: Vince Weaver Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: stable@vger.kernel.org Fixes: 7911d3f7af14 ("perf/x86: Only allow rdpmc if a perf_event is mapped") Link: http://lkml.kernel.org/r/0c5b38a76ea50e405f9abe07a13dfaef87c173a1.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 1635c0c8df23..e07b36c5588a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2100,8 +2100,8 @@ static int x86_pmu_event_init(struct perf_event *event) static void refresh_pce(void *ignored) { - if (current->mm) - load_mm_cr4(current->mm); + if (current->active_mm) + load_mm_cr4(current->active_mm); } static void x86_pmu_event_mapped(struct perf_event *event) -- cgit v1.2.3 From 4b07372a32c0c1505a7634ad7e607d83340ef645 Mon Sep 17 00:00:00 2001 From: Andy Lutomirski Date: Thu, 16 Mar 2017 12:59:40 -0700 Subject: x86/perf: Clarify why x86_pmu_event_mapped() isn't racy Naively, it looks racy, but ->mmap_sem saves it. Add a comment and a lockdep assertion. Signed-off-by: Andy Lutomirski Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Borislav Petkov Cc: H. Peter Anvin Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/03a1e629063899168dfc4707f3bb6e581e21f5c6.1489694270.git.luto@kernel.org Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index e07b36c5588a..183a972f9210 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2109,6 +2109,18 @@ static void x86_pmu_event_mapped(struct perf_event *event) if (!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED)) return; + /* + * This function relies on not being called concurrently in two + * tasks in the same mm. Otherwise one task could observe + * perf_rdpmc_allowed > 1 and return all the way back to + * userspace with CR4.PCE clear while another task is still + * doing on_each_cpu_mask() to propagate CR4.PCE. + * + * For now, this can't happen because all callers hold mmap_sem + * for write. If this changes, we'll need a different solution. + */ + lockdep_assert_held_exclusive(¤t->mm->mmap_sem); + if (atomic_inc_return(¤t->mm->context.perf_rdpmc_allowed) == 1) on_each_cpu_mask(mm_cpumask(current->mm), refresh_pce, NULL, 1); } -- cgit v1.2.3 From 6c6c5e0311c83ffe75e14260fb83e05e21e1d488 Mon Sep 17 00:00:00 2001 From: Radim Krčmář Date: Fri, 13 Jan 2017 18:59:04 +0100 Subject: KVM: VMX: downgrade warning on unexpected exit code MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We never needed the call trace and we better rate-limit if it can be triggered by a guest. Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 98e82ee1e699..e7ec88961b1a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -8501,7 +8501,8 @@ static int vmx_handle_exit(struct kvm_vcpu *vcpu) && kvm_vmx_exit_handlers[exit_reason]) return kvm_vmx_exit_handlers[exit_reason](vcpu); else { - WARN_ONCE(1, "vmx: unexpected exit reason 0x%x\n", exit_reason); + vcpu_unimpl(vcpu, "vmx: unexpected exit reason 0x%x\n", + exit_reason); kvm_queue_exception(vcpu, UD_VECTOR); return 1; } -- cgit v1.2.3 From 3863dff0c3dd72984395c93b12383b393c5c3989 Mon Sep 17 00:00:00 2001 From: Dmitry Vyukov Date: Tue, 24 Jan 2017 14:06:48 +0100 Subject: kvm: fix usage of uninit spinlock in avic_vm_destroy() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If avic is not enabled, avic_vm_init() does nothing and returns early. However, avic_vm_destroy() still tries to destroy what hasn't been created. The only bad consequence of this now is that avic_vm_destroy() uses svm_vm_data_hash_lock that hasn't been initialized (and is not meant to be used at all if avic is not enabled). Return early from avic_vm_destroy() if avic is not enabled. It has nothing to destroy. Signed-off-by: Dmitry Vyukov Cc: Joerg Roedel Cc: Paolo Bonzini Cc: "Radim Krčmář" Cc: David Hildenbrand Cc: kvm@vger.kernel.org Cc: syzkaller@googlegroups.com Reviewed-by: David Hildenbrand Signed-off-by: Radim Krčmář --- arch/x86/kvm/svm.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/svm.c b/arch/x86/kvm/svm.c index d1efe2c62b3f..5fba70646c32 100644 --- a/arch/x86/kvm/svm.c +++ b/arch/x86/kvm/svm.c @@ -1379,6 +1379,9 @@ static void avic_vm_destroy(struct kvm *kvm) unsigned long flags; struct kvm_arch *vm_data = &kvm->arch; + if (!avic) + return; + avic_free_vm_id(vm_data->avic_vm_id); if (vm_data->avic_logical_id_table_page) -- cgit v1.2.3 From 6d1b3ad2cd87150fc89bad2331beab173a8ad24d Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Sun, 12 Mar 2017 00:53:52 -0800 Subject: KVM: nVMX: don't reset kvm mmu twice MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm mmu is reset once successfully loading CR3 as part of emulating vmentry in nested_vmx_load_cr3(). We should not reset kvm mmu twice. Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e7ec88961b1a..c66436530a93 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -10287,8 +10287,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, entry_failure_code)) return 1; - kvm_mmu_reset_context(vcpu); - if (!enable_ept) vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested; -- cgit v1.2.3 From ad4830051aac0b967add82ac168f49edf11f01a0 Mon Sep 17 00:00:00 2001 From: Mike Travis Date: Tue, 21 Mar 2017 18:16:47 -0500 Subject: x86/platform/uv: Fix calculation of Global Physical Address The calculation of the global physical address (GPA) on UV4 is incorrect. The gnode_extra/upper global offset should only be applied for fixed address space systems (UV1..3). Tested-by: John Estabrook Signed-off-by: Mike Travis Cc: Dimitri Sivanich Cc: John Estabrook Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Russ Anderson Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170321231646.667689538@asylum.americas.sgi.com Signed-off-by: Ingo Molnar --- arch/x86/include/asm/uv/uv_hub.h | 8 +++++--- arch/x86/kernel/apic/x2apic_uv_x.c | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/uv/uv_hub.h b/arch/x86/include/asm/uv/uv_hub.h index 72e8300b1e8a..9cffb44a3cf5 100644 --- a/arch/x86/include/asm/uv/uv_hub.h +++ b/arch/x86/include/asm/uv/uv_hub.h @@ -485,15 +485,17 @@ static inline unsigned long uv_soc_phys_ram_to_gpa(unsigned long paddr) if (paddr < uv_hub_info->lowmem_remap_top) paddr |= uv_hub_info->lowmem_remap_base; - paddr |= uv_hub_info->gnode_upper; - if (m_val) + + if (m_val) { + paddr |= uv_hub_info->gnode_upper; paddr = ((paddr << uv_hub_info->m_shift) >> uv_hub_info->m_shift) | ((paddr >> uv_hub_info->m_val) << uv_hub_info->n_lshift); - else + } else { paddr |= uv_soc_phys_ram_to_nasid(paddr) << uv_hub_info->gpa_shift; + } return paddr; } diff --git a/arch/x86/kernel/apic/x2apic_uv_x.c b/arch/x86/kernel/apic/x2apic_uv_x.c index e9f8f8cdd570..86f20cc0a65e 100644 --- a/arch/x86/kernel/apic/x2apic_uv_x.c +++ b/arch/x86/kernel/apic/x2apic_uv_x.c @@ -1105,7 +1105,8 @@ void __init uv_init_hub_info(struct uv_hub_info_s *hi) node_id.v = uv_read_local_mmr(UVH_NODE_ID); uv_cpuid.gnode_shift = max_t(unsigned int, uv_cpuid.gnode_shift, mn.n_val); hi->gnode_extra = (node_id.s.node_id & ~((1 << uv_cpuid.gnode_shift) - 1)) >> 1; - hi->gnode_upper = (unsigned long)hi->gnode_extra << mn.m_val; + if (mn.m_val) + hi->gnode_upper = (u64)hi->gnode_extra << mn.m_val; if (uv_gp_table) { hi->global_mmr_base = uv_gp_table->mmr_base; -- cgit v1.2.3 From 26a37ab319a26d330bab298770d692bb9c852aff Mon Sep 17 00:00:00 2001 From: Tony Luck Date: Mon, 20 Mar 2017 14:40:30 -0700 Subject: x86/mce: Fix copy/paste error in exception table entries Back in commit: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") ... I made a copy/paste error setting up the exception table entries and ended up with two for label .L_cache_w3 and none for .L_cache_w2. This means that if we take a machine check on: .L_cache_w2: movq 2*8(%rsi), %r10 then we don't have an exception table entry for this instruction and we can't recover. Fix: s/3/2/ Signed-off-by: Tony Luck Cc: Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 92b0729c34cab ("x86/mm, x86/mce: Add memcpy_mcsafe()") Link: http://lkml.kernel.org/r/1490046030-25862-1-git-send-email-tony.luck@intel.com Signed-off-by: Ingo Molnar --- arch/x86/lib/memcpy_64.S | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S index 779782f58324..9a53a06e5a3e 100644 --- a/arch/x86/lib/memcpy_64.S +++ b/arch/x86/lib/memcpy_64.S @@ -290,7 +290,7 @@ EXPORT_SYMBOL_GPL(memcpy_mcsafe_unrolled) _ASM_EXTABLE_FAULT(.L_copy_leading_bytes, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w0, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w1, .L_memcpy_mcsafe_fail) - _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) + _ASM_EXTABLE_FAULT(.L_cache_w2, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w3, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w4, .L_memcpy_mcsafe_fail) _ASM_EXTABLE_FAULT(.L_cache_w5, .L_memcpy_mcsafe_fail) -- cgit v1.2.3 From 698eff6355f735d46d1b7113df8b422874cd7988 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 17 Mar 2017 12:48:18 +0100 Subject: sched/clock, x86/perf: Fix "perf test tsc" People reported that commit: 5680d8094ffa ("sched/clock: Provide better clock continuity") broke "perf test tsc". That commit added another offset to the reported clock value; so take that into account when computing the provided offset values. Reported-by: Adrian Hunter Reported-by: Arnaldo Carvalho de Melo Tested-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Fixes: 5680d8094ffa ("sched/clock: Provide better clock continuity") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 9 ++++++--- arch/x86/include/asm/timer.h | 2 ++ arch/x86/kernel/tsc.c | 4 ++-- include/linux/sched/clock.h | 13 +++++++------ kernel/sched/clock.c | 22 +++++++++++----------- 5 files changed, 28 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index 2aa1ad194db2..580b60f5ac83 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2256,6 +2256,7 @@ void arch_perf_update_userpage(struct perf_event *event, struct perf_event_mmap_page *userpg, u64 now) { struct cyc2ns_data *data; + u64 offset; userpg->cap_user_time = 0; userpg->cap_user_time_zero = 0; @@ -2263,11 +2264,13 @@ void arch_perf_update_userpage(struct perf_event *event, !!(event->hw.flags & PERF_X86_EVENT_RDPMC_ALLOWED); userpg->pmc_width = x86_pmu.cntval_bits; - if (!sched_clock_stable()) + if (!using_native_sched_clock() || !sched_clock_stable()) return; data = cyc2ns_read_begin(); + offset = data->cyc2ns_offset + __sched_clock_offset; + /* * Internal timekeeping for enabled/running/stopped times * is always in the local_clock domain. @@ -2275,7 +2278,7 @@ void arch_perf_update_userpage(struct perf_event *event, userpg->cap_user_time = 1; userpg->time_mult = data->cyc2ns_mul; userpg->time_shift = data->cyc2ns_shift; - userpg->time_offset = data->cyc2ns_offset - now; + userpg->time_offset = offset - now; /* * cap_user_time_zero doesn't make sense when we're using a different @@ -2283,7 +2286,7 @@ void arch_perf_update_userpage(struct perf_event *event, */ if (!event->attr.use_clockid) { userpg->cap_user_time_zero = 1; - userpg->time_zero = data->cyc2ns_offset; + userpg->time_zero = offset; } cyc2ns_read_end(data); diff --git a/arch/x86/include/asm/timer.h b/arch/x86/include/asm/timer.h index a04eabd43d06..27e9f9d769b8 100644 --- a/arch/x86/include/asm/timer.h +++ b/arch/x86/include/asm/timer.h @@ -12,6 +12,8 @@ extern int recalibrate_cpu_khz(void); extern int no_timer_check; +extern bool using_native_sched_clock(void); + /* * We use the full linear equation: f(x) = a + b*x, in order to allow * a continuous function in the face of dynamic freq changes. diff --git a/arch/x86/kernel/tsc.c b/arch/x86/kernel/tsc.c index c73a7f9e881a..714dfba6a1e7 100644 --- a/arch/x86/kernel/tsc.c +++ b/arch/x86/kernel/tsc.c @@ -328,7 +328,7 @@ unsigned long long sched_clock(void) return paravirt_sched_clock(); } -static inline bool using_native_sched_clock(void) +bool using_native_sched_clock(void) { return pv_time_ops.sched_clock == native_sched_clock; } @@ -336,7 +336,7 @@ static inline bool using_native_sched_clock(void) unsigned long long sched_clock(void) __attribute__((alias("native_sched_clock"))); -static inline bool using_native_sched_clock(void) { return true; } +bool using_native_sched_clock(void) { return true; } #endif int check_tsc_unstable(void) diff --git a/include/linux/sched/clock.h b/include/linux/sched/clock.h index 4a68c6791207..34fe92ce1ebd 100644 --- a/include/linux/sched/clock.h +++ b/include/linux/sched/clock.h @@ -54,15 +54,16 @@ static inline u64 local_clock(void) } #else extern void sched_clock_init_late(void); -/* - * Architectures can set this to 1 if they have specified - * CONFIG_HAVE_UNSTABLE_SCHED_CLOCK in their arch Kconfig, - * but then during bootup it turns out that sched_clock() - * is reliable after all: - */ extern int sched_clock_stable(void); extern void clear_sched_clock_stable(void); +/* + * When sched_clock_stable(), __sched_clock_offset provides the offset + * between local_clock() and sched_clock(). + */ +extern u64 __sched_clock_offset; + + extern void sched_clock_tick(void); extern void sched_clock_idle_sleep_event(void); extern void sched_clock_idle_wakeup_event(u64 delta_ns); diff --git a/kernel/sched/clock.c b/kernel/sched/clock.c index fec0f58c8dee..24a3e01bf8cb 100644 --- a/kernel/sched/clock.c +++ b/kernel/sched/clock.c @@ -96,10 +96,10 @@ static DEFINE_STATIC_KEY_FALSE(__sched_clock_stable); static int __sched_clock_stable_early = 1; /* - * We want: ktime_get_ns() + gtod_offset == sched_clock() + raw_offset + * We want: ktime_get_ns() + __gtod_offset == sched_clock() + __sched_clock_offset */ -static __read_mostly u64 raw_offset; -static __read_mostly u64 gtod_offset; +__read_mostly u64 __sched_clock_offset; +static __read_mostly u64 __gtod_offset; struct sched_clock_data { u64 tick_raw; @@ -131,11 +131,11 @@ static void __set_sched_clock_stable(void) /* * Attempt to make the (initial) unstable->stable transition continuous. */ - raw_offset = (scd->tick_gtod + gtod_offset) - (scd->tick_raw); + __sched_clock_offset = (scd->tick_gtod + __gtod_offset) - (scd->tick_raw); printk(KERN_INFO "sched_clock: Marking stable (%lld, %lld)->(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); static_branch_enable(&__sched_clock_stable); tick_dep_clear(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -161,11 +161,11 @@ static void __clear_sched_clock_stable(void) * * Still do what we can. */ - gtod_offset = (scd->tick_raw + raw_offset) - (scd->tick_gtod); + __gtod_offset = (scd->tick_raw + __sched_clock_offset) - (scd->tick_gtod); printk(KERN_INFO "sched_clock: Marking unstable (%lld, %lld)<-(%lld, %lld)\n", - scd->tick_gtod, gtod_offset, - scd->tick_raw, raw_offset); + scd->tick_gtod, __gtod_offset, + scd->tick_raw, __sched_clock_offset); tick_dep_set(TICK_DEP_BIT_CLOCK_UNSTABLE); @@ -238,7 +238,7 @@ again: * scd->tick_gtod + TICK_NSEC); */ - clock = scd->tick_gtod + gtod_offset + delta; + clock = scd->tick_gtod + __gtod_offset + delta; min_clock = wrap_max(scd->tick_gtod, old_clock); max_clock = wrap_max(old_clock, scd->tick_gtod + TICK_NSEC); @@ -324,7 +324,7 @@ u64 sched_clock_cpu(int cpu) u64 clock; if (sched_clock_stable()) - return sched_clock() + raw_offset; + return sched_clock() + __sched_clock_offset; if (unlikely(!sched_clock_running)) return 0ull; -- cgit v1.2.3 From 950712eb8ef03e4a62ac5b3067efde7ec2f8a91e Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:18 +0800 Subject: KVM: x86: check existance before destroy MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Mostly used for split irqchip mode. In that case, these two things are not inited at all, so no need to release. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/i8259.c | 3 +++ arch/x86/kvm/ioapic.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8259.c b/arch/x86/kvm/i8259.c index 73ea24d4f119..047b17a26269 100644 --- a/arch/x86/kvm/i8259.c +++ b/arch/x86/kvm/i8259.c @@ -657,6 +657,9 @@ void kvm_pic_destroy(struct kvm *kvm) { struct kvm_pic *vpic = kvm->arch.vpic; + if (!vpic) + return; + kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_master); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_slave); kvm_io_bus_unregister_dev(vpic->kvm, KVM_PIO_BUS, &vpic->dev_eclr); diff --git a/arch/x86/kvm/ioapic.c b/arch/x86/kvm/ioapic.c index 6e219e5c07d2..289270a6aecb 100644 --- a/arch/x86/kvm/ioapic.c +++ b/arch/x86/kvm/ioapic.c @@ -635,6 +635,9 @@ void kvm_ioapic_destroy(struct kvm *kvm) { struct kvm_ioapic *ioapic = kvm->arch.vioapic; + if (!ioapic) + return; + cancel_delayed_work_sync(&ioapic->eoi_inject); kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &ioapic->dev); kvm->arch.vioapic = NULL; -- cgit v1.2.3 From c761159cf81b8641290f7559a8d8e30f6ab92669 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Wed, 15 Mar 2017 16:01:19 +0800 Subject: KVM: x86: use pic/ioapic destructor when destroy vm MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We have specific destructors for pic/ioapic, we'd better use them when destroying the VM as well. Signed-off-by: Peter Xu Signed-off-by: Radim Krčmář --- arch/x86/kvm/x86.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1faf620a6fdc..d30ff491ecc7 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8153,8 +8153,8 @@ void kvm_arch_destroy_vm(struct kvm *kvm) if (kvm_x86_ops->vm_destroy) kvm_x86_ops->vm_destroy(kvm); kvm_iommu_unmap_guest(kvm); - kfree(kvm->arch.vpic); - kfree(kvm->arch.vioapic); + kvm_pic_destroy(kvm); + kvm_ioapic_destroy(kvm); kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); -- cgit v1.2.3 From fb6c8198431311027c3434d4e94ab8bc040f7aea Mon Sep 17 00:00:00 2001 From: Jim Mattson Date: Thu, 16 Mar 2017 13:53:59 -0700 Subject: kvm: vmx: Flush TLB when the APIC-access address changes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Quoting from the Intel SDM, volume 3, section 28.3.3.4: Guidelines for Use of the INVEPT Instruction: If EPT was in use on a logical processor at one time with EPTP X, it is recommended that software use the INVEPT instruction with the "single-context" INVEPT type and with EPTP X in the INVEPT descriptor before a VM entry on the same logical processor that enables EPT with EPTP X and either (a) the "virtualize APIC accesses" VM-execution control was changed from 0 to 1; or (b) the value of the APIC-access address was changed. In the nested case, the burden falls on L1, unless L0 enables EPT in vmcs02 when L1 doesn't enable EPT in vmcs12. Signed-off-by: Jim Mattson Signed-off-by: Radim Krčmář --- arch/x86/kvm/vmx.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index c66436530a93..e2f608283a5a 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -4024,6 +4024,12 @@ static void vmx_flush_tlb(struct kvm_vcpu *vcpu) __vmx_flush_tlb(vcpu, to_vmx(vcpu)->vpid); } +static void vmx_flush_tlb_ept_only(struct kvm_vcpu *vcpu) +{ + if (enable_ept) + vmx_flush_tlb(vcpu); +} + static void vmx_decache_cr0_guest_bits(struct kvm_vcpu *vcpu) { ulong cr0_guest_owned_bits = vcpu->arch.cr0_guest_owned_bits; @@ -8548,6 +8554,7 @@ static void vmx_set_virtual_x2apic_mode(struct kvm_vcpu *vcpu, bool set) } else { sec_exec_control &= ~SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE; sec_exec_control |= SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES; + vmx_flush_tlb_ept_only(vcpu); } vmcs_write32(SECONDARY_VM_EXEC_CONTROL, sec_exec_control); @@ -8573,8 +8580,10 @@ static void vmx_set_apic_access_page_addr(struct kvm_vcpu *vcpu, hpa_t hpa) */ if (!is_guest_mode(vcpu) || !nested_cpu_has2(get_vmcs12(&vmx->vcpu), - SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { vmcs_write64(APIC_ACCESS_ADDR, hpa); + vmx_flush_tlb_ept_only(vcpu); + } } static void vmx_hwapic_isr_update(struct kvm_vcpu *vcpu, int max_isr) @@ -10256,6 +10265,9 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, if (nested_cpu_has_ept(vmcs12)) { kvm_mmu_unload(vcpu); nested_ept_init_mmu_context(vcpu); + } else if (nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* @@ -11055,6 +11067,10 @@ static void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 exit_reason, vmx->nested.change_vmcs01_virtual_x2apic_mode = false; vmx_set_virtual_x2apic_mode(vcpu, vcpu->arch.apic_base & X2APIC_ENABLE); + } else if (!nested_cpu_has_ept(vmcs12) && + nested_cpu_has2(vmcs12, + SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) { + vmx_flush_tlb_ept_only(vcpu); } /* This is needed for same reason as it was needed in prepare_vmcs02 */ -- cgit v1.2.3 From 24dccf83a121b8a4ad5c2ad383a8184ef6c266ee Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:55 -0700 Subject: KVM: x86: correct async page present tracepoint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit After async pf setup successfully, there is a broadcast wakeup w/ special token 0xffffffff which tells vCPU that it should wake up all processes waiting for APFs though there is no real process waiting at the moment. The async page present tracepoint print prematurely and fails to catch the special token setup. This patch fixes it by moving the async page present tracepoint after the special token setup. Before patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0x0 gva 0x0 After patch: qemu-system-x86-8499 [006] ...1 5973.473292: kvm_async_pf_ready: token 0xffffffff gva 0x0 Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/x86.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index d30ff491ecc7..64697fe475c3 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8566,11 +8566,11 @@ void kvm_arch_async_page_present(struct kvm_vcpu *vcpu, { struct x86_exception fault; - trace_kvm_async_pf_ready(work->arch.token, work->gva); if (work->wakeup_all) work->arch.token = ~0; /* broadcast wakeup */ else kvm_del_async_pf_gfn(vcpu, work->arch.gfn); + trace_kvm_async_pf_ready(work->arch.token, work->gva); if ((vcpu->arch.apf.msr_val & KVM_ASYNC_PF_ENABLED) && !apf_put_user(vcpu, KVM_PV_REASON_PAGE_READY)) { -- cgit v1.2.3 From 63cb6d5f004ca44f9b8e562b6dd191f717a4960e Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Mon, 20 Mar 2017 21:18:53 -0700 Subject: KVM: nVMX: Fix nested VPID vmx exec control MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running kvm-unit-tests/vmx.flat on L0 w/ vpid disabled. Test suite: VPID Unhandled exception 6 #UD at ip 00000000004051a6 error_code=0000 rflags=00010047 cs=00000008 rax=0000000000000000 rcx=0000000000000001 rdx=0000000000000047 rbx=0000000000402f79 rbp=0000000000456240 rsi=0000000000000001 rdi=0000000000000000 r8=000000000000000a r9=00000000000003f8 r10=0000000080010011 r11=0000000000000000 r12=0000000000000003 r13=0000000000000708 r14=0000000000000000 r15=0000000000000000 cr0=0000000080010031 cr2=0000000000000000 cr3=0000000007fff000 cr4=0000000000002020 cr8=0000000000000000 STACK: @4051a6 40523e 400f7f 402059 40028f We should hide and forbid VPID in L1 if it is disabled on L0. However, nested VPID enable bit is set unconditionally during setup nested vmx exec controls though VPID is not exposed through nested VMX capablity. This patch fixes it by don't set nested VPID enable bit if it is disabled on L0. Cc: Paolo Bonzini Cc: Radim Krčmář Cc: stable@vger.kernel.org Fixes: 5c614b3583e (KVM: nVMX: nested VPID emulation) Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index e2f608283a5a..5a82766e4b07 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -2753,7 +2753,6 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) SECONDARY_EXEC_RDTSCP | SECONDARY_EXEC_DESC | SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE | - SECONDARY_EXEC_ENABLE_VPID | SECONDARY_EXEC_APIC_REGISTER_VIRT | SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY | SECONDARY_EXEC_WBINVD_EXITING | @@ -2781,10 +2780,12 @@ static void nested_vmx_setup_ctls_msrs(struct vcpu_vmx *vmx) * though it is treated as global context. The alternative is * not failing the single-context invvpid, and it is worse. */ - if (enable_vpid) + if (enable_vpid) { + vmx->nested.nested_vmx_secondary_ctls_high |= + SECONDARY_EXEC_ENABLE_VPID; vmx->nested.nested_vmx_vpid_caps = VMX_VPID_INVVPID_BIT | VMX_VPID_EXTENT_SUPPORTED_MASK; - else + } else vmx->nested.nested_vmx_vpid_caps = 0; if (enable_unrestricted_guest) -- cgit v1.2.3 From 08d839c4b134b8328ec42f2157a9ca4b93227c03 Mon Sep 17 00:00:00 2001 From: Wanpeng Li Date: Thu, 23 Mar 2017 05:30:08 -0700 Subject: KVM: VMX: Fix enable VPID conditions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This can be reproduced by running L2 on L1, and disable VPID on L0 if w/o commit "KVM: nVMX: Fix nested VPID vmx exec control", the L2 crash as below: KVM: entry failed, hardware error 0x7 EAX=00000000 EBX=00000000 ECX=00000000 EDX=000306c3 ESI=00000000 EDI=00000000 EBP=00000000 ESP=00000000 EIP=0000fff0 EFL=00000002 [-------] CPL=0 II=0 A20=1 SMM=0 HLT=0 ES =0000 00000000 0000ffff 00009300 CS =f000 ffff0000 0000ffff 00009b00 SS =0000 00000000 0000ffff 00009300 DS =0000 00000000 0000ffff 00009300 FS =0000 00000000 0000ffff 00009300 GS =0000 00000000 0000ffff 00009300 LDT=0000 00000000 0000ffff 00008200 TR =0000 00000000 0000ffff 00008b00 GDT= 00000000 0000ffff IDT= 00000000 0000ffff CR0=60000010 CR2=00000000 CR3=00000000 CR4=00000000 DR0=0000000000000000 DR1=0000000000000000 DR2=0000000000000000 DR3=0000000000000000 DR6=00000000ffff0ff0 DR7=0000000000000400 EFER=0000000000000000 Reference SDM 30.3 INVVPID: Protected Mode Exceptions - #UD - If not in VMX operation. - If the logical processor does not support VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=0). - If the logical processor supports VPIDs (IA32_VMX_PROCBASED_CTLS2[37]=1) but does not support the INVVPID instruction (IA32_VMX_EPT_VPID_CAP[32]=0). So we should check both VPID enable bit in vmx exec control and INVVPID support bit in vmx capability MSRs to enable VPID. This patch adds the guarantee to not enable VPID if either INVVPID or single-context/all-context invalidation is not exposed in vmx capability MSRs. Reviewed-by: David Hildenbrand Reviewed-by: Jim Mattson Cc: Paolo Bonzini Cc: Radim Krčmář Signed-off-by: Wanpeng Li Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 5a82766e4b07..cd1ba62878f3 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -1239,6 +1239,11 @@ static inline bool cpu_has_vmx_invvpid_global(void) return vmx_capability.vpid & VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT; } +static inline bool cpu_has_vmx_invvpid(void) +{ + return vmx_capability.vpid & VMX_VPID_INVVPID_BIT; +} + static inline bool cpu_has_vmx_ept(void) { return vmcs_config.cpu_based_2nd_exec_ctrl & @@ -6524,8 +6529,10 @@ static __init int hardware_setup(void) if (boot_cpu_has(X86_FEATURE_NX)) kvm_enable_efer_bits(EFER_NX); - if (!cpu_has_vmx_vpid()) + if (!cpu_has_vmx_vpid() || !cpu_has_vmx_invvpid() || + !(cpu_has_vmx_invvpid_single() || cpu_has_vmx_invvpid_global())) enable_vpid = 0; + if (!cpu_has_vmx_shadow_vmcs()) enable_shadow_vmcs = 0; if (enable_shadow_vmcs) -- cgit v1.2.3 From a46f60d76004965e5669dbf3fc21ef3bc3632eb4 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 24 Mar 2017 12:59:52 +0800 Subject: x86/mm/KASLR: Exclude EFI region from KASLR VA space randomization Currently KASLR is enabled on three regions: the direct mapping of physical memory, vamlloc and vmemmap. However the EFI region is also mistakenly included for VA space randomization because of misusing EFI_VA_START macro and assuming EFI_VA_START < EFI_VA_END. (This breaks kexec and possibly other things that rely on stable addresses.) The EFI region is reserved for EFI runtime services virtual mapping which should not be included in KASLR ranges. In Documentation/x86/x86_64/mm.txt, we can see: ffffffef00000000 - fffffffeffffffff (=64 GB) EFI region mapping space EFI uses the space from -4G to -64G thus EFI_VA_START > EFI_VA_END, Here EFI_VA_START = -4G, and EFI_VA_END = -64G. Changing EFI_VA_START to EFI_VA_END in mm/kaslr.c fixes this problem. Signed-off-by: Baoquan He Reviewed-by: Bhupesh Sharma Acked-by: Dave Young Acked-by: Thomas Garnier Cc: #4.8+ Cc: Andrew Morton Cc: Andy Lutomirski Cc: Ard Biesheuvel Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Kees Cook Cc: Linus Torvalds Cc: Masahiro Yamada Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/1490331592-31860-1-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- arch/x86/mm/kaslr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/kaslr.c b/arch/x86/mm/kaslr.c index 887e57182716..aed206475aa7 100644 --- a/arch/x86/mm/kaslr.c +++ b/arch/x86/mm/kaslr.c @@ -48,7 +48,7 @@ static const unsigned long vaddr_start = __PAGE_OFFSET_BASE; #if defined(CONFIG_X86_ESPFIX64) static const unsigned long vaddr_end = ESPFIX_BASE_ADDR; #elif defined(CONFIG_EFI) -static const unsigned long vaddr_end = EFI_VA_START; +static const unsigned long vaddr_end = EFI_VA_END; #else static const unsigned long vaddr_end = __START_KERNEL_map; #endif @@ -105,7 +105,7 @@ void __init kernel_randomize_memory(void) */ BUILD_BUG_ON(vaddr_start >= vaddr_end); BUILD_BUG_ON(IS_ENABLED(CONFIG_X86_ESPFIX64) && - vaddr_end >= EFI_VA_START); + vaddr_end >= EFI_VA_END); BUILD_BUG_ON((IS_ENABLED(CONFIG_X86_ESPFIX64) || IS_ENABLED(CONFIG_EFI)) && vaddr_end >= __START_KERNEL_map); -- cgit v1.2.3 From 7ad658b693536741c37b16aeb07840a2ce75f5b9 Mon Sep 17 00:00:00 2001 From: Ladi Prosek Date: Thu, 23 Mar 2017 07:18:08 +0100 Subject: KVM: nVMX: fix nested EPT detection The nested_ept_enabled flag introduced in commit 7ca29de2136 was not computed correctly. We are interested only in L1's EPT state, not the the combined L0+L1 value. In particular, if L0 uses EPT but L1 does not, nested_ept_enabled must be false to make sure that PDPSTRs are loaded based on CR3 as usual, because the special case described in 26.3.2.4 Loading Page-Directory- Pointer-Table Entries does not apply. Fixes: 7ca29de21362 ("KVM: nVMX: fix CR3 load if L2 uses PAE paging and EPT") Cc: qemu-stable@nongnu.org Reported-by: Wanpeng Li Reviewed-by: David Hildenbrand Signed-off-by: Ladi Prosek Signed-off-by: Paolo Bonzini --- arch/x86/kvm/vmx.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index cd1ba62878f3..2ee00dbbbd51 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -9992,7 +9992,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, { struct vcpu_vmx *vmx = to_vmx(vcpu); u32 exec_control; - bool nested_ept_enabled = false; vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector); vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector); @@ -10139,8 +10138,6 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmcs12->guest_intr_status); } - nested_ept_enabled = (exec_control & SECONDARY_EXEC_ENABLE_EPT) != 0; - /* * Write an illegal value to APIC_ACCESS_ADDR. Later, * nested_get_vmcs12_pages will either fix it up or @@ -10303,7 +10300,7 @@ static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12, vmx_set_efer(vcpu, vcpu->arch.efer); /* Shadow page tables on either EPT or shadow page tables. */ - if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_ept_enabled, + if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12), entry_failure_code)) return 1; -- cgit v1.2.3 From 2beb6dad2e8f95d710159d5befb390e4f62ab5cf Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Mar 2017 17:53:50 +0200 Subject: KVM: x86: cleanup the page tracking SRCU instance SRCU uses a delayed work item. Skip cleaning it up, and the result is use-after-free in the work item callbacks. Reported-by: Dmitry Vyukov Suggested-by: Dmitry Vyukov Cc: stable@vger.kernel.org Fixes: 0eb05bf290cfe8610d9680b49abef37febd1c38a Reviewed-by: Xiao Guangrong Signed-off-by: Paolo Bonzini --- arch/x86/include/asm/kvm_page_track.h | 1 + arch/x86/kvm/page_track.c | 8 ++++++++ arch/x86/kvm/x86.c | 1 + 3 files changed, 10 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/kvm_page_track.h b/arch/x86/include/asm/kvm_page_track.h index d74747b031ec..c4eda791f877 100644 --- a/arch/x86/include/asm/kvm_page_track.h +++ b/arch/x86/include/asm/kvm_page_track.h @@ -46,6 +46,7 @@ struct kvm_page_track_notifier_node { }; void kvm_page_track_init(struct kvm *kvm); +void kvm_page_track_cleanup(struct kvm *kvm); void kvm_page_track_free_memslot(struct kvm_memory_slot *free, struct kvm_memory_slot *dont); diff --git a/arch/x86/kvm/page_track.c b/arch/x86/kvm/page_track.c index 37942e419c32..60168cdd0546 100644 --- a/arch/x86/kvm/page_track.c +++ b/arch/x86/kvm/page_track.c @@ -160,6 +160,14 @@ bool kvm_page_track_is_active(struct kvm_vcpu *vcpu, gfn_t gfn, return !!ACCESS_ONCE(slot->arch.gfn_track[mode][index]); } +void kvm_page_track_cleanup(struct kvm *kvm) +{ + struct kvm_page_track_notifier_head *head; + + head = &kvm->arch.track_notifier_head; + cleanup_srcu_struct(&head->track_srcu); +} + void kvm_page_track_init(struct kvm *kvm) { struct kvm_page_track_notifier_head *head; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 64697fe475c3..ccbd45ecd41a 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -8158,6 +8158,7 @@ void kvm_arch_destroy_vm(struct kvm *kvm) kvm_free_vcpus(kvm); kvfree(rcu_dereference_check(kvm->arch.apic_map, 1)); kvm_mmu_uninit_vm(kvm); + kvm_page_track_cleanup(kvm); } void kvm_arch_free_memslot(struct kvm *kvm, struct kvm_memory_slot *free, -- cgit v1.2.3 From 3f135e57a4f76d24ae8d8a490314331f0ced40c5 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Thu, 16 Mar 2017 14:31:33 -0500 Subject: x86/build: Mostly disable '-maccumulate-outgoing-args' The GCC '-maccumulate-outgoing-args' flag is enabled for most configs, mostly because of issues which are no longer relevant. For most configs, and with most recent versions of GCC, it's no longer needed. Clarify which cases need it, and only enable it for those cases. Also produce a compile-time error for the ftrace graph + mcount + '-Os' case, which will otherwise cause runtime failures. The main benefit of '-maccumulate-outgoing-args' is that it prevents an ugly prologue for functions which have aligned stacks. But removing the option also has some benefits: more readable argument saves, smaller text size, and (presumably) slightly improved performance. Here are the object size savings for 32-bit and 64-bit defconfig kernels: text data bss dec hex filename 10006710 3543328 1773568 15323606 e9d1d6 vmlinux.x86-32.before 9706358 3547424 1773568 15027350 e54c96 vmlinux.x86-32.after text data bss dec hex filename 10652105 4537576 843776 16033457 f4a6b1 vmlinux.x86-64.before 10639629 4537576 843776 16020981 f475f5 vmlinux.x86-64.after That comes out to a 3% text size improvement on x86-32 and a 0.1% text size improvement on x86-64. Signed-off-by: Josh Poimboeuf Cc: Andrew Lutomirski Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: Linus Torvalds Cc: Pavel Machek Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170316193133.zrj6gug53766m6nn@treble Signed-off-by: Ingo Molnar --- arch/x86/Makefile | 35 +++++++++++++++++++++++++++++++---- arch/x86/Makefile_32.cpu | 18 ------------------ arch/x86/kernel/ftrace.c | 6 ++++++ scripts/Kbuild.include | 4 ++++ 4 files changed, 41 insertions(+), 22 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/Makefile b/arch/x86/Makefile index 2d449337a360..a94a4d10f2df 100644 --- a/arch/x86/Makefile +++ b/arch/x86/Makefile @@ -120,10 +120,6 @@ else # -funit-at-a-time shrinks the kernel .text considerably # unfortunately it makes reading oopses harder. KBUILD_CFLAGS += $(call cc-option,-funit-at-a-time) - - # this works around some issues with generating unwind tables in older gccs - # newer gccs do it by default - KBUILD_CFLAGS += $(call cc-option,-maccumulate-outgoing-args) endif ifdef CONFIG_X86_X32 @@ -147,6 +143,37 @@ ifeq ($(CONFIG_KMEMCHECK),y) KBUILD_CFLAGS += $(call cc-option,-fno-builtin-memcpy) endif +# +# If the function graph tracer is used with mcount instead of fentry, +# '-maccumulate-outgoing-args' is needed to prevent a GCC bug +# (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=42109) +# +ifdef CONFIG_FUNCTION_GRAPH_TRACER + ifndef CONFIG_HAVE_FENTRY + ACCUMULATE_OUTGOING_ARGS := 1 + else + ifeq ($(call cc-option-yn, -mfentry), n) + ACCUMULATE_OUTGOING_ARGS := 1 + endif + endif +endif + +# +# Jump labels need '-maccumulate-outgoing-args' for gcc < 4.5.2 to prevent a +# GCC bug (https://gcc.gnu.org/bugzilla/show_bug.cgi?id=46226). There's no way +# to test for this bug at compile-time because the test case needs to execute, +# which is a no-go for cross compilers. So check the GCC version instead. +# +ifdef CONFIG_JUMP_LABEL + ifneq ($(ACCUMULATE_OUTGOING_ARGS), 1) + ACCUMULATE_OUTGOING_ARGS = $(call cc-if-fullversion, -lt, 040502, 1) + endif +endif + +ifeq ($(ACCUMULATE_OUTGOING_ARGS), 1) + KBUILD_CFLAGS += -maccumulate-outgoing-args +endif + # Stackpointer is addressed different for 32 bit and 64 bit x86 sp-$(CONFIG_X86_32) := esp sp-$(CONFIG_X86_64) := rsp diff --git a/arch/x86/Makefile_32.cpu b/arch/x86/Makefile_32.cpu index 6647ed49c66c..a45eb15b7cf2 100644 --- a/arch/x86/Makefile_32.cpu +++ b/arch/x86/Makefile_32.cpu @@ -45,24 +45,6 @@ cflags-$(CONFIG_MGEODE_LX) += $(call cc-option,-march=geode,-march=pentium-mmx) # cpu entries cflags-$(CONFIG_X86_GENERIC) += $(call tune,generic,$(call tune,i686)) -# Work around the pentium-mmx code generator madness of gcc4.4.x which -# does stack alignment by generating horrible code _before_ the mcount -# prologue (push %ebp, mov %esp, %ebp) which breaks the function graph -# tracer assumptions. For i686, generic, core2 this is set by the -# compiler anyway -ifeq ($(CONFIG_FUNCTION_GRAPH_TRACER), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -# Work around to a bug with asm goto with first implementations of it -# in gcc causing gcc to mess up the push and pop of the stack in some -# uses of asm goto. -ifeq ($(CONFIG_JUMP_LABEL), y) -ADD_ACCUMULATE_OUTGOING_ARGS := y -endif - -cflags-$(ADD_ACCUMULATE_OUTGOING_ARGS) += $(call cc-option,-maccumulate-outgoing-args) - # Bug fix for binutils: this option is required in order to keep # binutils from generating NOPL instructions against our will. ifneq ($(CONFIG_X86_P6_NOP),y) diff --git a/arch/x86/kernel/ftrace.c b/arch/x86/kernel/ftrace.c index 8f3d9cf26ff9..cbd73eb42170 100644 --- a/arch/x86/kernel/ftrace.c +++ b/arch/x86/kernel/ftrace.c @@ -29,6 +29,12 @@ #include #include +#if defined(CONFIG_FUNCTION_GRAPH_TRACER) && \ + !defined(CC_USING_FENTRY) && \ + !defined(CONFIG_CC_OPTIMIZE_FOR_PERFORMANCE) +# error The following combination is not supported: ((compiler missing -mfentry) || (CONFIG_X86_32 and !CONFIG_DYNAMIC_FTRACE)) && CONFIG_FUNCTION_GRAPH_TRACER && CONFIG_CC_OPTIMIZE_FOR_SIZE +#endif + #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void) diff --git a/scripts/Kbuild.include b/scripts/Kbuild.include index d6ca649cb0e9..afe3fd3af1e4 100644 --- a/scripts/Kbuild.include +++ b/scripts/Kbuild.include @@ -148,6 +148,10 @@ cc-fullversion = $(shell $(CONFIG_SHELL) \ # Usage: EXTRA_CFLAGS += $(call cc-ifversion, -lt, 0402, -O1) cc-ifversion = $(shell [ $(cc-version) $(1) $(2) ] && echo $(3) || echo $(4)) +# cc-if-fullversion +# Usage: EXTRA_CFLAGS += $(call cc-if-fullversion, -lt, 040502, -O1) +cc-if-fullversion = $(shell [ $(cc-fullversion) $(1) $(2) ] && echo $(3) || echo $(4)) + # cc-ldoption # Usage: ldflags += $(call cc-ldoption, -Wl$(comma)--hash-style=both) cc-ldoption = $(call try-run,\ -- cgit v1.2.3 From 29f72ce3e4d18066ec75c79c857bee0618a3504b Mon Sep 17 00:00:00 2001 From: Yazen Ghannam Date: Thu, 30 Mar 2017 13:17:14 +0200 Subject: x86/mce/AMD: Give a name to MCA bank 3 when accessed with legacy MSRs MCA bank 3 is reserved on systems pre-Fam17h, so it didn't have a name. However, MCA bank 3 is defined on Fam17h systems and can be accessed using legacy MSRs. Without a name we get a stack trace on Fam17h systems when trying to register sysfs files for bank 3 on kernels that don't recognize Scalable MCA. Call MCA bank 3 "decode_unit" since this is what it represents on Fam17h. This will allow kernels without SMCA support to see this bank on Fam17h+ and prevent the stack trace. This will not affect older systems since this bank is reserved on them, i.e. it'll be ignored. Tested on AMD Fam15h and Fam17h systems. WARNING: CPU: 26 PID: 1 at lib/kobject.c:210 kobject_add_internal kobject: (ffff88085bb256c0): attempted to be registered with empty name! ... Call Trace: kobject_add_internal kobject_add kobject_create_and_add threshold_create_device threshold_init_device Signed-off-by: Yazen Ghannam Signed-off-by: Borislav Petkov Link: http://lkml.kernel.org/r/1490102285-3659-1-git-send-email-Yazen.Ghannam@amd.com Signed-off-by: Thomas Gleixner --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 524cc5780a77..6e4a047e4b68 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -60,7 +60,7 @@ static const char * const th_names[] = { "load_store", "insn_fetch", "combined_unit", - "", + "decode_unit", "northbridge", "execution_unit", }; -- cgit v1.2.3 From 6b1cc946ddfcfc17d66c7d02eafa14deeb183437 Mon Sep 17 00:00:00 2001 From: Zhengyi Shen Date: Wed, 29 Mar 2017 15:00:20 +0800 Subject: x86/boot: Include missing header file Sparse complains about missing forward declarations: arch/x86/boot/compressed/error.c:8:6: warning: symbol 'warn' was not declared. Should it be static? arch/x86/boot/compressed/error.c:15:6: warning: symbol 'error' was not declared. Should it be static? Include the missing header file. Signed-off-by: Zhengyi Shen Acked-by: Kess Cook Link: http://lkml.kernel.org/r/1490770820-24472-1-git-send-email-shenzhengyi@gmail.com Signed-off-by: Thomas Gleixner --- arch/x86/boot/compressed/error.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/boot/compressed/error.c b/arch/x86/boot/compressed/error.c index 6248740b68b5..31922023de49 100644 --- a/arch/x86/boot/compressed/error.c +++ b/arch/x86/boot/compressed/error.c @@ -4,6 +4,7 @@ * memcpy() and memmove() are defined for the compressed boot environment. */ #include "misc.h" +#include "error.h" void warn(char *m) { -- cgit v1.2.3 From 13a6798e4a03096b11bf402a063786a7be55d426 Mon Sep 17 00:00:00 2001 From: Mike Galbraith Date: Fri, 31 Mar 2017 15:12:12 -0700 Subject: kasan: do not sanitize kexec purgatory Fixes this: kexec: Undefined symbol: __asan_load8_noabort kexec-bzImage64: Loading purgatory failed Link: http://lkml.kernel.org/r/1489672155.4458.7.camel@gmx.de Signed-off-by: Mike Galbraith Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Dmitry Vyukov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- arch/x86/purgatory/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/purgatory/Makefile b/arch/x86/purgatory/Makefile index 555b9fa0ad43..7dbdb780264d 100644 --- a/arch/x86/purgatory/Makefile +++ b/arch/x86/purgatory/Makefile @@ -8,6 +8,7 @@ PURGATORY_OBJS = $(addprefix $(obj)/,$(purgatory-y)) LDFLAGS_purgatory.ro := -e purgatory_start -r --no-undefined -nostdlib -z nodefaultlib targets += purgatory.ro +KASAN_SANITIZE := n KCOV_INSTRUMENT := n # Default KBUILD_CFLAGS can have -pg option set when FTRACE is enabled. That -- cgit v1.2.3