From a13cff318cafbd493b8d5d679e5f3f761084c4fe Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:07:14 +0200 Subject: s390/mm: refactor global pgste updates Replace the s390 specific page table walker for the pgste updates with a call to the common code walk_page_range function. There are now two pte modification functions, one for the reset of the CMMA state and another one for the initialization of the storage keys. Signed-off-by: Dominik Dingel Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgalloc.h | 2 -- arch/s390/include/asm/pgtable.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgalloc.h b/arch/s390/include/asm/pgalloc.h index d39a31c3cdf2..e510b9460efa 100644 --- a/arch/s390/include/asm/pgalloc.h +++ b/arch/s390/include/asm/pgalloc.h @@ -22,8 +22,6 @@ unsigned long *page_table_alloc(struct mm_struct *); void page_table_free(struct mm_struct *, unsigned long *); void page_table_free_rcu(struct mmu_gather *, unsigned long *, unsigned long); -void page_table_reset_pgste(struct mm_struct *, unsigned long, unsigned long, - bool init_skey); int set_guest_storage_key(struct mm_struct *mm, unsigned long addr, unsigned long key, bool nq); diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 57c882761dea..4399be1aaeff 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1747,6 +1747,7 @@ extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); extern void s390_enable_skey(void); +extern void s390_reset_cmma(struct mm_struct *mm); /* * No page table caches to initialise -- cgit v1.2.3 From 2faee8ff9dc6f4bfe46f6d2d110add858140fb20 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:08:38 +0200 Subject: s390/mm: prevent and break zero page mappings in case of storage keys As soon as storage keys are enabled we need to stop working on zero page mappings to prevent inconsistencies between storage keys and pgste. Otherwise the following data corruption could happen: 1) guest enables storage key 2) guest sets storage key for not mapped page X -> change goes to PGSTE 3) guest reads from page X -> as X was not dirty before, the page will be zero page backed, storage key from PGSTE for X will go to storage key for zero page 4) guest sets storage key for not mapped page Y (same logic as above) 5) guest reads from page Y -> as Y was not dirty before, the page will be zero page backed, storage key from PGSTE for Y will go to storage key for zero page, overwriting storage key for X While holding the mmap_sem, we are safe against changes on entries we already fixed, as every fault would need to take the mmap_sem (read). Other vCPUs executing storage key instructions will get a one-time interception and be serialized with mmap_sem as well. 
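For context: the mm_forbids_zeropage() hook wired up below is consulted by the generic anonymous read-fault path before it installs the shared zero page. A minimal sketch of that consumer, simplified from mm/memory.c; the function name and the alloc_fresh_anon_page() helper are illustrative only, not the exact upstream code:

        /* read fault on an anonymous vma */
        static int sketch_anonymous_read_fault(struct mm_struct *mm,
                                               struct vm_area_struct *vma,
                                               unsigned long address, pte_t *ptep)
        {
                pte_t entry;

                if (!mm_forbids_zeropage(mm)) {
                        /* common case: back the read with the shared zero page */
                        entry = pte_mkspecial(pfn_pte(my_zero_pfn(address),
                                                      vma->vm_page_prot));
                        set_pte_at(mm, address, ptep, entry);
                        return 0;
                }
                /* storage keys active: a shared zero page would merge the
                 * per-page key state of X and Y, so hand out a fresh
                 * anonymous page instead (hypothetical helper) */
                return alloc_fresh_anon_page(mm, vma, address, ptep);
        }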
Signed-off-by: Dominik Dingel Reviewed-by: Paolo Bonzini Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 5 +++++ arch/s390/mm/pgtable.c | 13 ++++++++++++- 2 files changed, 17 insertions(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 4399be1aaeff..df2e7f14ffb7 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -479,6 +479,11 @@ static inline int mm_has_pgste(struct mm_struct *mm) return 0; } +/* + * In the case that a guest uses storage keys + * faults should no longer be backed by zero pages + */ +#define mm_forbids_zeropage mm_use_skey static inline int mm_use_skey(struct mm_struct *mm) { #ifdef CONFIG_PGSTE diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 019afdf50b1a..0f1e9ff6bc12 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -1256,6 +1256,15 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, pgste_t pgste; pgste = pgste_get_lock(pte); + /* + * Remove all zero page mappings, + * after establishing a policy to forbid zero page mappings + * following faults for that page will get fresh anonymous pages + */ + if (is_zero_pfn(pte_pfn(*pte))) { + ptep_flush_direct(walk->mm, addr, pte); + pte_val(*pte) = _PAGE_INVALID; + } /* Clear storage key */ pgste_val(pgste) &= ~(PGSTE_ACC_BITS | PGSTE_FP_BIT | PGSTE_GR_BIT | PGSTE_GC_BIT); @@ -1274,9 +1283,11 @@ void s390_enable_skey(void) down_write(&mm->mmap_sem); if (mm_use_skey(mm)) goto out_up; + + mm->context.use_skey = 1; + walk.mm = mm; walk_page_range(0, TASK_SIZE, &walk); - mm->context.use_skey = 1; out_up: up_write(&mm->mmap_sem); -- cgit v1.2.3 From 3ac8e38015d4fd1c12e4e048a01a9f059a2053a2 Mon Sep 17 00:00:00 2001 From: Dominik Dingel Date: Thu, 23 Oct 2014 12:09:17 +0200 Subject: s390/mm: disable KSM for storage key enabled pages When storage keys are enabled unmerge already merged pages and prevent new pages from being merged. 
Signed-off-by: Dominik Dingel Acked-by: Christian Borntraeger Reviewed-by: Paolo Bonzini Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 2 +- arch/s390/kvm/priv.c | 17 ++++++++++++----- arch/s390/mm/pgtable.c | 16 +++++++++++++++- 3 files changed, 28 insertions(+), 7 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index df2e7f14ffb7..00d460742e1e 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1751,7 +1751,7 @@ static inline pte_t mk_swap_pte(unsigned long type, unsigned long offset) extern int vmem_add_mapping(unsigned long start, unsigned long size); extern int vmem_remove_mapping(unsigned long start, unsigned long size); extern int s390_enable_sie(void); -extern void s390_enable_skey(void); +extern int s390_enable_skey(void); extern void s390_reset_cmma(struct mm_struct *mm); /* diff --git a/arch/s390/kvm/priv.c b/arch/s390/kvm/priv.c index 72bb2dd8b9cd..f47cb0c6d906 100644 --- a/arch/s390/kvm/priv.c +++ b/arch/s390/kvm/priv.c @@ -156,21 +156,25 @@ static int handle_store_cpu_address(struct kvm_vcpu *vcpu) return 0; } -static void __skey_check_enable(struct kvm_vcpu *vcpu) +static int __skey_check_enable(struct kvm_vcpu *vcpu) { + int rc = 0; if (!(vcpu->arch.sie_block->ictl & (ICTL_ISKE | ICTL_SSKE | ICTL_RRBE))) - return; + return rc; - s390_enable_skey(); + rc = s390_enable_skey(); trace_kvm_s390_skey_related_inst(vcpu); vcpu->arch.sie_block->ictl &= ~(ICTL_ISKE | ICTL_SSKE | ICTL_RRBE); + return rc; } static int handle_skey(struct kvm_vcpu *vcpu) { - __skey_check_enable(vcpu); + int rc = __skey_check_enable(vcpu); + if (rc) + return rc; vcpu->stat.instruction_storage_key++; if (vcpu->arch.sie_block->gpsw.mask & PSW_MASK_PSTATE) @@ -683,7 +687,10 @@ static int handle_pfmf(struct kvm_vcpu *vcpu) } if (vcpu->run->s.regs.gprs[reg1] & PFMF_SK) { - __skey_check_enable(vcpu); + int rc = __skey_check_enable(vcpu); + + if (rc) + return rc; if (set_guest_storage_key(current->mm, useraddr, vcpu->run->s.regs.gprs[reg1] & PFMF_KEY, vcpu->run->s.regs.gprs[reg1] & PFMF_NQ)) diff --git a/arch/s390/mm/pgtable.c b/arch/s390/mm/pgtable.c index 0f1e9ff6bc12..b1871d39e46e 100644 --- a/arch/s390/mm/pgtable.c +++ b/arch/s390/mm/pgtable.c @@ -18,6 +18,8 @@ #include #include #include +#include +#include #include #include @@ -1275,22 +1277,34 @@ static int __s390_enable_skey(pte_t *pte, unsigned long addr, return 0; } -void s390_enable_skey(void) +int s390_enable_skey(void) { struct mm_walk walk = { .pte_entry = __s390_enable_skey }; struct mm_struct *mm = current->mm; + struct vm_area_struct *vma; + int rc = 0; down_write(&mm->mmap_sem); if (mm_use_skey(mm)) goto out_up; mm->context.use_skey = 1; + for (vma = mm->mmap; vma; vma = vma->vm_next) { + if (ksm_madvise(vma, vma->vm_start, vma->vm_end, + MADV_UNMERGEABLE, &vma->vm_flags)) { + mm->context.use_skey = 0; + rc = -ENOMEM; + goto out_up; + } + } + mm->def_flags &= ~VM_MERGEABLE; walk.mm = mm; walk_page_range(0, TASK_SIZE, &walk); out_up: up_write(&mm->mmap_sem); + return rc; } EXPORT_SYMBOL_GPL(s390_enable_skey); -- cgit v1.2.3 From c933146a5e41e42ea3eb4f34fa02e201da3f068e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Wed, 15 Oct 2014 12:17:38 +0200 Subject: s390/ftrace,kprobes: allow to patch first instruction If the function tracer is enabled, allow setting kprobes on the first instruction of a function (which is the function trace caller): If no kprobe is set, handling of enabling and disabling function 
tracing of a function simply patches the first instruction. Either it is a nop (right now it's an unconditional branch, which skips the mcount block), or it's a branch to the ftrace_caller() function. If a kprobe is being placed on a function tracer calling instruction we encode if we actually have a nop or branch in the remaining bytes after the breakpoint instruction (illegal opcode). This is possible, since the size of the instruction used for the nop and branch is six bytes, while the size of the breakpoint is only two bytes. Therefore the first two bytes contain the illegal opcode and the last four bytes contain either "0" for nop or "1" for branch. The kprobes code will then execute/simulate the correct instruction. Instruction patching for kprobes and function tracer is always done with stop_machine(). Therefore we don't have any races where an instruction is patched concurrently on a different cpu. Besides that also the program check handler which executes the function trace caller instruction won't be executed concurrently to any stop_machine() execution. This allows to keep full fault based kprobes handling which generates correct pt_regs contents automatically. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 52 ++++++++++++++-- arch/s390/include/asm/kprobes.h | 1 + arch/s390/include/asm/lowcore.h | 4 +- arch/s390/include/asm/pgtable.h | 12 ++++ arch/s390/kernel/asm-offsets.c | 1 - arch/s390/kernel/early.c | 4 -- arch/s390/kernel/ftrace.c | 132 +++++++++++++++++++++++++--------------- arch/s390/kernel/kprobes.c | 92 ++++++++++++++++++++-------- arch/s390/kernel/mcount.S | 1 + arch/s390/kernel/setup.c | 2 - arch/s390/kernel/smp.c | 1 - scripts/recordmcount.c | 2 +- scripts/recordmcount.pl | 2 +- 13 files changed, 214 insertions(+), 92 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 3aef8afec336..785041f1dc77 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -1,25 +1,67 @@ #ifndef _ASM_S390_FTRACE_H #define _ASM_S390_FTRACE_H +#define ARCH_SUPPORTS_FTRACE_OPS 1 + +#define MCOUNT_INSN_SIZE 24 +#define MCOUNT_RETURN_FIXUP 18 + #ifndef __ASSEMBLY__ -extern void _mcount(void); +void _mcount(void); +void ftrace_caller(void); + extern char ftrace_graph_caller_end; +extern unsigned long ftrace_plt; struct dyn_arch_ftrace { }; -#define MCOUNT_ADDR ((long)_mcount) +#define MCOUNT_ADDR ((unsigned long)_mcount) +#define FTRACE_ADDR ((unsigned long)ftrace_caller) +#define KPROBE_ON_FTRACE_NOP 0 +#define KPROBE_ON_FTRACE_CALL 1 static inline unsigned long ftrace_call_adjust(unsigned long addr) { return addr; } -#endif /* __ASSEMBLY__ */ +struct ftrace_insn { + u16 opc; + s32 disp; +} __packed; -#define MCOUNT_INSN_SIZE 18 +static inline void ftrace_generate_nop_insn(struct ftrace_insn *insn) +{ +#ifdef CONFIG_FUNCTION_TRACER + /* jg .+24 */ + insn->opc = 0xc0f4; + insn->disp = MCOUNT_INSN_SIZE / 2; +#endif +} -#define ARCH_SUPPORTS_FTRACE_OPS 1 +static inline int is_ftrace_nop(struct ftrace_insn *insn) +{ +#ifdef CONFIG_FUNCTION_TRACER + if (insn->disp == MCOUNT_INSN_SIZE / 2) + return 1; +#endif + return 0; +} + +static inline void ftrace_generate_call_insn(struct ftrace_insn *insn, + unsigned long ip) +{ +#ifdef CONFIG_FUNCTION_TRACER + unsigned long target; + /* brasl r0,ftrace_caller */ + target = is_module_addr((void *) ip) ? 
ftrace_plt : FTRACE_ADDR; + insn->opc = 0xc005; + insn->disp = (target - ip) / 2; +#endif +} + +#endif /* __ASSEMBLY__ */ #endif /* _ASM_S390_FTRACE_H */ diff --git a/arch/s390/include/asm/kprobes.h b/arch/s390/include/asm/kprobes.h index 98629173ce3b..b47ad3b642cc 100644 --- a/arch/s390/include/asm/kprobes.h +++ b/arch/s390/include/asm/kprobes.h @@ -60,6 +60,7 @@ typedef u16 kprobe_opcode_t; struct arch_specific_insn { /* copy of original instruction */ kprobe_opcode_t *insn; + unsigned int is_ftrace_insn : 1; }; struct prev_kprobe { diff --git a/arch/s390/include/asm/lowcore.h b/arch/s390/include/asm/lowcore.h index 6cc51fe84410..34fbcac61133 100644 --- a/arch/s390/include/asm/lowcore.h +++ b/arch/s390/include/asm/lowcore.h @@ -147,7 +147,7 @@ struct _lowcore { __u32 softirq_pending; /* 0x02ec */ __u32 percpu_offset; /* 0x02f0 */ __u32 machine_flags; /* 0x02f4 */ - __u32 ftrace_func; /* 0x02f8 */ + __u8 pad_0x02f8[0x02fc-0x02f8]; /* 0x02f8 */ __u32 spinlock_lockval; /* 0x02fc */ __u8 pad_0x0300[0x0e00-0x0300]; /* 0x0300 */ @@ -297,7 +297,7 @@ struct _lowcore { __u64 percpu_offset; /* 0x0378 */ __u64 vdso_per_cpu_data; /* 0x0380 */ __u64 machine_flags; /* 0x0388 */ - __u64 ftrace_func; /* 0x0390 */ + __u8 pad_0x0390[0x0398-0x0390]; /* 0x0390 */ __u64 gmap; /* 0x0398 */ __u32 spinlock_lockval; /* 0x03a0 */ __u8 pad_0x03a0[0x0400-0x03a4]; /* 0x03a4 */ diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 00d460742e1e..5ef1a266936a 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -133,6 +133,18 @@ extern unsigned long MODULES_END; #define MODULES_LEN (1UL << 31) #endif +static inline int is_module_addr(void *addr) +{ +#ifdef CONFIG_64BIT + BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); + if (addr < (void *)MODULES_VADDR) + return 0; + if (addr > (void *)MODULES_END) + return 0; +#endif + return 1; +} + /* * A 31 bit pagetable entry of S390 has following format: * | PFRA | | OS | diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index ef279a136801..f3a78337ca86 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -156,7 +156,6 @@ int main(void) DEFINE(__LC_INT_CLOCK, offsetof(struct _lowcore, int_clock)); DEFINE(__LC_MCCK_CLOCK, offsetof(struct _lowcore, mcck_clock)); DEFINE(__LC_MACHINE_FLAGS, offsetof(struct _lowcore, machine_flags)); - DEFINE(__LC_FTRACE_FUNC, offsetof(struct _lowcore, ftrace_func)); DEFINE(__LC_DUMP_REIPL, offsetof(struct _lowcore, ipib)); BLANK(); DEFINE(__LC_CPU_TIMER_SAVE_AREA, offsetof(struct _lowcore, cpu_timer_save_area)); diff --git a/arch/s390/kernel/early.c b/arch/s390/kernel/early.c index cef2879edff3..302ac1f7f8e7 100644 --- a/arch/s390/kernel/early.c +++ b/arch/s390/kernel/early.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -490,8 +489,5 @@ void __init startup_init(void) detect_machine_facilities(); setup_topology(); sclp_early_detect(); -#ifdef CONFIG_DYNAMIC_FTRACE - S390_lowcore.ftrace_func = (unsigned long)ftrace_caller; -#endif lockdep_on(); } diff --git a/arch/s390/kernel/ftrace.c b/arch/s390/kernel/ftrace.c index 51d14fe5eb9a..5744d25c1d33 100644 --- a/arch/s390/kernel/ftrace.c +++ b/arch/s390/kernel/ftrace.c @@ -7,6 +7,7 @@ * Martin Schwidefsky */ +#include #include #include #include @@ -15,60 +16,39 @@ #include #include #include +#include #include "entry.h" -void mcount_replace_code(void); -void ftrace_disable_code(void); -void ftrace_enable_insn(void); - /* * The mcount code looks like this: 
* stg %r14,8(%r15) # offset 0 * larl %r1,<&counter> # offset 6 * brasl %r14,_mcount # offset 12 * lg %r14,8(%r15) # offset 18 - * Total length is 24 bytes. The complete mcount block initially gets replaced - * by ftrace_make_nop. Subsequent calls to ftrace_make_call / ftrace_make_nop - * only patch the jg/lg instruction within the block. - * Note: we do not patch the first instruction to an unconditional branch, - * since that would break kprobes/jprobes. It is easier to leave the larl - * instruction in and only modify the second instruction. + * Total length is 24 bytes. Only the first instruction will be patched + * by ftrace_make_call / ftrace_make_nop. * The enabled ftrace code block looks like this: - * larl %r0,.+24 # offset 0 - * > lg %r1,__LC_FTRACE_FUNC # offset 6 - * br %r1 # offset 12 - * brcl 0,0 # offset 14 - * brc 0,0 # offset 20 + * > brasl %r0,ftrace_caller # offset 0 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 * The ftrace function gets called with a non-standard C function call ABI * where r0 contains the return address. It is also expected that the called * function only clobbers r0 and r1, but restores r2-r15. + * For module code we can't directly jump to ftrace caller, but need a + * trampoline (ftrace_plt), which clobbers also r1. * The return point of the ftrace function has offset 24, so execution * continues behind the mcount block. - * larl %r0,.+24 # offset 0 - * > jg .+18 # offset 6 - * br %r1 # offset 12 - * brcl 0,0 # offset 14 - * brc 0,0 # offset 20 + * The disabled ftrace code block looks like this: + * > jg .+24 # offset 0 + * larl %r1,<&counter> # offset 6 + * brasl %r14,_mcount # offset 12 + * lg %r14,8(%r15) # offset 18 * The jg instruction branches to offset 24 to skip as many instructions * as possible. */ -asm( - " .align 4\n" - "mcount_replace_code:\n" - " larl %r0,0f\n" - "ftrace_disable_code:\n" - " jg 0f\n" - " br %r1\n" - " brcl 0,0\n" - " brc 0,0\n" - "0:\n" - " .align 4\n" - "ftrace_enable_insn:\n" - " lg %r1,"__stringify(__LC_FTRACE_FUNC)"\n"); - -#define MCOUNT_BLOCK_SIZE 24 -#define MCOUNT_INSN_OFFSET 6 -#define FTRACE_INSN_SIZE 6 + +unsigned long ftrace_plt; int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, unsigned long addr) @@ -79,24 +59,62 @@ int ftrace_modify_call(struct dyn_ftrace *rec, unsigned long old_addr, int ftrace_make_nop(struct module *mod, struct dyn_ftrace *rec, unsigned long addr) { - /* Initial replacement of the whole mcount block */ - if (addr == MCOUNT_ADDR) { - if (probe_kernel_write((void *) rec->ip - MCOUNT_INSN_OFFSET, - mcount_replace_code, - MCOUNT_BLOCK_SIZE)) - return -EPERM; - return 0; + struct ftrace_insn insn; + unsigned short op; + void *from, *to; + size_t size; + + ftrace_generate_nop_insn(&insn); + size = sizeof(insn); + from = &insn; + to = (void *) rec->ip; + if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op))) + return -EFAULT; + /* + * If we find a breakpoint instruction, a kprobe has been placed + * at the beginning of the function. We write the constant + * KPROBE_ON_FTRACE_NOP into the remaining four bytes of the original + * instruction so that the kprobes handler can execute a nop, if it + * reaches this breakpoint. 
+ */ + if (op == BREAKPOINT_INSTRUCTION) { + size -= 2; + from += 2; + to += 2; + insn.disp = KPROBE_ON_FTRACE_NOP; } - if (probe_kernel_write((void *) rec->ip, ftrace_disable_code, - MCOUNT_INSN_SIZE)) + if (probe_kernel_write(to, from, size)) return -EPERM; return 0; } int ftrace_make_call(struct dyn_ftrace *rec, unsigned long addr) { - if (probe_kernel_write((void *) rec->ip, ftrace_enable_insn, - FTRACE_INSN_SIZE)) + struct ftrace_insn insn; + unsigned short op; + void *from, *to; + size_t size; + + ftrace_generate_call_insn(&insn, rec->ip); + size = sizeof(insn); + from = &insn; + to = (void *) rec->ip; + if (probe_kernel_read(&op, (void *) rec->ip, sizeof(op))) + return -EFAULT; + /* + * If we find a breakpoint instruction, a kprobe has been placed + * at the beginning of the function. We write the constant + * KPROBE_ON_FTRACE_CALL into the remaining four bytes of the original + * instruction so that the kprobes handler can execute a brasl if it + * reaches this breakpoint. + */ + if (op == BREAKPOINT_INSTRUCTION) { + size -= 2; + from += 2; + to += 2; + insn.disp = KPROBE_ON_FTRACE_CALL; + } + if (probe_kernel_write(to, from, size)) return -EPERM; return 0; } @@ -111,6 +129,24 @@ int __init ftrace_dyn_arch_init(void) return 0; } +static int __init ftrace_plt_init(void) +{ + unsigned int *ip; + + ftrace_plt = (unsigned long) module_alloc(PAGE_SIZE); + if (!ftrace_plt) + panic("cannot allocate ftrace plt\n"); + ip = (unsigned int *) ftrace_plt; + ip[0] = 0x0d10e310; /* basr 1,0; lg 1,10(1); br 1 */ + ip[1] = 0x100a0004; + ip[2] = 0x07f10000; + ip[3] = FTRACE_ADDR >> 32; + ip[4] = FTRACE_ADDR & 0xffffffff; + set_memory_ro(ftrace_plt, 1); + return 0; +} +device_initcall(ftrace_plt_init); + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* * Hook the return address and push it in the stack of return addresses diff --git a/arch/s390/kernel/kprobes.c b/arch/s390/kernel/kprobes.c index 014d4729b134..d6716c29b7f8 100644 --- a/arch/s390/kernel/kprobes.c +++ b/arch/s390/kernel/kprobes.c @@ -29,6 +29,7 @@ #include #include #include +#include #include #include #include @@ -60,10 +61,21 @@ struct kprobe_insn_cache kprobe_dmainsn_slots = { static void __kprobes copy_instruction(struct kprobe *p) { + unsigned long ip = (unsigned long) p->addr; s64 disp, new_disp; u64 addr, new_addr; - memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + if (ftrace_location(ip) == ip) { + /* + * If kprobes patches the instruction that is morphed by + * ftrace make sure that kprobes always sees the branch + * "jg .+24" that skips the mcount block + */ + ftrace_generate_nop_insn((struct ftrace_insn *)p->ainsn.insn); + p->ainsn.is_ftrace_insn = 1; + } else + memcpy(p->ainsn.insn, p->addr, insn_length(p->opcode >> 8)); + p->opcode = p->ainsn.insn[0]; if (!probe_is_insn_relative_long(p->ainsn.insn)) return; /* @@ -85,18 +97,6 @@ static inline int is_kernel_addr(void *addr) return addr < (void *)_end; } -static inline int is_module_addr(void *addr) -{ -#ifdef CONFIG_64BIT - BUILD_BUG_ON(MODULES_LEN > (1UL << 31)); - if (addr < (void *)MODULES_VADDR) - return 0; - if (addr > (void *)MODULES_END) - return 0; -#endif - return 1; -} - static int __kprobes s390_get_insn_slot(struct kprobe *p) { /* @@ -132,43 +132,63 @@ int __kprobes arch_prepare_kprobe(struct kprobe *p) return -EINVAL; if (s390_get_insn_slot(p)) return -ENOMEM; - p->opcode = *p->addr; copy_instruction(p); return 0; } -struct ins_replace_args { - kprobe_opcode_t *ptr; - kprobe_opcode_t opcode; +int arch_check_ftrace_location(struct kprobe *p) +{ + return 0; +} 
+ +struct swap_insn_args { + struct kprobe *p; + unsigned int arm_kprobe : 1; }; -static int __kprobes swap_instruction(void *aref) +static int __kprobes swap_instruction(void *data) { struct kprobe_ctlblk *kcb = get_kprobe_ctlblk(); unsigned long status = kcb->kprobe_status; - struct ins_replace_args *args = aref; - + struct swap_insn_args *args = data; + struct ftrace_insn new_insn, *insn; + struct kprobe *p = args->p; + size_t len; + + new_insn.opc = args->arm_kprobe ? BREAKPOINT_INSTRUCTION : p->opcode; + len = sizeof(new_insn.opc); + if (!p->ainsn.is_ftrace_insn) + goto skip_ftrace; + len = sizeof(new_insn); + insn = (struct ftrace_insn *) p->addr; + if (args->arm_kprobe) { + if (is_ftrace_nop(insn)) + new_insn.disp = KPROBE_ON_FTRACE_NOP; + else + new_insn.disp = KPROBE_ON_FTRACE_CALL; + } else { + ftrace_generate_call_insn(&new_insn, (unsigned long)p->addr); + if (insn->disp == KPROBE_ON_FTRACE_NOP) + ftrace_generate_nop_insn(&new_insn); + } +skip_ftrace: kcb->kprobe_status = KPROBE_SWAP_INST; - probe_kernel_write(args->ptr, &args->opcode, sizeof(args->opcode)); + probe_kernel_write(p->addr, &new_insn, len); kcb->kprobe_status = status; return 0; } void __kprobes arch_arm_kprobe(struct kprobe *p) { - struct ins_replace_args args; + struct swap_insn_args args = {.p = p, .arm_kprobe = 1}; - args.ptr = p->addr; - args.opcode = BREAKPOINT_INSTRUCTION; stop_machine(swap_instruction, &args, NULL); } void __kprobes arch_disarm_kprobe(struct kprobe *p) { - struct ins_replace_args args; + struct swap_insn_args args = {.p = p, .arm_kprobe = 0}; - args.ptr = p->addr; - args.opcode = p->opcode; stop_machine(swap_instruction, &args, NULL); } @@ -459,6 +479,24 @@ static void __kprobes resume_execution(struct kprobe *p, struct pt_regs *regs) unsigned long ip = regs->psw.addr & PSW_ADDR_INSN; int fixup = probe_get_fixup_type(p->ainsn.insn); + /* Check if the kprobes location is an enabled ftrace caller */ + if (p->ainsn.is_ftrace_insn) { + struct ftrace_insn *insn = (struct ftrace_insn *) p->addr; + struct ftrace_insn call_insn; + + ftrace_generate_call_insn(&call_insn, (unsigned long) p->addr); + /* + * A kprobe on an enabled ftrace call site actually single + * stepped an unconditional branch (ftrace nop equivalent). + * Now we need to fixup things and pretend that a brasl r0,... + * was executed instead. 
+ */ + if (insn->disp == KPROBE_ON_FTRACE_CALL) { + ip += call_insn.disp * 2 - MCOUNT_INSN_SIZE; + regs->gprs[0] = (unsigned long)p->addr + sizeof(*insn); + } + } + if (fixup & FIXUP_PSW_NORMAL) ip += (unsigned long) p->addr - (unsigned long) p->ainsn.insn; diff --git a/arch/s390/kernel/mcount.S b/arch/s390/kernel/mcount.S index 4300ea374826..b6dfc5bfcb89 100644 --- a/arch/s390/kernel/mcount.S +++ b/arch/s390/kernel/mcount.S @@ -27,6 +27,7 @@ ENTRY(ftrace_caller) .globl ftrace_regs_caller .set ftrace_regs_caller,ftrace_caller lgr %r1,%r15 + aghi %r0,MCOUNT_RETURN_FIXUP aghi %r15,-STACK_FRAME_SIZE stg %r1,__SF_BACKCHAIN(%r15) stg %r1,(STACK_PTREGS_GPRS+15*8)(%r15) diff --git a/arch/s390/kernel/setup.c b/arch/s390/kernel/setup.c index e80d9ff9a56d..4e532c67832f 100644 --- a/arch/s390/kernel/setup.c +++ b/arch/s390/kernel/setup.c @@ -41,7 +41,6 @@ #include #include #include -#include #include #include #include @@ -356,7 +355,6 @@ static void __init setup_lowcore(void) lc->steal_timer = S390_lowcore.steal_timer; lc->last_update_timer = S390_lowcore.last_update_timer; lc->last_update_clock = S390_lowcore.last_update_clock; - lc->ftrace_func = S390_lowcore.ftrace_func; restart_stack = __alloc_bootmem(ASYNC_SIZE, ASYNC_SIZE, 0); restart_stack += ASYNC_SIZE; diff --git a/arch/s390/kernel/smp.c b/arch/s390/kernel/smp.c index 6fd9e60101f1..0b499f5cbe19 100644 --- a/arch/s390/kernel/smp.c +++ b/arch/s390/kernel/smp.c @@ -236,7 +236,6 @@ static void pcpu_prepare_secondary(struct pcpu *pcpu, int cpu) lc->percpu_offset = __per_cpu_offset[cpu]; lc->kernel_asce = S390_lowcore.kernel_asce; lc->machine_flags = S390_lowcore.machine_flags; - lc->ftrace_func = S390_lowcore.ftrace_func; lc->user_timer = lc->system_timer = lc->steal_timer = 0; __ctl_store(lc->cregs_save_area, 0, 15); save_access_regs((unsigned int *) lc->access_regs_save_area); diff --git a/scripts/recordmcount.c b/scripts/recordmcount.c index 001facfa5b74..3d1984e59a30 100644 --- a/scripts/recordmcount.c +++ b/scripts/recordmcount.c @@ -404,7 +404,7 @@ do_file(char const *const fname) } if (w2(ghdr->e_machine) == EM_S390) { reltype = R_390_64; - mcount_adjust_64 = -8; + mcount_adjust_64 = -14; } if (w2(ghdr->e_machine) == EM_MIPS) { reltype = R_MIPS_64; diff --git a/scripts/recordmcount.pl b/scripts/recordmcount.pl index d4b665610d67..56ea99a12ab7 100755 --- a/scripts/recordmcount.pl +++ b/scripts/recordmcount.pl @@ -243,7 +243,7 @@ if ($arch eq "x86_64") { } elsif ($arch eq "s390" && $bits == 64) { $mcount_regex = "^\\s*([0-9a-fA-F]+):\\s*R_390_(PC|PLT)32DBL\\s+_mcount\\+0x2\$"; - $mcount_adjust = -8; + $mcount_adjust = -14; $alignment = 8; $type = ".quad"; $ld .= " -m elf64_s390"; -- cgit v1.2.3 From fcbe08d66f57c368e77ca729dd01e6b539ffb3ff Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Fri, 24 Oct 2014 10:52:29 +0200 Subject: s390/mm: pmdp_get_and_clear_full optimization Analogous to ptep_get_and_clear_full, define a variant of the pmdp_get_and_clear primitive which gets the full hint from the mmu_gather struct. This allows s390 to avoid a costly instruction when destroying an address space. 
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pgtable.h | 13 +++++++++++++ include/asm-generic/pgtable.h | 11 +++++++++++ mm/huge_memory.c | 3 ++- 3 files changed, 26 insertions(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pgtable.h b/arch/s390/include/asm/pgtable.h index 5ef1a266936a..5e102422c9ab 100644 --- a/arch/s390/include/asm/pgtable.h +++ b/arch/s390/include/asm/pgtable.h @@ -1651,6 +1651,19 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, return pmd; } +#define __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL +static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, + unsigned long address, + pmd_t *pmdp, int full) +{ + pmd_t pmd = *pmdp; + + if (!full) + pmdp_flush_lazy(mm, address, pmdp); + pmd_clear(pmdp); + return pmd; +} + #define __HAVE_ARCH_PMDP_CLEAR_FLUSH static inline pmd_t pmdp_clear_flush(struct vm_area_struct *vma, unsigned long address, pmd_t *pmdp) diff --git a/include/asm-generic/pgtable.h b/include/asm-generic/pgtable.h index 752e30d63904..177d5973b132 100644 --- a/include/asm-generic/pgtable.h +++ b/include/asm-generic/pgtable.h @@ -103,6 +103,17 @@ static inline pmd_t pmdp_get_and_clear(struct mm_struct *mm, #endif /* CONFIG_TRANSPARENT_HUGEPAGE */ #endif +#ifndef __HAVE_ARCH_PMDP_GET_AND_CLEAR_FULL +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +static inline pmd_t pmdp_get_and_clear_full(struct mm_struct *mm, + unsigned long address, pmd_t *pmdp, + int full) +{ + return pmdp_get_and_clear(mm, address, pmdp); +} +#endif /* CONFIG_TRANSPARENT_HUGEPAGE */ +#endif + #ifndef __HAVE_ARCH_PTEP_GET_AND_CLEAR_FULL static inline pte_t ptep_get_and_clear_full(struct mm_struct *mm, unsigned long address, pte_t *ptep, diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 7e9c15cb93a9..6a37f1b2ed1e 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -1400,7 +1400,8 @@ int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, * pgtable_trans_huge_withdraw after finishing pmdp related * operations. */ - orig_pmd = pmdp_get_and_clear(tlb->mm, addr, pmd); + orig_pmd = pmdp_get_and_clear_full(tlb->mm, addr, pmd, + tlb->fullmm); tlb_remove_pmd_tlb_entry(tlb, pmd, addr); pgtable = pgtable_trans_huge_withdraw(tlb->mm, pmd); if (is_huge_zero_pmd(orig_pmd)) { -- cgit v1.2.3 From f318a1229bd8d377282ddb37158812073701a22b Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Wed, 29 Oct 2014 12:50:31 +0100 Subject: s390/cmpxchg: use compiler builtins The kernel build for s390 fails for gcc compilers with version 3.x, set the minimum required version of gcc to version 4.3. As the atomic builtins are available with all gcc 4.x compilers, use the __sync_val_compare_and_swap and __sync_bool_compare_and_swap functions to replace the complex macro and inline assembler magic in include/asm/cmpxchg.h. The compiler can just-do-it and generates better code with the builtins. While we are at it use __sync_bool_compare_and_swap for the _raw_compare_and_swap function in the spinlock code as well. 
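As a quick reference for the conversion below, the semantics of the two GCC builtins the new macros are built on (portable compiler builtins, nothing s390-specific; the variable names here are illustrative):

        unsigned int lock = 0;
        unsigned int prev;
        int swapped;

        /* returns the value lock held before the attempt; the store of 1
         * takes place only if that value was 0 */
        prev = __sync_val_compare_and_swap(&lock, 0U, 1U);

        /* returns nonzero iff the exchange took place, i.e. lock was 1 */
        swapped = __sync_bool_compare_and_swap(&lock, 1U, 2U);

Both builtins act as full memory barriers, which matches the serializing compare-and-swap instructions (cs/csg) they compile to on s390.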
Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cmpxchg.h | 240 +++------------------------------------ arch/s390/include/asm/spinlock.h | 9 +- arch/s390/kernel/asm-offsets.c | 4 +- 3 files changed, 19 insertions(+), 234 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cmpxchg.h b/arch/s390/include/asm/cmpxchg.h index 4236408070e5..6259895fcd97 100644 --- a/arch/s390/include/asm/cmpxchg.h +++ b/arch/s390/include/asm/cmpxchg.h @@ -11,200 +11,28 @@ #include #include -extern void __xchg_called_with_bad_pointer(void); - -static inline unsigned long __xchg(unsigned long x, void *ptr, int size) -{ - unsigned long addr, old; - int shift; - - switch (size) { - case 1: - addr = (unsigned long) ptr; - shift = (3 ^ (addr & 3)) << 3; - addr ^= addr & 3; - asm volatile( - " l %0,%4\n" - "0: lr 0,%0\n" - " nr 0,%3\n" - " or 0,%2\n" - " cs %0,0,%4\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) addr) - : "d" ((x & 0xff) << shift), "d" (~(0xff << shift)), - "Q" (*(int *) addr) : "memory", "cc", "0"); - return old >> shift; - case 2: - addr = (unsigned long) ptr; - shift = (2 ^ (addr & 2)) << 3; - addr ^= addr & 2; - asm volatile( - " l %0,%4\n" - "0: lr 0,%0\n" - " nr 0,%3\n" - " or 0,%2\n" - " cs %0,0,%4\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) addr) - : "d" ((x & 0xffff) << shift), "d" (~(0xffff << shift)), - "Q" (*(int *) addr) : "memory", "cc", "0"); - return old >> shift; - case 4: - asm volatile( - " l %0,%3\n" - "0: cs %0,%2,%3\n" - " jl 0b\n" - : "=&d" (old), "=Q" (*(int *) ptr) - : "d" (x), "Q" (*(int *) ptr) - : "memory", "cc"); - return old; -#ifdef CONFIG_64BIT - case 8: - asm volatile( - " lg %0,%3\n" - "0: csg %0,%2,%3\n" - " jl 0b\n" - : "=&d" (old), "=m" (*(long *) ptr) - : "d" (x), "Q" (*(long *) ptr) - : "memory", "cc"); - return old; -#endif /* CONFIG_64BIT */ - } - __xchg_called_with_bad_pointer(); - return x; -} - -#define xchg(ptr, x) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __xchg((unsigned long)(x), (void *)(ptr), sizeof(*(ptr)));\ - __ret; \ +#define cmpxchg(ptr, o, n) \ +({ \ + __typeof__(*(ptr)) __o = (o); \ + __typeof__(*(ptr)) __n = (n); \ + (__typeof__(*(ptr))) __sync_val_compare_and_swap((ptr),__o,__n);\ }) -/* - * Atomic compare and exchange. Compare OLD with MEM, if identical, - * store NEW in MEM. Return the initial value in MEM. Success is - * indicated by comparing RETURN with OLD. 
- */ - -#define __HAVE_ARCH_CMPXCHG - -extern void __cmpxchg_called_with_bad_pointer(void); - -static inline unsigned long __cmpxchg(void *ptr, unsigned long old, - unsigned long new, int size) -{ - unsigned long addr, prev, tmp; - int shift; - - switch (size) { - case 1: - addr = (unsigned long) ptr; - shift = (3 ^ (addr & 3)) << 3; - addr ^= addr & 3; - asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" - "1:" - : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr) - : "d" ((old & 0xff) << shift), - "d" ((new & 0xff) << shift), - "d" (~(0xff << shift)) - : "memory", "cc"); - return prev >> shift; - case 2: - addr = (unsigned long) ptr; - shift = (2 ^ (addr & 2)) << 3; - addr ^= addr & 2; - asm volatile( - " l %0,%2\n" - "0: nr %0,%5\n" - " lr %1,%0\n" - " or %0,%3\n" - " or %1,%4\n" - " cs %0,%1,%2\n" - " jnl 1f\n" - " xr %1,%0\n" - " nr %1,%5\n" - " jnz 0b\n" - "1:" - : "=&d" (prev), "=&d" (tmp), "+Q" (*(int *) addr) - : "d" ((old & 0xffff) << shift), - "d" ((new & 0xffff) << shift), - "d" (~(0xffff << shift)) - : "memory", "cc"); - return prev >> shift; - case 4: - asm volatile( - " cs %0,%3,%1\n" - : "=&d" (prev), "=Q" (*(int *) ptr) - : "0" (old), "d" (new), "Q" (*(int *) ptr) - : "memory", "cc"); - return prev; -#ifdef CONFIG_64BIT - case 8: - asm volatile( - " csg %0,%3,%1\n" - : "=&d" (prev), "=Q" (*(long *) ptr) - : "0" (old), "d" (new), "Q" (*(long *) ptr) - : "memory", "cc"); - return prev; -#endif /* CONFIG_64BIT */ - } - __cmpxchg_called_with_bad_pointer(); - return old; -} - -#define cmpxchg(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg((ptr), (unsigned long)(o), (unsigned long)(n), \ - sizeof(*(ptr))); \ - __ret; \ -}) +#define cmpxchg64 cmpxchg +#define cmpxchg_local cmpxchg +#define cmpxchg64_local cmpxchg -#ifdef CONFIG_64BIT -#define cmpxchg64(ptr, o, n) \ +#define xchg(ptr, x) \ ({ \ - cmpxchg((ptr), (o), (n)); \ + __typeof__(ptr) __ptr = (ptr); \ + __typeof__(*(ptr)) __old; \ + do { \ + __old = *__ptr; \ + } while (!__sync_bool_compare_and_swap(__ptr, __old, x)); \ + __old; \ }) -#else /* CONFIG_64BIT */ -static inline unsigned long long __cmpxchg64(void *ptr, - unsigned long long old, - unsigned long long new) -{ - register_pair rp_old = {.pair = old}; - register_pair rp_new = {.pair = new}; - unsigned long long *ullptr = ptr; - asm volatile( - " cds %0,%2,%1" - : "+d" (rp_old), "+Q" (*ullptr) - : "d" (rp_new) - : "memory", "cc"); - return rp_old.pair; -} - -#define cmpxchg64(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg64((ptr), \ - (unsigned long long)(o), \ - (unsigned long long)(n)); \ - __ret; \ -}) -#endif /* CONFIG_64BIT */ +#define __HAVE_ARCH_CMPXCHG #define __cmpxchg_double_op(p1, p2, o1, o2, n1, n2, insn) \ ({ \ @@ -265,40 +93,4 @@ extern void __cmpxchg_double_called_with_bad_pointer(void); #define system_has_cmpxchg_double() 1 -#include - -static inline unsigned long __cmpxchg_local(void *ptr, - unsigned long old, - unsigned long new, int size) -{ - switch (size) { - case 1: - case 2: - case 4: -#ifdef CONFIG_64BIT - case 8: -#endif - return __cmpxchg(ptr, old, new, size); - default: - return __cmpxchg_local_generic(ptr, old, new, size); - } - - return old; -} - -/* - * cmpxchg_local and cmpxchg64_local are atomic wrt current CPU. Always make - * them available. 
- */ -#define cmpxchg_local(ptr, o, n) \ -({ \ - __typeof__(*(ptr)) __ret; \ - __ret = (__typeof__(*(ptr))) \ - __cmpxchg_local((ptr), (unsigned long)(o), \ - (unsigned long)(n), sizeof(*(ptr))); \ - __ret; \ -}) - -#define cmpxchg64_local(ptr, o, n) cmpxchg64((ptr), (o), (n)) - #endif /* __ASM_CMPXCHG_H */ diff --git a/arch/s390/include/asm/spinlock.h b/arch/s390/include/asm/spinlock.h index d6bdf906caa5..0e37cd041241 100644 --- a/arch/s390/include/asm/spinlock.h +++ b/arch/s390/include/asm/spinlock.h @@ -18,14 +18,7 @@ extern int spin_retry; static inline int _raw_compare_and_swap(unsigned int *lock, unsigned int old, unsigned int new) { - unsigned int old_expected = old; - - asm volatile( - " cs %0,%3,%1" - : "=d" (old), "=Q" (*lock) - : "0" (old), "d" (new), "Q" (*lock) - : "cc", "memory" ); - return old == old_expected; + return __sync_bool_compare_and_swap(lock, old, new); } /* diff --git a/arch/s390/kernel/asm-offsets.c b/arch/s390/kernel/asm-offsets.c index f3a78337ca86..e07e91605353 100644 --- a/arch/s390/kernel/asm-offsets.c +++ b/arch/s390/kernel/asm-offsets.c @@ -17,8 +17,8 @@ * Make sure that the compiler is new enough. We want a compiler that * is known to work with the "Q" assembler constraint. */ -#if __GNUC__ < 3 || (__GNUC__ == 3 && __GNUC_MINOR__ < 3) -#error Your compiler is too old; please use version 3.3.3 or newer +#if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ < 3) +#error Your compiler is too old; please use version 4.3 or newer #endif int main(void) -- cgit v1.2.3 From b19148f6e2d90738ecf0c2eeeb9bdbae25c59e9b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Wed, 29 Oct 2014 19:12:04 +0100 Subject: s390/pci: improve irq number check for msix s390's arch_setup_msi_irqs function ensures that we don't return with more irqs than the PCI architecture allows and that a single PCI function doesn't consume more irqs than the kernel is configured for. At least the last check doesn't help much, since it would need to take the sum of all irqs into account. As that's already done by irq_alloc_desc, we can remove this check. As for the first check, we should use the value provided by the firmware, which can be less than what the PCI architecture allows. 
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci.h | 5 +---- arch/s390/pci/pci.c | 3 +-- arch/s390/pci/pci_clp.c | 1 + 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pci.h b/arch/s390/include/asm/pci.h index c030900320e0..ef803c202d42 100644 --- a/arch/s390/include/asm/pci.h +++ b/arch/s390/include/asm/pci.h @@ -50,10 +50,6 @@ struct zpci_fmb { atomic64_t unmapped_pages; } __packed __aligned(16); -#define ZPCI_MSI_VEC_BITS 11 -#define ZPCI_MSI_VEC_MAX (1 << ZPCI_MSI_VEC_BITS) -#define ZPCI_MSI_VEC_MASK (ZPCI_MSI_VEC_MAX - 1) - enum zpci_state { ZPCI_FN_STATE_RESERVED, ZPCI_FN_STATE_STANDBY, @@ -90,6 +86,7 @@ struct zpci_dev { /* IRQ stuff */ u64 msi_addr; /* MSI address */ + unsigned int max_msi; /* maximum number of MSI's */ struct airq_iv *aibv; /* adapter interrupt bit vector */ unsigned int aisb; /* number of the summary bit */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 2fa7b14b9c08..47a4568e141d 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -369,8 +369,7 @@ int arch_setup_msi_irqs(struct pci_dev *pdev, int nvec, int type) if (type == PCI_CAP_ID_MSI && nvec > 1) return 1; - msi_vecs = min(nvec, ZPCI_MSI_VEC_MAX); - msi_vecs = min_t(unsigned int, msi_vecs, CONFIG_PCI_NR_MSI); + msi_vecs = min_t(unsigned int, nvec, zdev->max_msi); /* Allocate adapter summary indicator bit */ rc = -EIO; diff --git a/arch/s390/pci/pci_clp.c b/arch/s390/pci/pci_clp.c index 6e22a247de9b..d6e411ed8b1f 100644 --- a/arch/s390/pci/pci_clp.c +++ b/arch/s390/pci/pci_clp.c @@ -62,6 +62,7 @@ static void clp_store_query_pci_fngrp(struct zpci_dev *zdev, zdev->tlb_refresh = response->refresh; zdev->dma_mask = response->dasm; zdev->msi_addr = response->msia; + zdev->max_msi = response->noi; zdev->fmb_update = response->mui; switch (response->version) { -- cgit v1.2.3 From 5b9f2081e0844581cc94384eb052007002b2bfa8 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 30 Oct 2014 10:30:45 +0100 Subject: s390/pci: add sparse annotations Fix the following warnings from the sparse code checker: arch/s390/include/asm/pci_io.h:165:49: warning: cast removes address space of expression arch/s390/pci/pci.c:476:44: warning: cast removes address space of expression arch/s390/pci/pci.c:491:36: warning: incorrect type in argument 2 (different address spaces) arch/s390/pci/pci.c:491:36: expected void [noderef] *addr arch/s390/pci/pci.c:491:36: got void * Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/pci_io.h | 6 ++++-- arch/s390/pci/pci.c | 6 ++++-- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/pci_io.h b/arch/s390/include/asm/pci_io.h index d194d544d694..f664e96f48c7 100644 --- a/arch/s390/include/asm/pci_io.h +++ b/arch/s390/include/asm/pci_io.h @@ -139,7 +139,8 @@ static inline int zpci_memcpy_fromio(void *dst, int size, rc = 0; while (n > 0) { - size = zpci_get_max_write_size((u64) src, (u64) dst, n, 8); + size = zpci_get_max_write_size((u64 __force) src, + (u64) dst, n, 8); req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); rc = zpci_read_single(req, dst, offset, size); if (rc) @@ -162,7 +163,8 @@ static inline int zpci_memcpy_toio(volatile void __iomem *dst, return -EINVAL; while (n > 0) { - size = zpci_get_max_write_size((u64) dst, (u64) src, n, 128); + size = zpci_get_max_write_size((u64 __force) dst, + (u64) src, n, 128); req = ZPCI_CREATE_REQ(entry->fh, entry->bar, size); if (size > 8) /* 
main path */ diff --git a/arch/s390/pci/pci.c b/arch/s390/pci/pci.c index 47a4568e141d..ed3725e2d16e 100644 --- a/arch/s390/pci/pci.c +++ b/arch/s390/pci/pci.c @@ -473,7 +473,8 @@ static void zpci_map_resources(struct zpci_dev *zdev) len = pci_resource_len(pdev, i); if (!len) continue; - pdev->resource[i].start = (resource_size_t) pci_iomap(pdev, i, 0); + pdev->resource[i].start = + (resource_size_t __force) pci_iomap(pdev, i, 0); pdev->resource[i].end = pdev->resource[i].start + len - 1; } } @@ -488,7 +489,8 @@ static void zpci_unmap_resources(struct zpci_dev *zdev) len = pci_resource_len(pdev, i); if (!len) continue; - pci_iounmap(pdev, (void *) pdev->resource[i].start); + pci_iounmap(pdev, (void __iomem __force *) + pdev->resource[i].start); } } -- cgit v1.2.3 From 99e97b7106d492a3cac4f7963f4a89935d2fbca4 Mon Sep 17 00:00:00 2001 From: Frank Blaschka Date: Thu, 6 Nov 2014 13:17:06 +0100 Subject: s390/io: add ioport_map stubs Add ioport_map stubs to make vfio build on s390. Signed-off-by: Frank Blaschka Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/io.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/io.h b/arch/s390/include/asm/io.h index cd6b9ee7b69c..ba516f8f918a 100644 --- a/arch/s390/include/asm/io.h +++ b/arch/s390/include/asm/io.h @@ -38,6 +38,15 @@ static inline void iounmap(volatile void __iomem *addr) { } +static inline void __iomem *ioport_map(unsigned long port, unsigned int nr) +{ + return NULL; +} + +static inline void ioport_unmap(void __iomem *p) +{ +} + /* * s390 needs a private implementation of pci_iomap since ioremap with its * offset parameter isn't sufficient. That's because BAR spaces are not -- cgit v1.2.3 From afaa7d29bc04bf0fcf2e7bda2a802392a38d059b Mon Sep 17 00:00:00 2001 From: Sebastian Ott Date: Fri, 14 Nov 2014 11:01:37 +0100 Subject: s390/irq: use irq 0 Irq 0 is currently unused on s390. Since there is no reason to do this, start counting at the beginning and gain an additional irq. Also correctly report the smallest usable irq number for dynamic allocation. 
Signed-off-by: Sebastian Ott Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/irq.h | 11 ++++------- arch/s390/kernel/irq.c | 5 +---- 2 files changed, 5 insertions(+), 11 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/irq.h b/arch/s390/include/asm/irq.h index b0d5f0a97a01..343ea7c987aa 100644 --- a/arch/s390/include/asm/irq.h +++ b/arch/s390/include/asm/irq.h @@ -1,11 +1,11 @@ #ifndef _ASM_IRQ_H #define _ASM_IRQ_H -#define EXT_INTERRUPT 1 -#define IO_INTERRUPT 2 -#define THIN_INTERRUPT 3 +#define EXT_INTERRUPT 0 +#define IO_INTERRUPT 1 +#define THIN_INTERRUPT 2 -#define NR_IRQS_BASE 4 +#define NR_IRQS_BASE 3 #ifdef CONFIG_PCI_NR_MSI # define NR_IRQS (NR_IRQS_BASE + CONFIG_PCI_NR_MSI) @@ -13,9 +13,6 @@ # define NR_IRQS NR_IRQS_BASE #endif -/* This number is used when no interrupt has been assigned */ -#define NO_IRQ 0 - /* External interruption codes */ #define EXT_IRQ_INTERRUPT_KEY 0x0040 #define EXT_IRQ_CLK_COMP 0x1004 diff --git a/arch/s390/kernel/irq.c b/arch/s390/kernel/irq.c index 1b8a38ab7861..f238720690f3 100644 --- a/arch/s390/kernel/irq.c +++ b/arch/s390/kernel/irq.c @@ -127,13 +127,10 @@ int show_interrupts(struct seq_file *p, void *v) for_each_online_cpu(cpu) seq_printf(p, "CPU%d ", cpu); seq_putc(p, '\n'); - goto out; } if (index < NR_IRQS) { if (index >= NR_IRQS_BASE) goto out; - /* Adjust index to process irqclass_main_desc array entries */ - index--; seq_printf(p, "%s: ", irqclass_main_desc[index].name); irq = irqclass_main_desc[index].irq; for_each_online_cpu(cpu) @@ -158,7 +155,7 @@ out: unsigned int arch_dynirq_lower_bound(unsigned int from) { - return from < THIN_INTERRUPT ? THIN_INTERRUPT : from; + return from < NR_IRQS_BASE ? NR_IRQS_BASE : from; } /* -- cgit v1.2.3 From 4eafad7febd482092b331ea72c37274d745956be Mon Sep 17 00:00:00 2001 From: Alexey Ishchuk Date: Fri, 14 Nov 2014 14:27:58 +0100 Subject: s390/kernel: add system calls for PCI memory access Add the new __NR_s390_pci_mmio_write and __NR_s390_pci_mmio_read system calls to allow user space applications to access device PCI I/O memory pages on s390x platform. 
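A hedged user-space sketch of the new write call (the syscall number 352 comes from the unistd.h hunk below; glibc provides no wrapper, so syscall(2) is used directly, and mmio_addr must fall inside an mmap()ed PCI I/O memory region of the device):

        #include <stddef.h>
        #include <unistd.h>
        #include <sys/syscall.h>

        #ifndef __NR_s390_pci_mmio_write
        #define __NR_s390_pci_mmio_write 352    /* from the patch below */
        #endif

        /* copy len bytes from buf to the device at mmio_addr */
        static long pci_mmio_write(unsigned long mmio_addr,
                                   const void *buf, size_t len)
        {
                return syscall(__NR_s390_pci_mmio_write, mmio_addr, buf, len);
        }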
[ Martin Schwidefsky: some code beautification ] Signed-off-by: Alexey Ishchuk Signed-off-by: Martin Schwidefsky --- arch/s390/include/uapi/asm/unistd.h | 4 +- arch/s390/kernel/compat_wrapper.c | 2 + arch/s390/kernel/entry.h | 2 + arch/s390/kernel/syscalls.S | 2 + arch/s390/pci/Makefile | 2 +- arch/s390/pci/pci_mmio.c | 115 ++++++++++++++++++++++++++++++++++++ kernel/sys_ni.c | 2 + 7 files changed, 127 insertions(+), 2 deletions(-) create mode 100644 arch/s390/pci/pci_mmio.c (limited to 'arch/s390/include') diff --git a/arch/s390/include/uapi/asm/unistd.h b/arch/s390/include/uapi/asm/unistd.h index 4197c89c52d4..2b446cf0cc65 100644 --- a/arch/s390/include/uapi/asm/unistd.h +++ b/arch/s390/include/uapi/asm/unistd.h @@ -287,7 +287,9 @@ #define __NR_getrandom 349 #define __NR_memfd_create 350 #define __NR_bpf 351 -#define NR_syscalls 352 +#define __NR_s390_pci_mmio_write 352 +#define __NR_s390_pci_mmio_read 353 +#define NR_syscalls 354 /* * There are some system calls that are not present on 64 bit, some diff --git a/arch/s390/kernel/compat_wrapper.c b/arch/s390/kernel/compat_wrapper.c index c4f7a3d655b8..d7fa2f0f1425 100644 --- a/arch/s390/kernel/compat_wrapper.c +++ b/arch/s390/kernel/compat_wrapper.c @@ -218,3 +218,5 @@ COMPAT_SYSCALL_WRAP3(seccomp, unsigned int, op, unsigned int, flags, const char COMPAT_SYSCALL_WRAP3(getrandom, char __user *, buf, size_t, count, unsigned int, flags) COMPAT_SYSCALL_WRAP2(memfd_create, const char __user *, uname, unsigned int, flags) COMPAT_SYSCALL_WRAP3(bpf, int, cmd, union bpf_attr *, attr, unsigned int, size); +COMPAT_SYSCALL_WRAP3(s390_pci_mmio_write, const unsigned long, mmio_addr, const void __user *, user_buffer, const size_t, length); +COMPAT_SYSCALL_WRAP3(s390_pci_mmio_read, const unsigned long, mmio_addr, void __user *, user_buffer, const size_t, length); diff --git a/arch/s390/kernel/entry.h b/arch/s390/kernel/entry.h index 0554b9771c9f..8e61393c8275 100644 --- a/arch/s390/kernel/entry.h +++ b/arch/s390/kernel/entry.h @@ -74,4 +74,6 @@ struct old_sigaction; long sys_s390_personality(unsigned int personality); long sys_s390_runtime_instr(int command, int signum); +long sys_s390_pci_mmio_write(unsigned long, const void __user *, size_t); +long sys_s390_pci_mmio_read(unsigned long, void __user *, size_t); #endif /* _ENTRY_H */ diff --git a/arch/s390/kernel/syscalls.S b/arch/s390/kernel/syscalls.S index 9f7087fd58de..a2987243bc76 100644 --- a/arch/s390/kernel/syscalls.S +++ b/arch/s390/kernel/syscalls.S @@ -360,3 +360,5 @@ SYSCALL(sys_seccomp,sys_seccomp,compat_sys_seccomp) SYSCALL(sys_getrandom,sys_getrandom,compat_sys_getrandom) SYSCALL(sys_memfd_create,sys_memfd_create,compat_sys_memfd_create) /* 350 */ SYSCALL(sys_bpf,sys_bpf,compat_sys_bpf) +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_write,compat_sys_s390_pci_mmio_write) +SYSCALL(sys_ni_syscall,sys_s390_pci_mmio_read,compat_sys_s390_pci_mmio_read) diff --git a/arch/s390/pci/Makefile b/arch/s390/pci/Makefile index a9e1dc4ae442..805d8b29193a 100644 --- a/arch/s390/pci/Makefile +++ b/arch/s390/pci/Makefile @@ -3,4 +3,4 @@ # obj-$(CONFIG_PCI) += pci.o pci_dma.o pci_clp.o pci_sysfs.o \ - pci_event.o pci_debug.o pci_insn.o + pci_event.o pci_debug.o pci_insn.o pci_mmio.o diff --git a/arch/s390/pci/pci_mmio.c b/arch/s390/pci/pci_mmio.c new file mode 100644 index 000000000000..62c5ea6d8682 --- /dev/null +++ b/arch/s390/pci/pci_mmio.c @@ -0,0 +1,115 @@ +/* + * Access to PCI I/O memory from user space programs. + * + * Copyright IBM Corp. 
2014 + * Author(s): Alexey Ishchuk + */ +#include +#include +#include +#include +#include +#include + +static long get_pfn(unsigned long user_addr, unsigned long access, + unsigned long *pfn) +{ + struct vm_area_struct *vma; + long ret; + + down_read(¤t->mm->mmap_sem); + ret = -EINVAL; + vma = find_vma(current->mm, user_addr); + if (!vma) + goto out; + ret = -EACCES; + if (!(vma->vm_flags & access)) + goto out; + ret = follow_pfn(vma, user_addr, pfn); +out: + up_read(¤t->mm->mmap_sem); + return ret; +} + +SYSCALL_DEFINE3(s390_pci_mmio_write, unsigned long, mmio_addr, + const void __user *, user_buffer, size_t, length) +{ + u8 local_buf[64]; + void __iomem *io_addr; + void *buf; + unsigned long pfn; + long ret; + + if (!zpci_is_enabled()) + return -ENODEV; + + if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) + return -EINVAL; + if (length > 64) { + buf = kmalloc(length, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } else + buf = local_buf; + + ret = get_pfn(mmio_addr, VM_WRITE, &pfn); + if (ret) + goto out; + io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + + ret = -EFAULT; + if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) + goto out; + + if (copy_from_user(buf, user_buffer, length)) + goto out; + + memcpy_toio(io_addr, buf, length); + ret = 0; +out: + if (buf != local_buf) + kfree(buf); + return ret; +} + +SYSCALL_DEFINE3(s390_pci_mmio_read, unsigned long, mmio_addr, + void __user *, user_buffer, size_t, length) +{ + u8 local_buf[64]; + void __iomem *io_addr; + void *buf; + unsigned long pfn; + long ret; + + if (!zpci_is_enabled()) + return -ENODEV; + + if (length <= 0 || PAGE_SIZE - (mmio_addr & ~PAGE_MASK) < length) + return -EINVAL; + if (length > 64) { + buf = kmalloc(length, GFP_KERNEL); + if (!buf) + return -ENOMEM; + } else + buf = local_buf; + + ret = get_pfn(mmio_addr, VM_READ, &pfn); + if (ret) + goto out; + io_addr = (void *)((pfn << PAGE_SHIFT) | (mmio_addr & ~PAGE_MASK)); + + ret = -EFAULT; + if ((unsigned long) io_addr < ZPCI_IOMAP_ADDR_BASE) + goto out; + + memcpy_fromio(buf, io_addr, length); + + if (copy_to_user(user_buffer, buf, length)) + goto out; + + ret = 0; +out: + if (buf != local_buf) + kfree(buf); + return ret; +} diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 02aa4185b17e..61eea02b53f5 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -169,6 +169,8 @@ cond_syscall(ppc_rtas); cond_syscall(sys_spu_run); cond_syscall(sys_spu_create); cond_syscall(sys_subpage_prot); +cond_syscall(sys_s390_pci_mmio_read); +cond_syscall(sys_s390_pci_mmio_write); /* mmu depending weak syscall entries */ cond_syscall(sys_mprotect); -- cgit v1.2.3 From 7eed2e09abf0a0064a7e3f4a911b37d71202dfa4 Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 24 Nov 2014 14:30:32 +0100 Subject: s390/ftrace: provide working ftrace_return_address() The common code ftrace_return_address(n), which is just a wrapper for __builtin_return_address(n), will only work for n > 0 if CONFIG_FRAME_POINTER is set to 'y'. Otherwise it will return 0. Since on s390 we will never have that config option set to 'y' ftrace_return_address() won't work at all for n > 0. Luckily we always compile the kernel with -mkernel-backchain which in turn means that __builtin_return_address(n) will always work. So let ftrace_return_address(n) map to __builtin_return_address(n). 
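A small usage sketch of what this enables (the function is illustrative; __builtin_return_address(n) for n > 0 still requires the call chain to actually be that deep):

        #include <linux/ftrace.h>
        #include <linux/printk.h>

        static noinline void report_caller_of_caller(void)
        {
                /* level 1 used to evaluate to 0 on s390; with the override it
                 * walks the -mkernel-backchain stack frames */
                pr_info("caller of my caller: %pS\n",
                        (void *)ftrace_return_address(1));
        }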
Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/ftrace.h b/arch/s390/include/asm/ftrace.h index 785041f1dc77..abb618f1ead2 100644 --- a/arch/s390/include/asm/ftrace.h +++ b/arch/s390/include/asm/ftrace.h @@ -8,6 +8,8 @@ #ifndef __ASSEMBLY__ +#define ftrace_return_address(n) __builtin_return_address(n) + void _mcount(void); void ftrace_caller(void); -- cgit v1.2.3 From 57f2ffe14fd125c240babd88b99d8a57d3ed060e Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Mon, 15 Sep 2014 10:50:47 +0200 Subject: s390: remove diag 44 calls from cpu_relax() Simplify cpu_relax() to a simple barrier(). Performance-wise this doesn't seem to make any big difference anymore, since nearly all lock variants have directed yield semantics in the meantime. Also this makes s390 behave like all other architectures. Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/processor.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/processor.h b/arch/s390/include/asm/processor.h index d559bdb03d18..bed05ea7ec27 100644 --- a/arch/s390/include/asm/processor.h +++ b/arch/s390/include/asm/processor.h @@ -217,8 +217,6 @@ static inline unsigned short stap(void) */ static inline void cpu_relax(void) { - if (MACHINE_HAS_DIAG44) - asm volatile("diag 0,0,68"); barrier(); } -- cgit v1.2.3 From 832a771034bf62444796ab8868264c3ea9e50866 Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Mon, 1 Dec 2014 09:16:45 +0100 Subject: s390/debug: avoid function call for debug_sprintf_* debug_sprintf_event/exception are called even for debug events with a disabled debug level. All other functions already do the check in a wrapper function. Let's do the same here. Due to the var_args, the compiler cannot inline this function. So let's wrap this via a macro. This patch saves around 80 ns on my z196 for a KVM round trip (we have two debug statements for entry and exit) when KVM is built as a module. The savings for built-in drivers are smaller, as in the module case we additionally avoid the PLT overhead of the function call. Signed-off-by: Christian Borntraeger Reviewed-by: Michael Holzheu Reviewed-by: David Hildenbrand Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/debug.h | 29 +++++++++++++++++++++++++++-- arch/s390/kernel/debug.c | 12 ++++-------- 2 files changed, 31 insertions(+), 10 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/debug.h b/arch/s390/include/asm/debug.h index 530c15eb01e9..0206c8052328 100644 --- a/arch/s390/include/asm/debug.h +++ b/arch/s390/include/asm/debug.h @@ -151,9 +151,21 @@ debug_text_event(debug_info_t* id, int level, const char* txt) * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! */ extern debug_entry_t * -debug_sprintf_event(debug_info_t* id,int level,char *string,...) +__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) __attribute__ ((format(printf, 3, 4))); +#define debug_sprintf_event(_id, _level, _fmt, ...) 
\ +({ \ + debug_entry_t *__ret; \ + debug_info_t *__id = _id; \ + int __level = _level; \ + if ((!__id) || (__level > __id->level)) \ + __ret = NULL; \ + else \ + __ret = __debug_sprintf_event(__id, __level, \ + _fmt, ## __VA_ARGS__); \ + __ret; \ +}) static inline debug_entry_t* debug_exception(debug_info_t* id, int level, void* data, int length) @@ -194,9 +206,22 @@ debug_text_exception(debug_info_t* id, int level, const char* txt) * stored in the s390dbf. See Documentation/s390/s390dbf.txt for more details! */ extern debug_entry_t * -debug_sprintf_exception(debug_info_t* id,int level,char *string,...) +__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) __attribute__ ((format(printf, 3, 4))); +#define debug_sprintf_exception(_id, _level, _fmt, ...) \ +({ \ + debug_entry_t *__ret; \ + debug_info_t *__id = _id; \ + int __level = _level; \ + if ((!__id) || (__level > __id->level)) \ + __ret = NULL; \ + else \ + __ret = __debug_sprintf_exception(__id, __level, \ + _fmt, ## __VA_ARGS__);\ + __ret; \ +}) + int debug_register_view(debug_info_t* id, struct debug_view* view); int debug_unregister_view(debug_info_t* id, struct debug_view* view); diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index ee8390da6ea7..c1f21aca76e7 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -1019,7 +1019,7 @@ debug_count_numargs(char *string) */ debug_entry_t* -debug_sprintf_event(debug_info_t* id, int level,char *string,...) +__debug_sprintf_event(debug_info_t *id, int level, char *string, ...) { va_list ap; int numargs,idx; @@ -1027,8 +1027,6 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) debug_sprintf_entry_t *curr_event; debug_entry_t *active; - if((!id) || (level > id->level)) - return NULL; if (!debug_active || !id->areas) return NULL; numargs=debug_count_numargs(string); @@ -1050,14 +1048,14 @@ debug_sprintf_event(debug_info_t* id, int level,char *string,...) return active; } -EXPORT_SYMBOL(debug_sprintf_event); +EXPORT_SYMBOL(__debug_sprintf_event); /* * debug_sprintf_exception: */ debug_entry_t* -debug_sprintf_exception(debug_info_t* id, int level,char *string,...) +__debug_sprintf_exception(debug_info_t *id, int level, char *string, ...) { va_list ap; int numargs,idx; @@ -1065,8 +1063,6 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) debug_sprintf_entry_t *curr_event; debug_entry_t *active; - if((!id) || (level > id->level)) - return NULL; if (!debug_active || !id->areas) return NULL; @@ -1089,7 +1085,7 @@ debug_sprintf_exception(debug_info_t* id, int level,char *string,...) return active; } -EXPORT_SYMBOL(debug_sprintf_exception); +EXPORT_SYMBOL(__debug_sprintf_exception); /* * debug_register_view: -- cgit v1.2.3 From 1ce2180498fd40ed3f6485fc5daadf4b711f305f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Nov 2014 19:23:34 +0100 Subject: s390/idle: convert open coded idle time seqcount s390 uses open coded seqcount to synchronize idle time accounting. Lets consolidate it with the standard API. 
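For readers unfamiliar with the API, the canonical seqcount pattern the conversion below adopts (a generic sketch, not the s390 idle code itself; the writer side must be serialized by other means, here by being per-cpu in the idle path):

        #include <linux/seqlock.h>
        #include <linux/types.h>

        static seqcount_t stats_seq = SEQCNT_ZERO(stats_seq);
        static u64 stats_value;

        static void writer_update(u64 v)
        {
                write_seqcount_begin(&stats_seq);       /* count becomes odd */
                stats_value = v;
                write_seqcount_end(&stats_seq);         /* count becomes even */
        }

        static u64 reader_snapshot(void)
        {
                unsigned int seq;
                u64 v;

                do {
                        seq = read_seqcount_begin(&stats_seq);
                        v = stats_value;
                } while (read_seqcount_retry(&stats_seq, seq));
                return v;
        }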
From 1ce2180498fd40ed3f6485fc5daadf4b711f305f Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Nov 2014 19:23:34 +0100 Subject: s390/idle: convert open coded idle time seqcount s390 uses an open-coded seqcount to synchronize idle time accounting. Let's consolidate it with the standard API. Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Tony Luck Cc: Wu Fengguang Signed-off-by: Frederic Weisbecker Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/idle.h | 3 ++- arch/s390/kernel/idle.c | 24 +++++++++++------------- 2 files changed, 13 insertions(+), 14 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/idle.h b/arch/s390/include/asm/idle.h index 6af037f574b8..113cd963dbbe 100644 --- a/arch/s390/include/asm/idle.h +++ b/arch/s390/include/asm/idle.h @@ -9,9 +9,10 @@ #include <linux/types.h> #include <linux/device.h> +#include <linux/seqlock.h> struct s390_idle_data { - unsigned int sequence; + seqcount_t seqcount; unsigned long long idle_count; unsigned long long idle_time; unsigned long long clock_idle_enter; diff --git a/arch/s390/kernel/idle.c b/arch/s390/kernel/idle.c index 8814dd9cf644..7a55c29b0b33 100644 --- a/arch/s390/kernel/idle.c +++ b/arch/s390/kernel/idle.c @@ -38,15 +38,13 @@ void enabled_wait(void) trace_hardirqs_off(); /* Account time spent with enabled wait psw loaded as idle time. */ - idle->sequence++; - smp_wmb(); + write_seqcount_begin(&idle->seqcount); idle_time = idle->clock_idle_exit - idle->clock_idle_enter; idle->clock_idle_enter = idle->clock_idle_exit = 0ULL; idle->idle_time += idle_time; idle->idle_count++; account_idle_time(idle_time); - smp_wmb(); - idle->sequence++; + write_seqcount_end(&idle->seqcount); } NOKPROBE_SYMBOL(enabled_wait); @@ -55,14 +53,14 @@ static ssize_t show_idle_count(struct device *dev, { struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned long long idle_count; - unsigned int sequence; + unsigned int seq; do { - sequence = ACCESS_ONCE(idle->sequence); + seq = read_seqcount_begin(&idle->seqcount); idle_count = ACCESS_ONCE(idle->idle_count); if (ACCESS_ONCE(idle->clock_idle_enter)) idle_count++; - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + } while (read_seqcount_retry(&idle->seqcount, seq)); return sprintf(buf, "%llu\n", idle_count); } DEVICE_ATTR(idle_count, 0444, show_idle_count, NULL); @@ -72,15 +70,15 @@ static ssize_t show_idle_time(struct device *dev, { struct s390_idle_data *idle = &per_cpu(s390_idle, dev->id); unsigned long long now, idle_time, idle_enter, idle_exit; - unsigned int sequence; + unsigned int seq; do { now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); + seq = read_seqcount_begin(&idle->seqcount); idle_time = ACCESS_ONCE(idle->idle_time); idle_enter = ACCESS_ONCE(idle->clock_idle_enter); idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + } while (read_seqcount_retry(&idle->seqcount, seq)); idle_time += idle_enter ? ((idle_exit ? : now) - idle_enter) : 0; return sprintf(buf, "%llu\n", idle_time >> 12); } @@ -90,14 +88,14 @@ cputime64_t arch_cpu_idle_time(int cpu) { struct s390_idle_data *idle = &per_cpu(s390_idle, cpu); unsigned long long now, idle_enter, idle_exit; - unsigned int sequence; + unsigned int seq; do { now = get_tod_clock(); - sequence = ACCESS_ONCE(idle->sequence); + seq = read_seqcount_begin(&idle->seqcount); idle_enter = ACCESS_ONCE(idle->clock_idle_enter); idle_exit = ACCESS_ONCE(idle->clock_idle_exit); - } while ((sequence & 1) || (ACCESS_ONCE(idle->sequence) != sequence)); + } while (read_seqcount_retry(&idle->seqcount, seq)); return idle_enter ? ((idle_exit ?: now) - idle_enter) : 0; } -- cgit v1.2.3
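The seqcount API encapsulates exactly the retry protocol the old code spelled out by hand (odd sequence = writer active; changed sequence = torn read). A minimal sketch of the writer/reader pairing in generic kernel-style C — the "stats_*" names are hypothetical, not taken from the patch:

	#include <linux/seqlock.h>

	static seqcount_t stats_seq = SEQCNT_ZERO(stats_seq);
	static u64 stats_value;

	static void stats_update(u64 v)		/* writer side */
	{
		write_seqcount_begin(&stats_seq);
		stats_value = v;
		write_seqcount_end(&stats_seq);
	}

	static u64 stats_read(void)		/* reader retries torn snapshots */
	{
		unsigned int seq;
		u64 v;

		do {
			seq = read_seqcount_begin(&stats_seq);
			v = stats_value;
		} while (read_seqcount_retry(&stats_seq, seq));
		return v;
	}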
From 8461b63ca01d125a245a0d0fb4821ea0656e5053 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 28 Nov 2014 19:23:35 +0100 Subject: s390: translate cputime magic constants to macros Make the code more self-explanatory by naming the magic constants. Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Ingo Molnar Cc: Martin Schwidefsky Cc: Oleg Nesterov Cc: Paul Mackerras Cc: Peter Zijlstra Cc: Rik van Riel Cc: Thomas Gleixner Cc: Tony Luck Cc: Wu Fengguang Signed-off-by: Frederic Weisbecker Signed-off-by: Heiko Carstens Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 46 +++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index f8c196984853..b81712306360 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -10,6 +10,8 @@ #include <linux/types.h> #include <asm/div64.h> +#define CPUTIME_PER_USEC 4096ULL +#define CPUTIME_PER_SEC (CPUTIME_PER_USEC * USEC_PER_SEC) /* We want to use full resolution of the CPU timer: 2**-12 micro-seconds. */ @@ -38,24 +40,24 @@ static inline unsigned long __div(unsigned long long n, unsigned long base) */ static inline unsigned long cputime_to_jiffies(const cputime_t cputime) { - return __div((__force unsigned long long) cputime, 4096000000ULL / HZ); + return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / HZ); } static inline cputime_t jiffies_to_cputime(const unsigned int jif) { - return (__force cputime_t)(jif * (4096000000ULL / HZ)); + return (__force cputime_t)(jif * (CPUTIME_PER_SEC / HZ)); } static inline u64 cputime64_to_jiffies64(cputime64_t cputime) { unsigned long long jif = (__force unsigned long long) cputime; - do_div(jif, 4096000000ULL / HZ); + do_div(jif, CPUTIME_PER_SEC / HZ); return jif; } static inline cputime64_t jiffies64_to_cputime64(const u64 jif) { - return (__force cputime64_t)(jif * (4096000000ULL / HZ)); + return (__force cputime64_t)(jif * (CPUTIME_PER_SEC / HZ)); } /* @@ -68,7 +70,7 @@ static inline unsigned int cputime_to_usecs(const cputime_t cputime) static inline cputime_t usecs_to_cputime(const unsigned int m) { - return (__force cputime_t)(m * 4096ULL); + return (__force cputime_t)(m * CPUTIME_PER_USEC); } #define usecs_to_cputime64(m) usecs_to_cputime(m) /* @@ -78,12 +80,12 @@ static inline cputime_t usecs_to_cputime(const unsigned int m) */ static inline unsigned int cputime_to_secs(const cputime_t cputime) { - return __div((__force unsigned long long) cputime, 2048000000) >> 1; + return __div((__force unsigned long long) cputime, CPUTIME_PER_SEC / 2) >> 1; } static inline cputime_t secs_to_cputime(const unsigned int s) { - return (__force cputime_t)(s * 4096000000ULL); + return (__force cputime_t)(s * CPUTIME_PER_SEC); } /* @@ -91,8 +93,8 @@ static inline cputime_t secs_to_cputime(const unsigned int s) */ static inline cputime_t timespec_to_cputime(const struct timespec *value) { - unsigned long long ret = value->tv_sec * 4096000000ULL; - return (__force cputime_t)(ret + value->tv_nsec * 4096 / 1000); + unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC; + return (__force cputime_t)(ret + (value->tv_nsec * CPUTIME_PER_USEC) / NSEC_PER_USEC); } static inline void cputime_to_timespec(const cputime_t cputime, @@ -103,12 +105,12 @@ static inline void cputime_to_timespec(const cputime_t cputime, register_pair rp; rp.pair = __cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); - value->tv_nsec = rp.subreg.even * 1000 / 4096; + asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_SEC / 2)); + value->tv_nsec = rp.subreg.even * NSEC_PER_USEC / CPUTIME_PER_USEC; value->tv_sec = rp.subreg.odd; #else - value->tv_nsec = (__cputime % 4096000000ULL) * 1000 / 4096; - value->tv_sec = __cputime / 4096000000ULL; + value->tv_nsec = (__cputime % CPUTIME_PER_SEC) * NSEC_PER_USEC / CPUTIME_PER_USEC; + value->tv_sec = __cputime / CPUTIME_PER_SEC; #endif } @@ -119,8 +121,8 @@ static inline void cputime_to_timespec(const cputime_t cputime, */ static inline cputime_t timeval_to_cputime(const struct timeval *value) { - unsigned long long ret = value->tv_sec * 4096000000ULL; - return (__force cputime_t)(ret + value->tv_usec * 4096ULL); + unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC; + return (__force cputime_t)(ret + value->tv_usec * CPUTIME_PER_USEC); } static inline void cputime_to_timeval(const cputime_t cputime, @@ -131,12 +133,12 @@ static inline void cputime_to_timeval(const cputime_t cputime, register_pair rp; rp.pair = __cputime >> 1; - asm ("dr %0,%1" : "+d" (rp) : "d" (2048000000UL)); - value->tv_usec = rp.subreg.even / 4096; + asm ("dr %0,%1" : "+d" (rp) : "d" (CPUTIME_PER_SEC / 2)); + value->tv_usec = rp.subreg.even / CPUTIME_PER_USEC; value->tv_sec = rp.subreg.odd; #else - value->tv_usec = (__cputime % 4096000000ULL) / 4096; - value->tv_sec = __cputime / 4096000000ULL; + value->tv_usec = (__cputime % CPUTIME_PER_SEC) / CPUTIME_PER_USEC; + value->tv_sec = __cputime / CPUTIME_PER_SEC; #endif } /* @@ -146,13 +148,13 @@ static inline void cputime_to_timeval(const cputime_t cputime, static inline clock_t cputime_to_clock_t(cputime_t cputime) { unsigned long long clock = (__force unsigned long long) cputime; - do_div(clock, 4096000000ULL / USER_HZ); + do_div(clock, CPUTIME_PER_SEC / USER_HZ); return clock; } static inline cputime_t clock_t_to_cputime(unsigned long x) { - return (__force cputime_t)(x * (4096000000ULL / USER_HZ)); + return (__force cputime_t)(x * (CPUTIME_PER_SEC / USER_HZ)); } /* @@ -161,7 +163,7 @@ static inline cputime_t clock_t_to_cputime(unsigned long x) static inline clock_t cputime64_to_clock_t(cputime64_t cputime) { unsigned long long clock = (__force unsigned long long) cputime; - do_div(clock, 4096000000ULL / USER_HZ); + do_div(clock, CPUTIME_PER_SEC / USER_HZ); return clock; } -- cgit v1.2.3
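The named constants make the arithmetic auditable: the CPU timer ticks at 2**-12 microseconds, so CPUTIME_PER_USEC is 4096 and CPUTIME_PER_SEC is 4096 * 1,000,000 = 4,096,000,000, which is the value the old literals hard-coded. A small worked check, illustrative only:

	#define CPUTIME_PER_USEC 4096ULL
	#define CPUTIME_PER_SEC  (CPUTIME_PER_USEC * 1000000ULL)	/* 4096000000 */

	/* The old literal 2048000000UL is exactly half a second of ticks: */
	_Static_assert(CPUTIME_PER_SEC / 2 == 2048000000ULL,
		       "half-second divisor used by the 31-bit asm");

	/* e.g. 1500 us of cputime: 1500 * CPUTIME_PER_USEC == 6144000 ticks */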
From 9de45f736f21655400fd56b85bfbaf507cc2959d Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Thu, 4 Dec 2014 11:07:19 +0100 Subject: s390/mm: fix memory leak of ptlock in pmd_free_tlb The pmd_free_tlb function fails to call pgtable_pmd_page_dtor. Without this call the ptlock for the pmd tables is never freed. Add the missing call. Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/tlb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/tlb.h b/arch/s390/include/asm/tlb.h index 572c59949004..06d8741ad6f4 100644 --- a/arch/s390/include/asm/tlb.h +++ b/arch/s390/include/asm/tlb.h @@ -121,6 +121,7 @@ static inline void pmd_free_tlb(struct mmu_gather *tlb, pmd_t *pmd, #ifdef CONFIG_64BIT if (tlb->mm->context.asce_limit <= (1UL << 31)) return; + pgtable_pmd_page_dtor(virt_to_page(pmd)); tlb_remove_table(tlb, pmd); #endif } -- cgit v1.2.3
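The leak follows from an unmatched constructor: pgtable_pmd_page_ctor() allocates the split ptlock when the pmd page is set up, so every free path must run the matching dtor first. A hedged sketch of the required pairing — simplified, and not the actual s390 allocation path:

	static pmd_t *pmd_table_alloc(void)
	{
		struct page *page = alloc_pages(GFP_KERNEL, 0);

		if (!page)
			return NULL;
		if (!pgtable_pmd_page_ctor(page)) {	/* allocates the ptlock */
			__free_pages(page, 0);
			return NULL;
		}
		return (pmd_t *) page_address(page);
	}

	static void pmd_table_free(pmd_t *pmd)
	{
		/* the dtor frees the ptlock and must precede the page free */
		pgtable_pmd_page_dtor(virt_to_page(pmd));
		__free_pages(virt_to_page(pmd), 0);
	}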
From 351997810131565fe62aec2c366deccbf6bda3f4 Mon Sep 17 00:00:00 2001 From: Martin Schwidefsky Date: Mon, 8 Dec 2014 13:19:12 +0100 Subject: s390/cputime: fix 31-bit compile git commit 8461b63ca01d125a245a0d0fb4821ea0656e5053 "s390: translate cputime magic constants to macros" introduced a build error for 31-bit: kernel/built-in.o: In function `posix_cpu_timer_set': posix-cpu-timers.c:(.text+0x2a8cc): undefined reference to `__udivdi3' The original code was actually broken for 31-bit and was corrected by the above commit, which forces the compiler to use 64-bit arithmetic through the CPUTIME_PER_USEC define. To fix the compile error, replace the 64-bit division with a call to __div(). Signed-off-by: Martin Schwidefsky --- arch/s390/include/asm/cputime.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/s390/include') diff --git a/arch/s390/include/asm/cputime.h b/arch/s390/include/asm/cputime.h index b81712306360..b91e960e4045 100644 --- a/arch/s390/include/asm/cputime.h +++ b/arch/s390/include/asm/cputime.h @@ -94,7 +94,7 @@ static inline cputime_t secs_to_cputime(const unsigned int s) static inline cputime_t timespec_to_cputime(const struct timespec *value) { unsigned long long ret = value->tv_sec * CPUTIME_PER_SEC; - return (__force cputime_t)(ret + (value->tv_nsec * CPUTIME_PER_USEC) / NSEC_PER_USEC); + return (__force cputime_t)(ret + __div(value->tv_nsec * CPUTIME_PER_USEC, NSEC_PER_USEC)); } static inline void cputime_to_timespec(const cputime_t cputime, -- cgit v1.2.3
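On 31-bit s390, a plain C division of a 64-bit value makes gcc emit a call to libgcc's __udivdi3, which the kernel does not link against; helpers such as s390's __div() or the generic do_div() perform the division without that dependency. A sketch of the portable kernel idiom — the function name and parameters here are illustrative, not from the patch:

	#include <asm/div64.h>	/* do_div() */

	/* Divide a 64-bit tick count by a 32-bit divisor without pulling
	 * in __udivdi3 on 32/31-bit builds. */
	static inline unsigned long long ticks_to_units(unsigned long long ticks,
							unsigned int per_unit)
	{
		do_div(ticks, per_unit);	/* divides in place, returns remainder */
		return ticks;
	}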