From a268fcfaa6ab2ef740fda5ecf947aca45ccd535d Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@alien8.de> Date: Tue, 31 May 2011 22:21:51 +0200 Subject: x86, asm: Thin down SAVE/RESTORE_* asm macros Use dwarf2 cfi annotation macros, making SAVE/RESTORE_* marginally more readable. No functionality change. Signed-off-by: Borislav Petkov <bp@alien8.de> Link: http://lkml.kernel.org/r/1306873314-32523-2-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/include/asm/calling.h | 101 +++++++++++++++++------------------------ 1 file changed, 41 insertions(+), 60 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index 30af5a832163..b67e06c4710e 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -46,6 +46,7 @@ For 32-bit we have the following conventions - kernel is built with */ +#include "dwarf2.h" /* * 64-bit system call stack frame layout defines and helpers, for @@ -87,30 +88,25 @@ For 32-bit we have the following conventions - kernel is built with .macro SAVE_ARGS addskip=0, norcx=0, nor891011=0 subq $9*8+\addskip, %rsp CFI_ADJUST_CFA_OFFSET 9*8+\addskip - movq %rdi, 8*8(%rsp) - CFI_REL_OFFSET rdi, 8*8 - movq %rsi, 7*8(%rsp) - CFI_REL_OFFSET rsi, 7*8 - movq %rdx, 6*8(%rsp) - CFI_REL_OFFSET rdx, 6*8 + movq_cfi rdi, 8*8 + movq_cfi rsi, 7*8 + movq_cfi rdx, 6*8 + .if \norcx .else - movq %rcx, 5*8(%rsp) - CFI_REL_OFFSET rcx, 5*8 + movq_cfi rcx, 5*8 .endif - movq %rax, 4*8(%rsp) - CFI_REL_OFFSET rax, 4*8 + + movq_cfi rax, 4*8 + .if \nor891011 .else - movq %r8, 3*8(%rsp) - CFI_REL_OFFSET r8, 3*8 - movq %r9, 2*8(%rsp) - CFI_REL_OFFSET r9, 2*8 - movq %r10, 1*8(%rsp) - CFI_REL_OFFSET r10, 1*8 - movq %r11, (%rsp) - CFI_REL_OFFSET r11, 0*8 + movq_cfi r8, 3*8 + movq_cfi r9, 2*8 + movq_cfi r10, 1*8 + movq_cfi r11, 0*8 .endif + .endm #define ARG_SKIP (9*8) @@ -119,37 +115,34 @@ For 32-bit we have the following conventions - kernel is built with skipr8910=0, skiprdx=0 .if \skipr11 .else - movq (%rsp), %r11 - CFI_RESTORE r11 + movq_cfi_restore 0*8, r11 .endif + .if \skipr8910 .else - movq 1*8(%rsp), %r10 - CFI_RESTORE r10 - movq 2*8(%rsp), %r9 - CFI_RESTORE r9 - movq 3*8(%rsp), %r8 - CFI_RESTORE r8 + movq_cfi_restore 1*8, r10 + movq_cfi_restore 2*8, r9 + movq_cfi_restore 3*8, r8 .endif + .if \skiprax .else - movq 4*8(%rsp), %rax - CFI_RESTORE rax + movq_cfi_restore 4*8, rax .endif + .if \skiprcx .else - movq 5*8(%rsp), %rcx - CFI_RESTORE rcx + movq_cfi_restore 5*8, rcx .endif + .if \skiprdx .else - movq 6*8(%rsp), %rdx - CFI_RESTORE rdx + movq_cfi_restore 6*8, rdx .endif - movq 7*8(%rsp), %rsi - CFI_RESTORE rsi - movq 8*8(%rsp), %rdi - CFI_RESTORE rdi + + movq_cfi_restore 7*8, rsi + movq_cfi_restore 8*8, rdi + .if ARG_SKIP+\addskip > 0 addq $ARG_SKIP+\addskip, %rsp CFI_ADJUST_CFA_OFFSET -(ARG_SKIP+\addskip) @@ -176,33 +169,21 @@ For 32-bit we have the following conventions - kernel is built with .macro SAVE_REST subq $REST_SKIP, %rsp CFI_ADJUST_CFA_OFFSET REST_SKIP - movq %rbx, 5*8(%rsp) - CFI_REL_OFFSET rbx, 5*8 - movq %rbp, 4*8(%rsp) - CFI_REL_OFFSET rbp, 4*8 - movq %r12, 3*8(%rsp) - CFI_REL_OFFSET r12, 3*8 - movq %r13, 2*8(%rsp) - CFI_REL_OFFSET r13, 2*8 - movq %r14, 1*8(%rsp) - CFI_REL_OFFSET r14, 1*8 - movq %r15, (%rsp) - CFI_REL_OFFSET r15, 0*8 + movq_cfi rbx, 5*8 + movq_cfi rbp, 4*8 + movq_cfi r12, 3*8 + movq_cfi r13, 2*8 + movq_cfi r14, 1*8 + movq_cfi r15, 0*8 .endm .macro RESTORE_REST - movq (%rsp), %r15 - CFI_RESTORE r15 - movq 1*8(%rsp), %r14 - CFI_RESTORE r14 - movq 2*8(%rsp), %r13 - CFI_RESTORE r13 - movq 3*8(%rsp), %r12 - CFI_RESTORE r12 - movq 4*8(%rsp), %rbp - CFI_RESTORE rbp - movq 5*8(%rsp), %rbx - CFI_RESTORE rbx + movq_cfi_restore 0*8, r15 + movq_cfi_restore 1*8, r14 + movq_cfi_restore 2*8, r13 + movq_cfi_restore 3*8, r12 + movq_cfi_restore 4*8, rbp + movq_cfi_restore 5*8, rbx addq $REST_SKIP, %rsp CFI_ADJUST_CFA_OFFSET -(REST_SKIP) .endm -- cgit v1.2.3 From cac0e0a78f722abd85b7f8d614ee0820f7672f58 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@alien8.de> Date: Tue, 31 May 2011 22:21:52 +0200 Subject: x86, asm: Flip SAVE_ARGS arguments logic This saves us the else part of the conditional statement in the macro. No functionality change. Signed-off-by: Borislav Petkov <bp@alien8.de> Link: http://lkml.kernel.org/r/1306873314-32523-3-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/ia32/ia32entry.S | 6 +++--- arch/x86/include/asm/calling.h | 8 +++----- arch/x86/kernel/entry_64.S | 2 +- 3 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index c1870dddd322..c5435dcea15c 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -143,7 +143,7 @@ ENTRY(ia32_sysenter_target) CFI_REL_OFFSET rip,0 pushq_cfi %rax cld - SAVE_ARGS 0,0,1 + SAVE_ARGS 0,1,0 /* no need to do an access_ok check here because rbp has been 32bit zero extended */ 1: movl (%rbp),%ebp @@ -289,7 +289,7 @@ ENTRY(ia32_cstar_target) * disabled irqs and here we enable it straight after entry: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,1,1 + SAVE_ARGS 8,0,0 movl %eax,%eax /* zero extension */ movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) @@ -419,7 +419,7 @@ ENTRY(ia32_syscall) cld /* note the registers are not zero extended to the sf. this could be a problem. */ - SAVE_ARGS 0,0,1 + SAVE_ARGS 0,1,0 GET_THREAD_INFO(%r10) orl $TS_COMPAT,TI_status(%r10) testl $_TIF_WORK_SYSCALL_ENTRY,TI_flags(%r10) diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index b67e06c4710e..b0b7d90d3054 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -85,22 +85,20 @@ For 32-bit we have the following conventions - kernel is built with #define ARGOFFSET R11 #define SWFRAME ORIG_RAX - .macro SAVE_ARGS addskip=0, norcx=0, nor891011=0 + .macro SAVE_ARGS addskip=0, save_rcx=1, save_r891011=1 subq $9*8+\addskip, %rsp CFI_ADJUST_CFA_OFFSET 9*8+\addskip movq_cfi rdi, 8*8 movq_cfi rsi, 7*8 movq_cfi rdx, 6*8 - .if \norcx - .else + .if \save_rcx movq_cfi rcx, 5*8 .endif movq_cfi rax, 4*8 - .if \nor891011 - .else + .if \save_r891011 movq_cfi r8, 3*8 movq_cfi r9, 2*8 movq_cfi r10, 1*8 diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index 8a445a0c989e..e5ece6b6e716 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -473,7 +473,7 @@ ENTRY(system_call_after_swapgs) * and short: */ ENABLE_INTERRUPTS(CLBR_NONE) - SAVE_ARGS 8,1 + SAVE_ARGS 8,0 movq %rax,ORIG_RAX-ARGOFFSET(%rsp) movq %rcx,RIP-ARGOFFSET(%rsp) CFI_REL_OFFSET rip,RIP-ARGOFFSET -- cgit v1.2.3 From 838feb47549a9b73534c6c1d7da4a9639a0750f4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@alien8.de> Date: Tue, 31 May 2011 22:21:53 +0200 Subject: x86, asm: Flip RESTORE_ARGS arguments logic ... thus getting rid of the "else" part of the conditional statement in the macro. No functionality change. Signed-off-by: Borislav Petkov <bp@alien8.de> Link: http://lkml.kernel.org/r/1306873314-32523-4-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/ia32/ia32entry.S | 4 ++-- arch/x86/include/asm/calling.h | 21 ++++++++------------- arch/x86/kernel/entry_64.S | 4 ++-- 3 files changed, 12 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/ia32/ia32entry.S b/arch/x86/ia32/ia32entry.S index c5435dcea15c..a0e866d233ee 100644 --- a/arch/x86/ia32/ia32entry.S +++ b/arch/x86/ia32/ia32entry.S @@ -173,7 +173,7 @@ sysexit_from_sys_call: andl $~0x200,EFLAGS-R11(%rsp) movl RIP-R11(%rsp),%edx /* User %eip */ CFI_REGISTER rip,rdx - RESTORE_ARGS 1,24,1,1,1,1 + RESTORE_ARGS 0,24,0,0,0,0 xorq %r8,%r8 xorq %r9,%r9 xorq %r10,%r10 @@ -328,7 +328,7 @@ cstar_dispatch: jnz sysretl_audit sysretl_from_sys_call: andl $~TS_COMPAT,TI_status(%r10) - RESTORE_ARGS 1,-ARG_SKIP,1,1,1 + RESTORE_ARGS 0,-ARG_SKIP,0,0,0 movl RIP-ARGOFFSET(%rsp),%ecx CFI_REGISTER rip,rcx movl EFLAGS-ARGOFFSET(%rsp),%r11d diff --git a/arch/x86/include/asm/calling.h b/arch/x86/include/asm/calling.h index b0b7d90d3054..a9e3a740f697 100644 --- a/arch/x86/include/asm/calling.h +++ b/arch/x86/include/asm/calling.h @@ -109,32 +109,27 @@ For 32-bit we have the following conventions - kernel is built with #define ARG_SKIP (9*8) - .macro RESTORE_ARGS skiprax=0, addskip=0, skiprcx=0, skipr11=0, \ - skipr8910=0, skiprdx=0 - .if \skipr11 - .else + .macro RESTORE_ARGS rstor_rax=1, addskip=0, rstor_rcx=1, rstor_r11=1, \ + rstor_r8910=1, rstor_rdx=1 + .if \rstor_r11 movq_cfi_restore 0*8, r11 .endif - .if \skipr8910 - .else + .if \rstor_r8910 movq_cfi_restore 1*8, r10 movq_cfi_restore 2*8, r9 movq_cfi_restore 3*8, r8 .endif - .if \skiprax - .else + .if \rstor_rax movq_cfi_restore 4*8, rax .endif - .if \skiprcx - .else + .if \rstor_rcx movq_cfi_restore 5*8, rcx .endif - .if \skiprdx - .else + .if \rstor_rdx movq_cfi_restore 6*8, rdx .endif @@ -193,7 +188,7 @@ For 32-bit we have the following conventions - kernel is built with .macro RESTORE_ALL addskip=0 RESTORE_REST - RESTORE_ARGS 0, \addskip + RESTORE_ARGS 1, \addskip .endm .macro icebp diff --git a/arch/x86/kernel/entry_64.S b/arch/x86/kernel/entry_64.S index e5ece6b6e716..0412bcbe171c 100644 --- a/arch/x86/kernel/entry_64.S +++ b/arch/x86/kernel/entry_64.S @@ -508,7 +508,7 @@ sysret_check: TRACE_IRQS_ON movq RIP-ARGOFFSET(%rsp),%rcx CFI_REGISTER rip,rcx - RESTORE_ARGS 0,-ARG_SKIP,1 + RESTORE_ARGS 1,-ARG_SKIP,0 /*CFI_REGISTER rflags,r11*/ movq PER_CPU_VAR(old_rsp), %rsp USERGS_SYSRET64 @@ -858,7 +858,7 @@ retint_restore_args: /* return to kernel space */ */ TRACE_IRQS_IRETQ restore_args: - RESTORE_ARGS 0,8,0 + RESTORE_ARGS 1,8,1 irq_return: INTERRUPT_RETURN -- cgit v1.2.3 From 38e6b75d3b6f4b9445eb6486e28fc716acda44ae Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@alien8.de> Date: Tue, 31 May 2011 22:21:54 +0200 Subject: x86, asm: Cleanup thunk_64.S Drop thunk_ra macro in favor of an additional argument to the thunk macro since their bodies are almost identical. Do a whitespace scrubbing and use CFI-aware macros for full annotation. Signed-off-by: Borislav Petkov <bp@alien8.de> Link: http://lkml.kernel.org/r/1306873314-32523-5-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/lib/thunk_64.S | 46 +++++++++++++++++++--------------------------- 1 file changed, 19 insertions(+), 27 deletions(-) (limited to 'arch') diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index 782b082c9ff7..d5b088b3ab81 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -5,50 +5,42 @@ * Added trace_hardirqs callers - Copyright 2007 Steven Rostedt, Red Hat, Inc. * Subject to the GNU public license, v.2. No warranty of any kind. */ +#include <linux/linkage.h> +#include <asm/dwarf2.h> +#include <asm/calling.h> +#include <asm/rwlock.h> - #include <linux/linkage.h> - #include <asm/dwarf2.h> - #include <asm/calling.h> - #include <asm/rwlock.h> - - /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ - .macro thunk name,func - .globl \name -\name: - CFI_STARTPROC - SAVE_ARGS - call \func - jmp restore - CFI_ENDPROC - .endm - -#ifdef CONFIG_TRACE_IRQFLAGS - /* put return address in rdi (arg1) */ - .macro thunk_ra name,func + /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ + .macro THUNK name, func, put_ret_addr_in_rdi=0 .globl \name \name: CFI_STARTPROC + + /* this one pushes 9 elems, the next one would be %rIP */ SAVE_ARGS - /* SAVE_ARGS pushs 9 elements */ - /* the next element would be the rip */ - movq 9*8(%rsp), %rdi + + .if \put_ret_addr_in_rdi + movq_cfi_restore 9*8, rdi + .endif + call \func jmp restore CFI_ENDPROC .endm - thunk_ra trace_hardirqs_on_thunk,trace_hardirqs_on_caller - thunk_ra trace_hardirqs_off_thunk,trace_hardirqs_off_caller +#ifdef CONFIG_TRACE_IRQFLAGS + THUNK trace_hardirqs_on_thunk,trace_hardirqs_on_caller,1 + THUNK trace_hardirqs_off_thunk,trace_hardirqs_off_caller,1 #endif #ifdef CONFIG_DEBUG_LOCK_ALLOC - thunk lockdep_sys_exit_thunk,lockdep_sys_exit + THUNK lockdep_sys_exit_thunk,lockdep_sys_exit #endif - + /* SAVE_ARGS below is used only for the .cfi directives it contains. */ CFI_STARTPROC SAVE_ARGS restore: RESTORE_ARGS - ret + ret CFI_ENDPROC -- cgit v1.2.3 From dd2897bf0f4d523238e87dabb23e9634ea9ba73d Mon Sep 17 00:00:00 2001 From: Borislav Petkov <bp@alien8.de> Date: Fri, 3 Jun 2011 22:07:22 +0200 Subject: x86, asm: Fix binutils 2.16 issue with __USER32_CS While testing the patchset at http://lkml.kernel.org/r/1306873314-32523-1-git-send-email-bp@alien8.de with binutils 2.16.1 from hell, kernel build fails with the following error: arch/x86/ia32/ia32entry.S: Assembler messages: arch/x86/ia32/ia32entry.S:139: Error: too many positional arguments make[2]: *** [arch/x86/ia32/ia32entry.o] Error 1 make[1]: *** [arch/x86/ia32] Error 2 make[1]: *** Waiting for unfinished jobs.... make: *** [arch/x86] Error 2 make: *** Waiting for unfinished jobs.... due to spaces between the operators of the __USER32_CS define. Fix it so that gas 2.16 can swallow it too. Signed-off-by: Borislav Petkov <bp@alien8.de> Link: http://lkml.kernel.org/r/1307131642-32595-1-git-send-email-bp@alien8.de Signed-off-by: H. Peter Anvin <hpa@linux.intel.com> --- arch/x86/include/asm/segment.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/include/asm/segment.h b/arch/x86/include/asm/segment.h index cd84f7208f76..5e641715c3fe 100644 --- a/arch/x86/include/asm/segment.h +++ b/arch/x86/include/asm/segment.h @@ -162,7 +162,7 @@ #define GDT_ENTRY_DEFAULT_USER32_CS 4 #define GDT_ENTRY_DEFAULT_USER_DS 5 #define GDT_ENTRY_DEFAULT_USER_CS 6 -#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS * 8 + 3) +#define __USER32_CS (GDT_ENTRY_DEFAULT_USER32_CS*8+3) #define __USER32_DS __USER_DS #define GDT_ENTRY_TSS 8 /* needs two entries */ -- cgit v1.2.3 From 4625cd637919edfb562e0d62abf94f52e9321335 Mon Sep 17 00:00:00 2001 From: Jan Beulich <JBeulich@novell.com> Date: Tue, 19 Jul 2011 12:59:51 +0100 Subject: x86: Unify rwlock assembly implementation Rather than having two functionally identical implementations for 32- and 64-bit configurations, extend the existing assembly abstractions enough to fold the two rwlock implementations into a shared one. Signed-off-by: Jan Beulich <jbeulich@novell.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/4E258DD7020000780004E3EA@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/asm.h | 3 ++- arch/x86/include/asm/frame.h | 11 ++++++----- arch/x86/lib/Makefile | 6 ++++-- arch/x86/lib/rwlock.S | 44 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/rwlock_64.S | 38 -------------------------------------- arch/x86/lib/semaphore_32.S | 44 -------------------------------------------- 6 files changed, 56 insertions(+), 90 deletions(-) create mode 100644 arch/x86/lib/rwlock.S delete mode 100644 arch/x86/lib/rwlock_64.S (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index b3ed1e1460ff..5890beb021c4 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -15,7 +15,8 @@ # define __ASM_SEL(a,b) __ASM_FORM(b) #endif -#define __ASM_SIZE(inst) __ASM_SEL(inst##l, inst##q) +#define __ASM_SIZE(inst, ...) __ASM_SEL(inst##l##__VA_ARGS__, \ + inst##q##__VA_ARGS__) #define __ASM_REG(reg) __ASM_SEL(e##reg, r##reg) #define _ASM_PTR __ASM_SEL(.long, .quad) diff --git a/arch/x86/include/asm/frame.h b/arch/x86/include/asm/frame.h index 2c6fc9e62812..3b629f47eb65 100644 --- a/arch/x86/include/asm/frame.h +++ b/arch/x86/include/asm/frame.h @@ -1,5 +1,6 @@ #ifdef __ASSEMBLY__ +#include <asm/asm.h> #include <asm/dwarf2.h> /* The annotation hides the frame from the unwinder and makes it look @@ -7,13 +8,13 @@ frame pointer later */ #ifdef CONFIG_FRAME_POINTER .macro FRAME - pushl_cfi %ebp - CFI_REL_OFFSET ebp,0 - movl %esp,%ebp + __ASM_SIZE(push,_cfi) %__ASM_REG(bp) + CFI_REL_OFFSET __ASM_REG(bp), 0 + __ASM_SIZE(mov) %__ASM_REG(sp), %__ASM_REG(bp) .endm .macro ENDFRAME - popl_cfi %ebp - CFI_RESTORE ebp + __ASM_SIZE(pop,_cfi) %__ASM_REG(bp) + CFI_RESTORE __ASM_REG(bp) .endm #else .macro FRAME diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index f2479f19ddde..d3ed1203cdd7 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,6 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o +lib-$(CONFIG_SMP) += rwlock.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o obj-y += msr.o msr-reg.o msr-reg-export.o @@ -29,7 +30,8 @@ ifeq ($(CONFIG_X86_32),y) lib-y += atomic64_cx8_32.o lib-y += checksum_32.o lib-y += strstr_32.o - lib-y += semaphore_32.o string_32.o + lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += semaphore_32.o + lib-y += string_32.o lib-y += cmpxchg.o ifneq ($(CONFIG_X86_CMPXCHG64),y) lib-y += cmpxchg8b_emu.o atomic64_386_32.o @@ -40,7 +42,7 @@ else lib-y += csum-partial_64.o csum-copy_64.o csum-wrappers_64.o lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o - lib-y += copy_user_64.o rwlock_64.o copy_user_nocache_64.o + lib-y += copy_user_64.o copy_user_nocache_64.o lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o lib-y += cmpxchg16b_emu.o endif diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S new file mode 100644 index 000000000000..fca17829caa8 --- /dev/null +++ b/arch/x86/lib/rwlock.S @@ -0,0 +1,44 @@ +/* Slow paths of read/write spinlocks. */ + +#include <linux/linkage.h> +#include <asm/alternative-asm.h> +#include <asm/frame.h> +#include <asm/rwlock.h> + +#ifdef CONFIG_X86_32 +# define __lock_ptr eax +#else +# define __lock_ptr rdi +#endif + +ENTRY(__write_lock_failed) + CFI_STARTPROC + FRAME +0: LOCK_PREFIX + addl $RW_LOCK_BIAS, (%__lock_ptr) +1: rep; nop + cmpl $RW_LOCK_BIAS, (%__lock_ptr) + jne 1b + LOCK_PREFIX + subl $RW_LOCK_BIAS, (%__lock_ptr) + jnz 0b + ENDFRAME + ret + CFI_ENDPROC +END(__write_lock_failed) + +ENTRY(__read_lock_failed) + CFI_STARTPROC + FRAME +0: LOCK_PREFIX + incl (%__lock_ptr) +1: rep; nop + cmpl $1, (%__lock_ptr) + js 1b + LOCK_PREFIX + decl (%__lock_ptr) + js 0b + ENDFRAME + ret + CFI_ENDPROC +END(__read_lock_failed) diff --git a/arch/x86/lib/rwlock_64.S b/arch/x86/lib/rwlock_64.S deleted file mode 100644 index 05ea55f71405..000000000000 --- a/arch/x86/lib/rwlock_64.S +++ /dev/null @@ -1,38 +0,0 @@ -/* Slow paths of read/write spinlocks. */ - -#include <linux/linkage.h> -#include <asm/rwlock.h> -#include <asm/alternative-asm.h> -#include <asm/dwarf2.h> - -/* rdi: pointer to rwlock_t */ -ENTRY(__write_lock_failed) - CFI_STARTPROC - LOCK_PREFIX - addl $RW_LOCK_BIAS,(%rdi) -1: rep - nop - cmpl $RW_LOCK_BIAS,(%rdi) - jne 1b - LOCK_PREFIX - subl $RW_LOCK_BIAS,(%rdi) - jnz __write_lock_failed - ret - CFI_ENDPROC -END(__write_lock_failed) - -/* rdi: pointer to rwlock_t */ -ENTRY(__read_lock_failed) - CFI_STARTPROC - LOCK_PREFIX - incl (%rdi) -1: rep - nop - cmpl $1,(%rdi) - js 1b - LOCK_PREFIX - decl (%rdi) - js __read_lock_failed - ret - CFI_ENDPROC -END(__read_lock_failed) diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S index 06691daa4108..65b591d778b1 100644 --- a/arch/x86/lib/semaphore_32.S +++ b/arch/x86/lib/semaphore_32.S @@ -14,8 +14,6 @@ */ #include <linux/linkage.h> -#include <asm/rwlock.h> -#include <asm/alternative-asm.h> #include <asm/frame.h> #include <asm/dwarf2.h> @@ -31,46 +29,6 @@ */ .section .sched.text, "ax" -/* - * rw spinlock fallbacks - */ -#ifdef CONFIG_SMP -ENTRY(__write_lock_failed) - CFI_STARTPROC - FRAME -2: LOCK_PREFIX - addl $ RW_LOCK_BIAS,(%eax) -1: rep; nop - cmpl $ RW_LOCK_BIAS,(%eax) - jne 1b - LOCK_PREFIX - subl $ RW_LOCK_BIAS,(%eax) - jnz 2b - ENDFRAME - ret - CFI_ENDPROC - ENDPROC(__write_lock_failed) - -ENTRY(__read_lock_failed) - CFI_STARTPROC - FRAME -2: LOCK_PREFIX - incl (%eax) -1: rep; nop - cmpl $1,(%eax) - js 1b - LOCK_PREFIX - decl (%eax) - js 2b - ENDFRAME - ret - CFI_ENDPROC - ENDPROC(__read_lock_failed) - -#endif - -#ifdef CONFIG_RWSEM_XCHGADD_ALGORITHM - /* Fix up special calling conventions */ ENTRY(call_rwsem_down_read_failed) CFI_STARTPROC @@ -120,5 +78,3 @@ ENTRY(call_rwsem_downgrade_wake) ret CFI_ENDPROC ENDPROC(call_rwsem_downgrade_wake) - -#endif -- cgit v1.2.3 From a738669464a1e0d8e7b20f631120192f9cf7cfbd Mon Sep 17 00:00:00 2001 From: Jan Beulich <JBeulich@novell.com> Date: Tue, 19 Jul 2011 13:00:19 +0100 Subject: x86: Unify rwsem assembly implementation Rather than having two functionally identical implementations for 32- and 64-bit configurations, use the previously extended assembly abstractions to fold the rwsem two implementations into a shared one. Signed-off-by: Jan Beulich <jbeulich@novell.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/4E258DF3020000780004E3ED@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/um/sys-i386/Makefile | 2 +- arch/um/sys-x86_64/Makefile | 2 +- arch/x86/lib/Makefile | 3 +- arch/x86/lib/rwsem.S | 136 ++++++++++++++++++++++++++++++++++++++++++++ arch/x86/lib/rwsem_64.S | 93 ------------------------------ arch/x86/lib/semaphore_32.S | 80 -------------------------- 6 files changed, 139 insertions(+), 177 deletions(-) create mode 100644 arch/x86/lib/rwsem.S delete mode 100644 arch/x86/lib/rwsem_64.S delete mode 100644 arch/x86/lib/semaphore_32.S (limited to 'arch') diff --git a/arch/um/sys-i386/Makefile b/arch/um/sys-i386/Makefile index b1da91c1b200..15587ed9a361 100644 --- a/arch/um/sys-i386/Makefile +++ b/arch/um/sys-i386/Makefile @@ -8,7 +8,7 @@ obj-y = bug.o bugs.o checksum.o delay.o fault.o ksyms.o ldt.o ptrace.o \ obj-$(CONFIG_BINFMT_ELF) += elfcore.o -subarch-obj-y = lib/semaphore_32.o lib/string_32.o +subarch-obj-y = lib/rwsem.o lib/string_32.o subarch-obj-$(CONFIG_HIGHMEM) += mm/highmem_32.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o diff --git a/arch/um/sys-x86_64/Makefile b/arch/um/sys-x86_64/Makefile index c1ea9eb04466..61fc99a42e10 100644 --- a/arch/um/sys-x86_64/Makefile +++ b/arch/um/sys-x86_64/Makefile @@ -9,7 +9,7 @@ obj-y = bug.o bugs.o delay.o fault.o ldt.o mem.o ptrace.o ptrace_user.o \ sysrq.o ksyms.o tls.o subarch-obj-y = lib/csum-partial_64.o lib/memcpy_64.o lib/thunk_64.o \ - lib/rwsem_64.o + lib/rwsem.o subarch-obj-$(CONFIG_MODULES) += kernel/module.o ldt-y = ../sys-i386/ldt.o diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index d3ed1203cdd7..5d46ddcb69c5 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -21,6 +21,7 @@ lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o lib-$(CONFIG_SMP) += rwlock.o +lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem.o lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o obj-y += msr.o msr-reg.o msr-reg-export.o @@ -30,7 +31,6 @@ ifeq ($(CONFIG_X86_32),y) lib-y += atomic64_cx8_32.o lib-y += checksum_32.o lib-y += strstr_32.o - lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += semaphore_32.o lib-y += string_32.o lib-y += cmpxchg.o ifneq ($(CONFIG_X86_CMPXCHG64),y) @@ -43,6 +43,5 @@ else lib-y += thunk_64.o clear_page_64.o copy_page_64.o lib-y += memmove_64.o memset_64.o lib-y += copy_user_64.o copy_user_nocache_64.o - lib-$(CONFIG_RWSEM_XCHGADD_ALGORITHM) += rwsem_64.o lib-y += cmpxchg16b_emu.o endif diff --git a/arch/x86/lib/rwsem.S b/arch/x86/lib/rwsem.S new file mode 100644 index 000000000000..5dff5f042468 --- /dev/null +++ b/arch/x86/lib/rwsem.S @@ -0,0 +1,136 @@ +/* + * x86 semaphore implementation. + * + * (C) Copyright 1999 Linus Torvalds + * + * Portions Copyright 1999 Red Hat, Inc. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + * + * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> + */ + +#include <linux/linkage.h> +#include <asm/alternative-asm.h> +#include <asm/dwarf2.h> + +#define __ASM_HALF_REG(reg) __ASM_SEL(reg, e##reg) +#define __ASM_HALF_SIZE(inst) __ASM_SEL(inst##w, inst##l) + +#ifdef CONFIG_X86_32 + +/* + * The semaphore operations have a special calling sequence that + * allow us to do a simpler in-line version of them. These routines + * need to convert that sequence back into the C sequence when + * there is contention on the semaphore. + * + * %eax contains the semaphore pointer on entry. Save the C-clobbered + * registers (%eax, %edx and %ecx) except %eax whish is either a return + * value or just clobbered.. + */ + +#define save_common_regs \ + pushl_cfi %ecx; CFI_REL_OFFSET ecx, 0 + +#define restore_common_regs \ + popl_cfi %ecx; CFI_RESTORE ecx + + /* Avoid uglifying the argument copying x86-64 needs to do. */ + .macro movq src, dst + .endm + +#else + +/* + * x86-64 rwsem wrappers + * + * This interfaces the inline asm code to the slow-path + * C routines. We need to save the call-clobbered regs + * that the asm does not mark as clobbered, and move the + * argument from %rax to %rdi. + * + * NOTE! We don't need to save %rax, because the functions + * will always return the semaphore pointer in %rax (which + * is also the input argument to these helpers) + * + * The following can clobber %rdx because the asm clobbers it: + * call_rwsem_down_write_failed + * call_rwsem_wake + * but %rdi, %rsi, %rcx, %r8-r11 always need saving. + */ + +#define save_common_regs \ + pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ + pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ + pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ + pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ + pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ + pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ + pushq_cfi %r11; CFI_REL_OFFSET r11, 0 + +#define restore_common_regs \ + popq_cfi %r11; CFI_RESTORE r11; \ + popq_cfi %r10; CFI_RESTORE r10; \ + popq_cfi %r9; CFI_RESTORE r9; \ + popq_cfi %r8; CFI_RESTORE r8; \ + popq_cfi %rcx; CFI_RESTORE rcx; \ + popq_cfi %rsi; CFI_RESTORE rsi; \ + popq_cfi %rdi; CFI_RESTORE rdi + +#endif + +/* Fix up special calling conventions */ +ENTRY(call_rwsem_down_read_failed) + CFI_STARTPROC + save_common_regs + __ASM_SIZE(push,_cfi) %__ASM_REG(dx) + CFI_REL_OFFSET __ASM_REG(dx), 0 + movq %rax,%rdi + call rwsem_down_read_failed + __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) + CFI_RESTORE __ASM_REG(dx) + restore_common_regs + ret + CFI_ENDPROC +ENDPROC(call_rwsem_down_read_failed) + +ENTRY(call_rwsem_down_write_failed) + CFI_STARTPROC + save_common_regs + movq %rax,%rdi + call rwsem_down_write_failed + restore_common_regs + ret + CFI_ENDPROC +ENDPROC(call_rwsem_down_write_failed) + +ENTRY(call_rwsem_wake) + CFI_STARTPROC + /* do nothing if still outstanding active readers */ + __ASM_HALF_SIZE(dec) %__ASM_HALF_REG(dx) + jnz 1f + save_common_regs + movq %rax,%rdi + call rwsem_wake + restore_common_regs +1: ret + CFI_ENDPROC +ENDPROC(call_rwsem_wake) + +ENTRY(call_rwsem_downgrade_wake) + CFI_STARTPROC + save_common_regs + __ASM_SIZE(push,_cfi) %__ASM_REG(dx) + CFI_REL_OFFSET __ASM_REG(dx), 0 + movq %rax,%rdi + call rwsem_downgrade_wake + __ASM_SIZE(pop,_cfi) %__ASM_REG(dx) + CFI_RESTORE __ASM_REG(dx) + restore_common_regs + ret + CFI_ENDPROC +ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/rwsem_64.S b/arch/x86/lib/rwsem_64.S deleted file mode 100644 index 67743977398b..000000000000 --- a/arch/x86/lib/rwsem_64.S +++ /dev/null @@ -1,93 +0,0 @@ -/* - * x86-64 rwsem wrappers - * - * This interfaces the inline asm code to the slow-path - * C routines. We need to save the call-clobbered regs - * that the asm does not mark as clobbered, and move the - * argument from %rax to %rdi. - * - * NOTE! We don't need to save %rax, because the functions - * will always return the semaphore pointer in %rax (which - * is also the input argument to these helpers) - * - * The following can clobber %rdx because the asm clobbers it: - * call_rwsem_down_write_failed - * call_rwsem_wake - * but %rdi, %rsi, %rcx, %r8-r11 always need saving. - */ - -#include <linux/linkage.h> -#include <asm/rwlock.h> -#include <asm/alternative-asm.h> -#include <asm/frame.h> -#include <asm/dwarf2.h> - -#define save_common_regs \ - pushq_cfi %rdi; CFI_REL_OFFSET rdi, 0; \ - pushq_cfi %rsi; CFI_REL_OFFSET rsi, 0; \ - pushq_cfi %rcx; CFI_REL_OFFSET rcx, 0; \ - pushq_cfi %r8; CFI_REL_OFFSET r8, 0; \ - pushq_cfi %r9; CFI_REL_OFFSET r9, 0; \ - pushq_cfi %r10; CFI_REL_OFFSET r10, 0; \ - pushq_cfi %r11; CFI_REL_OFFSET r11, 0 - -#define restore_common_regs \ - popq_cfi %r11; CFI_RESTORE r11; \ - popq_cfi %r10; CFI_RESTORE r10; \ - popq_cfi %r9; CFI_RESTORE r9; \ - popq_cfi %r8; CFI_RESTORE r8; \ - popq_cfi %rcx; CFI_RESTORE rcx; \ - popq_cfi %rsi; CFI_RESTORE rsi; \ - popq_cfi %rdi; CFI_RESTORE rdi - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_down_read_failed) - CFI_STARTPROC - save_common_regs - pushq_cfi %rdx - CFI_REL_OFFSET rdx, 0 - movq %rax,%rdi - call rwsem_down_read_failed - popq_cfi %rdx - CFI_RESTORE rdx - restore_common_regs - ret - CFI_ENDPROC -ENDPROC(call_rwsem_down_read_failed) - -ENTRY(call_rwsem_down_write_failed) - CFI_STARTPROC - save_common_regs - movq %rax,%rdi - call rwsem_down_write_failed - restore_common_regs - ret - CFI_ENDPROC -ENDPROC(call_rwsem_down_write_failed) - -ENTRY(call_rwsem_wake) - CFI_STARTPROC - decl %edx /* do nothing if still outstanding active readers */ - jnz 1f - save_common_regs - movq %rax,%rdi - call rwsem_wake - restore_common_regs -1: ret - CFI_ENDPROC -ENDPROC(call_rwsem_wake) - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_downgrade_wake) - CFI_STARTPROC - save_common_regs - pushq_cfi %rdx - CFI_REL_OFFSET rdx, 0 - movq %rax,%rdi - call rwsem_downgrade_wake - popq_cfi %rdx - CFI_RESTORE rdx - restore_common_regs - ret - CFI_ENDPROC -ENDPROC(call_rwsem_downgrade_wake) diff --git a/arch/x86/lib/semaphore_32.S b/arch/x86/lib/semaphore_32.S deleted file mode 100644 index 65b591d778b1..000000000000 --- a/arch/x86/lib/semaphore_32.S +++ /dev/null @@ -1,80 +0,0 @@ -/* - * i386 semaphore implementation. - * - * (C) Copyright 1999 Linus Torvalds - * - * Portions Copyright 1999 Red Hat, Inc. - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - * rw semaphores implemented November 1999 by Benjamin LaHaise <bcrl@kvack.org> - */ - -#include <linux/linkage.h> -#include <asm/frame.h> -#include <asm/dwarf2.h> - -/* - * The semaphore operations have a special calling sequence that - * allow us to do a simpler in-line version of them. These routines - * need to convert that sequence back into the C sequence when - * there is contention on the semaphore. - * - * %eax contains the semaphore pointer on entry. Save the C-clobbered - * registers (%eax, %edx and %ecx) except %eax whish is either a return - * value or just clobbered.. - */ - .section .sched.text, "ax" - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_down_read_failed) - CFI_STARTPROC - pushl_cfi %ecx - CFI_REL_OFFSET ecx,0 - pushl_cfi %edx - CFI_REL_OFFSET edx,0 - call rwsem_down_read_failed - popl_cfi %edx - popl_cfi %ecx - ret - CFI_ENDPROC - ENDPROC(call_rwsem_down_read_failed) - -ENTRY(call_rwsem_down_write_failed) - CFI_STARTPROC - pushl_cfi %ecx - CFI_REL_OFFSET ecx,0 - calll rwsem_down_write_failed - popl_cfi %ecx - ret - CFI_ENDPROC - ENDPROC(call_rwsem_down_write_failed) - -ENTRY(call_rwsem_wake) - CFI_STARTPROC - decw %dx /* do nothing if still outstanding active readers */ - jnz 1f - pushl_cfi %ecx - CFI_REL_OFFSET ecx,0 - call rwsem_wake - popl_cfi %ecx -1: ret - CFI_ENDPROC - ENDPROC(call_rwsem_wake) - -/* Fix up special calling conventions */ -ENTRY(call_rwsem_downgrade_wake) - CFI_STARTPROC - pushl_cfi %ecx - CFI_REL_OFFSET ecx,0 - pushl_cfi %edx - CFI_REL_OFFSET edx,0 - call rwsem_downgrade_wake - popl_cfi %edx - popl_cfi %ecx - ret - CFI_ENDPROC - ENDPROC(call_rwsem_downgrade_wake) -- cgit v1.2.3 From a750036f35cda160ef77408ec92c3dc41f8feebb Mon Sep 17 00:00:00 2001 From: Jan Beulich <JBeulich@novell.com> Date: Tue, 19 Jul 2011 13:00:45 +0100 Subject: x86: Fix write lock scalability 64-bit issue With the write lock path simply subtracting RW_LOCK_BIAS there is, on large systems, the theoretical possibility of overflowing the 32-bit value that was used so far (namely if 128 or more CPUs manage to do the subtraction, but don't get to do the inverse addition in the failure path quickly enough). A first measure is to modify RW_LOCK_BIAS itself - with the new value chosen, it is good for up to 2048 CPUs each allowed to nest over 2048 times on the read path without causing an issue. Quite possibly it would even be sufficient to adjust the bias a little further, assuming that allowing for significantly less nesting would suffice. However, as the original value chosen allowed for even more nesting levels, to support more than 2048 CPUs (possible currently only for 64-bit kernels) the lock itself gets widened to 64 bits. Signed-off-by: Jan Beulich <jbeulich@novell.com> Cc: Linus Torvalds <torvalds@linux-foundation.org> Cc: Andrew Morton <akpm@linux-foundation.org> Cc: Peter Zijlstra <a.p.zijlstra@chello.nl> Link: http://lkml.kernel.org/r/4E258E0D020000780004E3F0@nat28.tlf.novell.com Signed-off-by: Ingo Molnar <mingo@elte.hu> --- arch/x86/include/asm/asm.h | 2 ++ arch/x86/include/asm/rwlock.h | 43 ++++++++++++++++++++++++++++++++++- arch/x86/include/asm/spinlock.h | 37 ++++++++++++++++++------------ arch/x86/include/asm/spinlock_types.h | 6 +---- arch/x86/lib/rwlock.S | 12 +++++----- arch/x86/lib/thunk_64.S | 1 - 6 files changed, 73 insertions(+), 28 deletions(-) (limited to 'arch') diff --git a/arch/x86/include/asm/asm.h b/arch/x86/include/asm/asm.h index 5890beb021c4..9412d6558c88 100644 --- a/arch/x86/include/asm/asm.h +++ b/arch/x86/include/asm/asm.h @@ -3,9 +3,11 @@ #ifdef __ASSEMBLY__ # define __ASM_FORM(x) x +# define __ASM_FORM_COMMA(x) x, # define __ASM_EX_SEC .section __ex_table, "a" #else # define __ASM_FORM(x) " " #x " " +# define __ASM_FORM_COMMA(x) " " #x "," # define __ASM_EX_SEC " .section __ex_table,\"a\"\n" #endif diff --git a/arch/x86/include/asm/rwlock.h b/arch/x86/include/asm/rwlock.h index 6a8c0d645108..a5370a03d90c 100644 --- a/arch/x86/include/asm/rwlock.h +++ b/arch/x86/include/asm/rwlock.h @@ -1,7 +1,48 @@ #ifndef _ASM_X86_RWLOCK_H #define _ASM_X86_RWLOCK_H -#define RW_LOCK_BIAS 0x01000000 +#include <asm/asm.h> + +#if CONFIG_NR_CPUS <= 2048 + +#ifndef __ASSEMBLY__ +typedef union { + s32 lock; + s32 write; +} arch_rwlock_t; +#endif + +#define RW_LOCK_BIAS 0x00100000 +#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##l) +#define READ_LOCK_ATOMIC(n) atomic_##n +#define WRITE_LOCK_ADD(n) __ASM_FORM_COMMA(addl n) +#define WRITE_LOCK_SUB(n) __ASM_FORM_COMMA(subl n) +#define WRITE_LOCK_CMP RW_LOCK_BIAS + +#else /* CONFIG_NR_CPUS > 2048 */ + +#include <linux/const.h> + +#ifndef __ASSEMBLY__ +typedef union { + s64 lock; + struct { + u32 read; + s32 write; + }; +} arch_rwlock_t; +#endif + +#define RW_LOCK_BIAS (_AC(1,L) << 32) +#define READ_LOCK_SIZE(insn) __ASM_FORM(insn##q) +#define READ_LOCK_ATOMIC(n) atomic64_##n +#define WRITE_LOCK_ADD(n) __ASM_FORM(incl) +#define WRITE_LOCK_SUB(n) __ASM_FORM(decl) +#define WRITE_LOCK_CMP 1 + +#endif /* CONFIG_NR_CPUS */ + +#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } /* Actual code is in asm/spinlock.h or in arch/x86/lib/rwlock.S */ diff --git a/arch/x86/include/asm/spinlock.h b/arch/x86/include/asm/spinlock.h index 3089f70c0c52..e9e51f710e6c 100644 --- a/arch/x86/include/asm/spinlock.h +++ b/arch/x86/include/asm/spinlock.h @@ -2,7 +2,6 @@ #define _ASM_X86_SPINLOCK_H #include <asm/atomic.h> -#include <asm/rwlock.h> #include <asm/page.h> #include <asm/processor.h> #include <linux/compiler.h> @@ -234,7 +233,7 @@ static inline void arch_spin_unlock_wait(arch_spinlock_t *lock) */ static inline int arch_read_can_lock(arch_rwlock_t *lock) { - return (int)(lock)->lock > 0; + return lock->lock > 0; } /** @@ -243,12 +242,12 @@ static inline int arch_read_can_lock(arch_rwlock_t *lock) */ static inline int arch_write_can_lock(arch_rwlock_t *lock) { - return (lock)->lock == RW_LOCK_BIAS; + return lock->write == WRITE_LOCK_CMP; } static inline void arch_read_lock(arch_rwlock_t *rw) { - asm volatile(LOCK_PREFIX " subl $1,(%0)\n\t" + asm volatile(LOCK_PREFIX READ_LOCK_SIZE(dec) " (%0)\n\t" "jns 1f\n" "call __read_lock_failed\n\t" "1:\n" @@ -257,47 +256,55 @@ static inline void arch_read_lock(arch_rwlock_t *rw) static inline void arch_write_lock(arch_rwlock_t *rw) { - asm volatile(LOCK_PREFIX " subl %1,(%0)\n\t" + asm volatile(LOCK_PREFIX WRITE_LOCK_SUB(%1) "(%0)\n\t" "jz 1f\n" "call __write_lock_failed\n\t" "1:\n" - ::LOCK_PTR_REG (rw), "i" (RW_LOCK_BIAS) : "memory"); + ::LOCK_PTR_REG (&rw->write), "i" (RW_LOCK_BIAS) + : "memory"); } static inline int arch_read_trylock(arch_rwlock_t *lock) { - atomic_t *count = (atomic_t *)lock; + READ_LOCK_ATOMIC(t) *count = (READ_LOCK_ATOMIC(t) *)lock; - if (atomic_dec_return(count) >= 0) + if (READ_LOCK_ATOMIC(dec_return)(count) >= 0) return 1; - atomic_inc(count); + READ_LOCK_ATOMIC(inc)(count); return 0; } static inline int arch_write_trylock(arch_rwlock_t *lock) { - atomic_t *count = (atomic_t *)lock; + atomic_t *count = (atomic_t *)&lock->write; - if (atomic_sub_and_test(RW_LOCK_BIAS, count)) + if (atomic_sub_and_test(WRITE_LOCK_CMP, count)) return 1; - atomic_add(RW_LOCK_BIAS, count); + atomic_add(WRITE_LOCK_CMP, count); return 0; } static inline void arch_read_unlock(arch_rwlock_t *rw) { - asm volatile(LOCK_PREFIX "incl %0" :"+m" (rw->lock) : : "memory"); + asm volatile(LOCK_PREFIX READ_LOCK_SIZE(inc) " %0" + :"+m" (rw->lock) : : "memory"); } static inline void arch_write_unlock(arch_rwlock_t *rw) { - asm volatile(LOCK_PREFIX "addl %1, %0" - : "+m" (rw->lock) : "i" (RW_LOCK_BIAS) : "memory"); + asm volatile(LOCK_PREFIX WRITE_LOCK_ADD(%1) "%0" + : "+m" (rw->write) : "i" (RW_LOCK_BIAS) : "memory"); } #define arch_read_lock_flags(lock, flags) arch_read_lock(lock) #define arch_write_lock_flags(lock, flags) arch_write_lock(lock) +#undef READ_LOCK_SIZE +#undef READ_LOCK_ATOMIC +#undef WRITE_LOCK_ADD +#undef WRITE_LOCK_SUB +#undef WRITE_LOCK_CMP + #define arch_spin_relax(lock) cpu_relax() #define arch_read_relax(lock) cpu_relax() #define arch_write_relax(lock) cpu_relax() diff --git a/arch/x86/include/asm/spinlock_types.h b/arch/x86/include/asm/spinlock_types.h index dcb48b2edc11..7c7a486fcb68 100644 --- a/arch/x86/include/asm/spinlock_types.h +++ b/arch/x86/include/asm/spinlock_types.h @@ -11,10 +11,6 @@ typedef struct arch_spinlock { #define __ARCH_SPIN_LOCK_UNLOCKED { 0 } -typedef struct { - unsigned int lock; -} arch_rwlock_t; - -#define __ARCH_RW_LOCK_UNLOCKED { RW_LOCK_BIAS } +#include <asm/rwlock.h> #endif /* _ASM_X86_SPINLOCK_TYPES_H */ diff --git a/arch/x86/lib/rwlock.S b/arch/x86/lib/rwlock.S index fca17829caa8..1cad22139c88 100644 --- a/arch/x86/lib/rwlock.S +++ b/arch/x86/lib/rwlock.S @@ -15,12 +15,12 @@ ENTRY(__write_lock_failed) CFI_STARTPROC FRAME 0: LOCK_PREFIX - addl $RW_LOCK_BIAS, (%__lock_ptr) + WRITE_LOCK_ADD($RW_LOCK_BIAS) (%__lock_ptr) 1: rep; nop - cmpl $RW_LOCK_BIAS, (%__lock_ptr) + cmpl $WRITE_LOCK_CMP, (%__lock_ptr) jne 1b LOCK_PREFIX - subl $RW_LOCK_BIAS, (%__lock_ptr) + WRITE_LOCK_SUB($RW_LOCK_BIAS) (%__lock_ptr) jnz 0b ENDFRAME ret @@ -31,12 +31,12 @@ ENTRY(__read_lock_failed) CFI_STARTPROC FRAME 0: LOCK_PREFIX - incl (%__lock_ptr) + READ_LOCK_SIZE(inc) (%__lock_ptr) 1: rep; nop - cmpl $1, (%__lock_ptr) + READ_LOCK_SIZE(cmp) $1, (%__lock_ptr) js 1b LOCK_PREFIX - decl (%__lock_ptr) + READ_LOCK_SIZE(dec) (%__lock_ptr) js 0b ENDFRAME ret diff --git a/arch/x86/lib/thunk_64.S b/arch/x86/lib/thunk_64.S index d5b088b3ab81..a63efd6bb6a5 100644 --- a/arch/x86/lib/thunk_64.S +++ b/arch/x86/lib/thunk_64.S @@ -8,7 +8,6 @@ #include <linux/linkage.h> #include <asm/dwarf2.h> #include <asm/calling.h> -#include <asm/rwlock.h> /* rdi: arg1 ... normal C conventions. rax is saved/restored. */ .macro THUNK name, func, put_ret_addr_in_rdi=0 -- cgit v1.2.3