diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-12 16:24:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2020-10-12 16:24:13 -0700 |
commit | c90578360c92c71189308ebc71087197080e94c3 (patch) | |
tree | 15cccf727f6fe35ffd81922461996c1c2ca1ebfd /arch/x86/lib | |
parent | 50d228345a03c882dfe11928ab41b42458b3f922 (diff) | |
parent | 70d65cd555c5e43c613700f604a47f7ebcf7b6f1 (diff) | |
download | linux-c90578360c92c71189308ebc71087197080e94c3.tar.bz2 |
Merge branch 'work.csum_and_copy' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull copy_and_csum cleanups from Al Viro:
"Saner calling conventions for csum_and_copy_..._user() and friends"
[ Removing 800+ lines of code and cleaning stuff up is good - Linus ]
* 'work.csum_and_copy' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
ppc: propagate the calling conventions change down to csum_partial_copy_generic()
amd64: switch csum_partial_copy_generic() to new calling conventions
sparc64: propagate the calling convention changes down to __csum_partial_copy_...()
xtensa: propagate the calling conventions change down into csum_partial_copy_generic()
mips: propagate the calling convention change down into __csum_partial_copy_..._user()
mips: __csum_partial_copy_kernel() has no users left
mips: csum_and_copy_{to,from}_user() are never called under KERNEL_DS
sparc32: propagate the calling conventions change down to __csum_partial_copy_sparc_generic()
i386: propagate the calling conventions change down to csum_partial_copy_generic()
sh: propagate the calling conventions change down to csum_partial_copy_generic()
m68k: get rid of zeroing destination on error in csum_and_copy_from_user()
arm: propagate the calling convention changes down to csum_partial_copy_from_user()
alpha: propagate the calling convention changes down to csum_partial_copy.c helpers
saner calling conventions for csum_and_copy_..._user()
csum_and_copy_..._user(): pass 0xffffffff instead of 0 as initial sum
csum_partial_copy_nocheck(): drop the last argument
unify generic instances of csum_partial_copy_nocheck()
icmp_push_reply(): reorder adding the checksum up
skb_copy_and_csum_bits(): don't bother with the last argument
Diffstat (limited to 'arch/x86/lib')
-rw-r--r-- | arch/x86/lib/checksum_32.S | 117 |
-rw-r--r-- | arch/x86/lib/csum-copy_64.S | 140 |
-rw-r--r-- | arch/x86/lib/csum-wrappers_64.c | 86 |
3 files changed, 138 insertions, 205 deletions
diff --git a/arch/x86/lib/checksum_32.S b/arch/x86/lib/checksum_32.S index d1d768912368..4304320e51f4 100644 --- a/arch/x86/lib/checksum_32.S +++ b/arch/x86/lib/checksum_32.S @@ -253,28 +253,17 @@ EXPORT_SYMBOL(csum_partial) /* unsigned int csum_partial_copy_generic (const char *src, char *dst, - int len, int sum, int *src_err_ptr, int *dst_err_ptr) + int len) */ /* * Copy from ds while checksumming, otherwise like csum_partial - * - * The macros SRC and DST specify the type of access for the instruction. - * thus we can call a custom exception handler for all access types. - * - * FIXME: could someone double-check whether I haven't mixed up some SRC and - * DST definitions? It's damn hard to trigger all cases. I hope I got - * them all but there's no guarantee. */ -#define SRC(y...) \ +#define EXC(y...) \ 9999: y; \ _ASM_EXTABLE_UA(9999b, 6001f) -#define DST(y...) \ - 9999: y; \ - _ASM_EXTABLE_UA(9999b, 6002f) - #ifndef CONFIG_X86_USE_PPRO_CHECKSUM #define ARGBASE 16 @@ -285,20 +274,20 @@ SYM_FUNC_START(csum_partial_copy_generic) pushl %edi pushl %esi pushl %ebx - movl ARGBASE+16(%esp),%eax # sum movl ARGBASE+12(%esp),%ecx # len movl ARGBASE+4(%esp),%esi # src movl ARGBASE+8(%esp),%edi # dst + movl $-1, %eax # sum testl $2, %edi # Check alignment. jz 2f # Jump if alignment is ok. subl $2, %ecx # Alignment uses up two bytes. jae 1f # Jump if we had at least two bytes. addl $2, %ecx # ecx was < 2. Deal with it. jmp 4f -SRC(1: movw (%esi), %bx ) +EXC(1: movw (%esi), %bx ) addl $2, %esi -DST( movw %bx, (%edi) ) +EXC( movw %bx, (%edi) ) addl $2, %edi addw %bx, %ax adcl $0, %eax @@ -306,34 +295,34 @@ DST( movw %bx, (%edi) ) movl %ecx, FP(%esp) shrl $5, %ecx jz 2f - testl %esi, %esi -SRC(1: movl (%esi), %ebx ) -SRC( movl 4(%esi), %edx ) + testl %esi, %esi # what's wrong with clc? 
+EXC(1: movl (%esi), %ebx ) +EXC( movl 4(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +EXC( movl %ebx, (%edi) ) adcl %edx, %eax -DST( movl %edx, 4(%edi) ) +EXC( movl %edx, 4(%edi) ) -SRC( movl 8(%esi), %ebx ) -SRC( movl 12(%esi), %edx ) +EXC( movl 8(%esi), %ebx ) +EXC( movl 12(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 8(%edi) ) +EXC( movl %ebx, 8(%edi) ) adcl %edx, %eax -DST( movl %edx, 12(%edi) ) +EXC( movl %edx, 12(%edi) ) -SRC( movl 16(%esi), %ebx ) -SRC( movl 20(%esi), %edx ) +EXC( movl 16(%esi), %ebx ) +EXC( movl 20(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 16(%edi) ) +EXC( movl %ebx, 16(%edi) ) adcl %edx, %eax -DST( movl %edx, 20(%edi) ) +EXC( movl %edx, 20(%edi) ) -SRC( movl 24(%esi), %ebx ) -SRC( movl 28(%esi), %edx ) +EXC( movl 24(%esi), %ebx ) +EXC( movl 28(%esi), %edx ) adcl %ebx, %eax -DST( movl %ebx, 24(%edi) ) +EXC( movl %ebx, 24(%edi) ) adcl %edx, %eax -DST( movl %edx, 28(%edi) ) +EXC( movl %edx, 28(%edi) ) lea 32(%esi), %esi lea 32(%edi), %edi @@ -345,9 +334,9 @@ DST( movl %edx, 28(%edi) ) andl $0x1c, %edx je 4f shrl $2, %edx # This clears CF -SRC(3: movl (%esi), %ebx ) +EXC(3: movl (%esi), %ebx ) adcl %ebx, %eax -DST( movl %ebx, (%edi) ) +EXC( movl %ebx, (%edi) ) lea 4(%esi), %esi lea 4(%edi), %edi dec %edx @@ -357,39 +346,24 @@ DST( movl %ebx, (%edi) ) jz 7f cmpl $2, %ecx jb 5f -SRC( movw (%esi), %cx ) +EXC( movw (%esi), %cx ) leal 2(%esi), %esi -DST( movw %cx, (%edi) ) +EXC( movw %cx, (%edi) ) leal 2(%edi), %edi je 6f shll $16,%ecx -SRC(5: movb (%esi), %cl ) -DST( movb %cl, (%edi) ) +EXC(5: movb (%esi), %cl ) +EXC( movb %cl, (%edi) ) 6: addl %ecx, %eax adcl $0, %eax 7: -5000: # Exception handler: .section .fixup, "ax" 6001: - movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - - # zero the complete destination - computing the rest - # is too much work - movl ARGBASE+8(%esp), %edi # dst - movl ARGBASE+12(%esp), %ecx # len - xorl %eax,%eax - rep ; stosb - - jmp 5000b - -6002: - movl ARGBASE+24(%esp), %ebx # 
dst_err_ptr - movl $-EFAULT,(%ebx) - jmp 5000b + xorl %eax, %eax + jmp 7b .previous @@ -405,14 +379,14 @@ SYM_FUNC_END(csum_partial_copy_generic) /* Version for PentiumII/PPro */ #define ROUND1(x) \ - SRC(movl x(%esi), %ebx ) ; \ + EXC(movl x(%esi), %ebx ) ; \ addl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + EXC(movl %ebx, x(%edi) ) ; #define ROUND(x) \ - SRC(movl x(%esi), %ebx ) ; \ + EXC(movl x(%esi), %ebx ) ; \ adcl %ebx, %eax ; \ - DST(movl %ebx, x(%edi) ) ; + EXC(movl %ebx, x(%edi) ) ; #define ARGBASE 12 @@ -423,7 +397,7 @@ SYM_FUNC_START(csum_partial_copy_generic) movl ARGBASE+4(%esp),%esi #src movl ARGBASE+8(%esp),%edi #dst movl ARGBASE+12(%esp),%ecx #len - movl ARGBASE+16(%esp),%eax #sum + movl $-1, %eax #sum # movl %ecx, %edx movl %ecx, %ebx movl %esi, %edx @@ -439,7 +413,7 @@ SYM_FUNC_START(csum_partial_copy_generic) JMP_NOSPEC ebx 1: addl $64,%esi addl $64,%edi - SRC(movb -32(%edx),%bl) ; SRC(movb (%edx),%bl) + EXC(movb -32(%edx),%bl) ; EXC(movb (%edx),%bl) ROUND1(-64) ROUND(-60) ROUND(-56) ROUND(-52) ROUND (-48) ROUND(-44) ROUND(-40) ROUND(-36) ROUND (-32) ROUND(-28) ROUND(-24) ROUND(-20) @@ -453,29 +427,20 @@ SYM_FUNC_START(csum_partial_copy_generic) jz 7f cmpl $2, %edx jb 5f -SRC( movw (%esi), %dx ) +EXC( movw (%esi), %dx ) leal 2(%esi), %esi -DST( movw %dx, (%edi) ) +EXC( movw %dx, (%edi) ) leal 2(%edi), %edi je 6f shll $16,%edx 5: -SRC( movb (%esi), %dl ) -DST( movb %dl, (%edi) ) +EXC( movb (%esi), %dl ) +EXC( movb %dl, (%edi) ) 6: addl %edx, %eax adcl $0, %eax 7: .section .fixup, "ax" -6001: movl ARGBASE+20(%esp), %ebx # src_err_ptr - movl $-EFAULT, (%ebx) - # zero the complete destination (computing the rest is too much work) - movl ARGBASE+8(%esp),%edi # dst - movl ARGBASE+12(%esp),%ecx # len - xorl %eax,%eax - rep; stosb - jmp 7b -6002: movl ARGBASE+24(%esp), %ebx # dst_err_ptr - movl $-EFAULT, (%ebx) +6001: xorl %eax, %eax jmp 7b .previous diff --git a/arch/x86/lib/csum-copy_64.S b/arch/x86/lib/csum-copy_64.S index 3394a8ff7fd0..1fbd8ee9642d 
100644 --- a/arch/x86/lib/csum-copy_64.S +++ b/arch/x86/lib/csum-copy_64.S @@ -18,9 +18,6 @@ * rdi source * rsi destination * edx len (32bit) - * ecx sum (32bit) - * r8 src_err_ptr (int) - * r9 dst_err_ptr (int) * * Output * eax 64bit sum. undefined in case of exception. @@ -31,44 +28,32 @@ .macro source 10: - _ASM_EXTABLE_UA(10b, .Lbad_source) + _ASM_EXTABLE_UA(10b, .Lfault) .endm .macro dest 20: - _ASM_EXTABLE_UA(20b, .Lbad_dest) + _ASM_EXTABLE_UA(20b, .Lfault) .endm - /* - * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a - * potentially unmapped kernel address. - */ - .macro ignore L=.Lignore -30: - _ASM_EXTABLE(30b, \L) - .endm - - SYM_FUNC_START(csum_partial_copy_generic) - cmpl $3*64, %edx - jle .Lignore - -.Lignore: - subq $7*8, %rsp - movq %rbx, 2*8(%rsp) - movq %r12, 3*8(%rsp) - movq %r14, 4*8(%rsp) - movq %r13, 5*8(%rsp) - movq %r15, 6*8(%rsp) + subq $5*8, %rsp + movq %rbx, 0*8(%rsp) + movq %r12, 1*8(%rsp) + movq %r14, 2*8(%rsp) + movq %r13, 3*8(%rsp) + movq %r15, 4*8(%rsp) - movq %r8, (%rsp) - movq %r9, 1*8(%rsp) - - movl %ecx, %eax + movl $-1, %eax + xorl %r9d, %r9d movl %edx, %ecx + cmpl $8, %ecx + jb .Lshort - xorl %r9d, %r9d - movq %rcx, %r12 + testb $7, %sil + jne .Lunaligned +.Laligned: + movl %ecx, %r12d shrq $6, %r12 jz .Lhandle_tail /* < 64 */ @@ -99,7 +84,12 @@ SYM_FUNC_START(csum_partial_copy_generic) source movq 56(%rdi), %r13 - ignore 2f +30: + /* + * No _ASM_EXTABLE_UA; this is used for intentional prefetch on a + * potentially unmapped kernel address. 
+ */ + _ASM_EXTABLE(30b, 2f) prefetcht0 5*64(%rdi) 2: adcq %rbx, %rax @@ -131,8 +121,6 @@ SYM_FUNC_START(csum_partial_copy_generic) dest movq %r13, 56(%rsi) -3: - leaq 64(%rdi), %rdi leaq 64(%rsi), %rsi @@ -142,8 +130,8 @@ SYM_FUNC_START(csum_partial_copy_generic) /* do last up to 56 bytes */ .Lhandle_tail: - /* ecx: count */ - movl %ecx, %r10d + /* ecx: count, rcx.63: the end result needs to be rol8 */ + movq %rcx, %r10 andl $63, %ecx shrl $3, %ecx jz .Lfold @@ -172,6 +160,7 @@ SYM_FUNC_START(csum_partial_copy_generic) .Lhandle_7: movl %r10d, %ecx andl $7, %ecx +.L1: /* .Lshort rejoins the common path here */ shrl $1, %ecx jz .Lhandle_1 movl $2, %edx @@ -203,26 +192,65 @@ SYM_FUNC_START(csum_partial_copy_generic) adcl %r9d, %eax /* carry */ .Lende: - movq 2*8(%rsp), %rbx - movq 3*8(%rsp), %r12 - movq 4*8(%rsp), %r14 - movq 5*8(%rsp), %r13 - movq 6*8(%rsp), %r15 - addq $7*8, %rsp + testq %r10, %r10 + js .Lwas_odd +.Lout: + movq 0*8(%rsp), %rbx + movq 1*8(%rsp), %r12 + movq 2*8(%rsp), %r14 + movq 3*8(%rsp), %r13 + movq 4*8(%rsp), %r15 + addq $5*8, %rsp ret +.Lshort: + movl %ecx, %r10d + jmp .L1 +.Lunaligned: + xorl %ebx, %ebx + testb $1, %sil + jne .Lodd +1: testb $2, %sil + je 2f + source + movw (%rdi), %bx + dest + movw %bx, (%rsi) + leaq 2(%rdi), %rdi + subq $2, %rcx + leaq 2(%rsi), %rsi + addq %rbx, %rax +2: testb $4, %sil + je .Laligned + source + movl (%rdi), %ebx + dest + movl %ebx, (%rsi) + leaq 4(%rdi), %rdi + subq $4, %rcx + leaq 4(%rsi), %rsi + addq %rbx, %rax + jmp .Laligned + +.Lodd: + source + movb (%rdi), %bl + dest + movb %bl, (%rsi) + leaq 1(%rdi), %rdi + leaq 1(%rsi), %rsi + /* decrement, set MSB */ + leaq -1(%rcx, %rcx), %rcx + rorq $1, %rcx + shll $8, %ebx + addq %rbx, %rax + jmp 1b + +.Lwas_odd: + roll $8, %eax + jmp .Lout - /* Exception handlers. 
Very simple, zeroing is done in the wrappers */ -.Lbad_source: - movq (%rsp), %rax - testq %rax, %rax - jz .Lende - movl $-EFAULT, (%rax) - jmp .Lende - -.Lbad_dest: - movq 8(%rsp), %rax - testq %rax, %rax - jz .Lende - movl $-EFAULT, (%rax) - jmp .Lende + /* Exception: just return 0 */ +.Lfault: + xorl %eax, %eax + jmp .Lout SYM_FUNC_END(csum_partial_copy_generic) diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c index ee63d7576fd2..189344924a2b 100644 --- a/arch/x86/lib/csum-wrappers_64.c +++ b/arch/x86/lib/csum-wrappers_64.c @@ -21,52 +21,16 @@ * src and dst are best aligned to 64bits. */ __wsum -csum_and_copy_from_user(const void __user *src, void *dst, - int len, __wsum isum, int *errp) +csum_and_copy_from_user(const void __user *src, void *dst, int len) { - might_sleep(); - *errp = 0; + __wsum sum; + might_sleep(); if (!user_access_begin(src, len)) - goto out_err; - - /* - * Why 6, not 7? To handle odd addresses aligned we - * would need to do considerable complications to fix the - * checksum which is defined as an 16bit accumulator. The - * fix alignment code is primarily for performance - * compatibility with 32bit and that will handle odd - * addresses slowly too. 
- */ - if (unlikely((unsigned long)src & 6)) { - while (((unsigned long)src & 6) && len >= 2) { - __u16 val16; - - unsafe_get_user(val16, (const __u16 __user *)src, out); - - *(__u16 *)dst = val16; - isum = (__force __wsum)add32_with_carry( - (__force unsigned)isum, val16); - src += 2; - dst += 2; - len -= 2; - } - } - isum = csum_partial_copy_generic((__force const void *)src, - dst, len, isum, errp, NULL); - user_access_end(); - if (unlikely(*errp)) - goto out_err; - - return isum; - -out: + return 0; + sum = csum_partial_copy_generic((__force const void *)src, dst, len); user_access_end(); -out_err: - *errp = -EFAULT; - memset(dst, 0, len); - - return isum; + return sum; } EXPORT_SYMBOL(csum_and_copy_from_user); @@ -82,40 +46,16 @@ EXPORT_SYMBOL(csum_and_copy_from_user); * src and dst are best aligned to 64bits. */ __wsum -csum_and_copy_to_user(const void *src, void __user *dst, - int len, __wsum isum, int *errp) +csum_and_copy_to_user(const void *src, void __user *dst, int len) { - __wsum ret; + __wsum sum; might_sleep(); - - if (!user_access_begin(dst, len)) { - *errp = -EFAULT; + if (!user_access_begin(dst, len)) return 0; - } - - if (unlikely((unsigned long)dst & 6)) { - while (((unsigned long)dst & 6) && len >= 2) { - __u16 val16 = *(__u16 *)src; - - isum = (__force __wsum)add32_with_carry( - (__force unsigned)isum, val16); - unsafe_put_user(val16, (__u16 __user *)dst, out); - src += 2; - dst += 2; - len -= 2; - } - } - - *errp = 0; - ret = csum_partial_copy_generic(src, (void __force *)dst, - len, isum, NULL, errp); - user_access_end(); - return ret; -out: + sum = csum_partial_copy_generic(src, (void __force *)dst, len); user_access_end(); - *errp = -EFAULT; - return isum; + return sum; } EXPORT_SYMBOL(csum_and_copy_to_user); @@ -129,9 +69,9 @@ EXPORT_SYMBOL(csum_and_copy_to_user); * Returns an 32bit unfolded checksum of the buffer. 
*/ __wsum -csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum) +csum_partial_copy_nocheck(const void *src, void *dst, int len) { - return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL); + return csum_partial_copy_generic(src, dst, len); } EXPORT_SYMBOL(csum_partial_copy_nocheck); |