diff options
Diffstat (limited to 'arch/xtensa/lib/checksum.S')
-rw-r--r-- | arch/xtensa/lib/checksum.S | 67 |
1 files changed, 15 insertions, 52 deletions
diff --git a/arch/xtensa/lib/checksum.S b/arch/xtensa/lib/checksum.S index 4cb9ca58d9ad..cf1bed1a5bd6 100644 --- a/arch/xtensa/lib/checksum.S +++ b/arch/xtensa/lib/checksum.S @@ -175,19 +175,14 @@ ENDPROC(csum_partial) */ /* -unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, - int sum, int *src_err_ptr, int *dst_err_ptr) +unsigned int csum_partial_copy_generic (const char *src, char *dst, int len) a2 = src a3 = dst a4 = len a5 = sum - a6 = src_err_ptr - a7 = dst_err_ptr a8 = temp a9 = temp a10 = temp - a11 = original len for exception handling - a12 = original dst for exception handling This function is optimized for 4-byte aligned addresses. Other alignments work, but not nearly as efficiently. @@ -196,8 +191,7 @@ unsigned int csum_partial_copy_generic (const char *src, char *dst, int len, ENTRY(csum_partial_copy_generic) abi_entry_default - mov a12, a3 - mov a11, a4 + movi a5, -1 or a10, a2, a3 /* We optimize the following alignment tests for the 4-byte @@ -228,26 +222,26 @@ ENTRY(csum_partial_copy_generic) #endif EX(10f) l32i a9, a2, 0 EX(10f) l32i a8, a2, 4 -EX(11f) s32i a9, a3, 0 -EX(11f) s32i a8, a3, 4 +EX(10f) s32i a9, a3, 0 +EX(10f) s32i a8, a3, 4 ONES_ADD(a5, a9) ONES_ADD(a5, a8) EX(10f) l32i a9, a2, 8 EX(10f) l32i a8, a2, 12 -EX(11f) s32i a9, a3, 8 -EX(11f) s32i a8, a3, 12 +EX(10f) s32i a9, a3, 8 +EX(10f) s32i a8, a3, 12 ONES_ADD(a5, a9) ONES_ADD(a5, a8) EX(10f) l32i a9, a2, 16 EX(10f) l32i a8, a2, 20 -EX(11f) s32i a9, a3, 16 -EX(11f) s32i a8, a3, 20 +EX(10f) s32i a9, a3, 16 +EX(10f) s32i a8, a3, 20 ONES_ADD(a5, a9) ONES_ADD(a5, a8) EX(10f) l32i a9, a2, 24 EX(10f) l32i a8, a2, 28 -EX(11f) s32i a9, a3, 24 -EX(11f) s32i a8, a3, 28 +EX(10f) s32i a9, a3, 24 +EX(10f) s32i a8, a3, 28 ONES_ADD(a5, a9) ONES_ADD(a5, a8) addi a2, a2, 32 @@ -267,7 +261,7 @@ EX(11f) s32i a8, a3, 28 .Loop6: #endif EX(10f) l32i a9, a2, 0 -EX(11f) s32i a9, a3, 0 +EX(10f) s32i a9, a3, 0 ONES_ADD(a5, a9) addi a2, a2, 4 addi a3, a3, 4 @@ -298,7 +292,7 @@ EX(11f) s32i a9, a3, 0 .Loop7: #endif EX(10f) l16ui a9, a2, 0 -EX(11f) s16i a9, a3, 0 +EX(10f) s16i a9, a3, 0 ONES_ADD(a5, a9) addi a2, a2, 2 addi a3, a3, 2 @@ -309,7 +303,7 @@ EX(11f) s16i a9, a3, 0 /* This section processes a possible trailing odd byte. */ _bbci.l a4, 0, 8f /* 1-byte chunk */ EX(10f) l8ui a9, a2, 0 -EX(11f) s8i a9, a3, 0 +EX(10f) s8i a9, a3, 0 #ifdef __XTENSA_EB__ slli a9, a9, 8 /* shift byte to bits 8..15 */ #endif @@ -334,8 +328,8 @@ EX(11f) s8i a9, a3, 0 #endif EX(10f) l8ui a9, a2, 0 EX(10f) l8ui a8, a2, 1 -EX(11f) s8i a9, a3, 0 -EX(11f) s8i a8, a3, 1 +EX(10f) s8i a9, a3, 0 +EX(10f) s8i a8, a3, 1 #ifdef __XTENSA_EB__ slli a9, a9, 8 /* combine into a single 16-bit value */ #else /* for checksum computation */ @@ -356,38 +350,7 @@ ENDPROC(csum_partial_copy_generic) # Exception handler: .section .fixup, "ax" -/* - a6 = src_err_ptr - a7 = dst_err_ptr - a11 = original len for exception handling - a12 = original dst for exception handling -*/ - 10: - _movi a2, -EFAULT - s32i a2, a6, 0 /* src_err_ptr */ - - # clear the complete destination - computing the rest - # is too much work - movi a2, 0 -#if XCHAL_HAVE_LOOPS - loopgtz a11, 2f -#else - beqz a11, 2f - add a11, a11, a12 /* a11 = ending address */ -.Leloop: -#endif - s8i a2, a12, 0 - addi a12, a12, 1 -#if !XCHAL_HAVE_LOOPS - blt a12, a11, .Leloop -#endif -2: - abi_ret_default - -11: - movi a2, -EFAULT - s32i a2, a7, 0 /* dst_err_ptr */ movi a2, 0 abi_ret_default |