diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-29 08:25:00 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2018-10-29 08:25:00 -0700 |
commit | ac435075892e3e651c667b4a9f2267cf3ef1d5a2 (patch) | |
tree | ff45694dd545392cc6553af1219b2bd840974307 /arch/csky/abiv1/memcpy.S | |
parent | 9f51ae62c84a23ade0ba86457d30a30c9db0c50f (diff) | |
parent | 2347e7e1aea410865e3c3f92014b07ff7d4c5b02 (diff) | |
download | linux-ac435075892e3e651c667b4a9f2267cf3ef1d5a2.tar.bz2 |
Merge tag 'csky-for-linus-4.20' of https://github.com/c-sky/csky-linux
Pull C-SKY architecture port from Guo Ren:
"This contains the Linux port for C-SKY(csky) based on linux-4.19
Release, which has been through 10 rounds of review on mailing list.
More information:
http://en.c-sky.com
The development repo:
https://github.com/c-sky/csky-linux
ABI Documentation:
https://github.com/c-sky/csky-doc
Here is the pre-built cross compiler for fast test from our CI:
https://gitlab.com/c-sky/buildroot/-/jobs/101608095/artifacts/file/output/images/csky_toolchain_qemu_csky_ck807f_4.18_glibc_defconfig_482b221e52908be1c9b2ccb444255e1562bb7025.tar.xz
We use buildroot as our CI-test enviornment. "LTP, Lmbench ..." will
be tested for every commit. See here for more details:
https://gitlab.com/c-sky/buildroot/pipelines
We'll continouslly improve csky subsystem in future"
Arnd acks, and adds the following notes:
"I did a thorough review of the ABI, which as usual mainly consists of
spotting any files that don't use the asm-generic ABI itself, and
having it changed to it matches exactly what we do on other new
architectures.
I also looked at every other patch and commented on maybe half of them
where I saw something that did not quite seem right. Others have
reviewed specific patches in greater depth. I'm sure that one could
fine more of the minor details, but as long as they are not ABI
relevant, they can be fixed later.
The only patch that is part of the ABI and that nobody reviewed is the
signal handling. This is one of the areas I never worked on in much
detail. I did not see anything wrong with it, but I also don't know
what the problems with the other architectures are here, and we seem
to be hitting issues occasionally, and we never managed to generalize
this enough for new architectures to have a trivial implementation.
I was originally hoping that we could have the 64-bit time_t
interfaces ready in time to completely drop the 32-bit ones, but that
did not happen. We might still remove them in the next merge window
depending on whether the libc upstream people prefer to keep them or
not.
One more general comment: I think this may well be the last new CPU
architecture we ever add to the kernel. Both nds32 and c-sky are made
by companies that also work on risc-v, and generally speaking risc-v
seems to be killing off any of the minor licensable instruction set
projects, just like ARM has mostly killed off the custom
vendor-specific instruction sets already.
If we add another architecture in the future, it may instead be
something like the LLVM bitcode or WebAssembly, who knows?"
To which Geert Uytterhoeven pipes in about another architecture still in
the pipeline: Kalray MPPA.
* tag 'csky-for-linus-4.20' of https://github.com/c-sky/csky-linux: (24 commits)
dt-bindings: interrupt-controller: C-SKY APB intc
irqchip: add C-SKY APB bus interrupt controller
dt-bindings: interrupt-controller: C-SKY SMP intc
irqchip: add C-SKY SMP interrupt controller
MAINTAINERS: Add csky
dt-bindings: Add vendor prefix for csky
dt-bindings: csky CPU Bindings
csky: Misc headers
csky: SMP support
csky: Debug and Ptrace GDB
csky: User access
csky: Library functions
csky: ELF and module probe
csky: Atomic operations
csky: IRQ handling
csky: VDSO and rt_sigreturn
csky: Process management and Signal
csky: MMU and page table management
csky: Cache and TLB routines
csky: System Call
...
Diffstat (limited to 'arch/csky/abiv1/memcpy.S')
-rw-r--r-- | arch/csky/abiv1/memcpy.S | 347 |
1 files changed, 347 insertions, 0 deletions
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S new file mode 100644 index 000000000000..5078eb5169fa --- /dev/null +++ b/arch/csky/abiv1/memcpy.S @@ -0,0 +1,347 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd. + +#include <linux/linkage.h> + +.macro GET_FRONT_BITS rx y +#ifdef __cskyLE__ + lsri \rx, \y +#else + lsli \rx, \y +#endif +.endm + +.macro GET_AFTER_BITS rx y +#ifdef __cskyLE__ + lsli \rx, \y +#else + lsri \rx, \y +#endif +.endm + +/* void *memcpy(void *dest, const void *src, size_t n); */ +ENTRY(memcpy) + mov r7, r2 + cmplti r4, 4 + bt .L_copy_by_byte + mov r6, r2 + andi r6, 3 + cmpnei r6, 0 + jbt .L_dest_not_aligned + mov r6, r3 + andi r6, 3 + cmpnei r6, 0 + jbt .L_dest_aligned_but_src_not_aligned +.L0: + cmplti r4, 16 + jbt .L_aligned_and_len_less_16bytes + subi sp, 8 + stw r8, (sp, 0) +.L_aligned_and_len_larger_16bytes: + ldw r1, (r3, 0) + ldw r5, (r3, 4) + ldw r8, (r3, 8) + stw r1, (r7, 0) + ldw r1, (r3, 12) + stw r5, (r7, 4) + stw r8, (r7, 8) + stw r1, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L_aligned_and_len_larger_16bytes + ldw r8, (sp, 0) + addi sp, 8 + cmpnei r4, 0 + jbf .L_return + +.L_aligned_and_len_less_16bytes: + cmplti r4, 4 + bt .L_copy_by_byte +.L1: + ldw r1, (r3, 0) + stw r1, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + jbf .L1 + br .L_copy_by_byte + +.L_return: + rts + +.L_copy_by_byte: /* len less than 4 bytes */ + cmpnei r4, 0 + jbf .L_return +.L4: + ldb r1, (r3, 0) + stb r1, (r7, 0) + addi r3, 1 + addi r7, 1 + decne r4 + jbt .L4 + rts + +/* + * If dest is not aligned, just copying some bytes makes the dest align. + * Afther that, we judge whether the src is aligned. + */ +.L_dest_not_aligned: + mov r5, r3 + rsub r5, r5, r7 + abs r5, r5 + cmplt r5, r4 + bt .L_copy_by_byte + mov r5, r7 + sub r5, r3 + cmphs r5, r4 + bf .L_copy_by_byte + mov r5, r6 +.L5: + ldb r1, (r3, 0) /* makes the dest align. */ + stb r1, (r7, 0) + addi r5, 1 + subi r4, 1 + addi r3, 1 + addi r7, 1 + cmpnei r5, 4 + jbt .L5 + cmplti r4, 4 + jbt .L_copy_by_byte + mov r6, r3 /* judge whether the src is aligned. */ + andi r6, 3 + cmpnei r6, 0 + jbf .L0 + +/* Judge the number of misaligned, 1, 2, 3? */ +.L_dest_aligned_but_src_not_aligned: + mov r5, r3 + rsub r5, r5, r7 + abs r5, r5 + cmplt r5, r4 + bt .L_copy_by_byte + bclri r3, 0 + bclri r3, 1 + ldw r1, (r3, 0) + addi r3, 4 + cmpnei r6, 2 + bf .L_dest_aligned_but_src_not_aligned_2bytes + cmpnei r6, 3 + bf .L_dest_aligned_but_src_not_aligned_3bytes + +.L_dest_aligned_but_src_not_aligned_1byte: + mov r5, r7 + sub r5, r3 + cmphs r5, r4 + bf .L_copy_by_byte + cmplti r4, 16 + bf .L11 +.L10: /* If the len is less than 16 bytes */ + GET_FRONT_BITS r1 8 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 24 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L10 + subi r3, 3 + br .L_copy_by_byte +.L11: + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) +.L12: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 8 /* little or big endian? */ + mov r10, r5 + GET_AFTER_BITS r5 24 + or r5, r1 + + GET_FRONT_BITS r10 8 + mov r1, r11 + GET_AFTER_BITS r11 24 + or r11, r10 + + GET_FRONT_BITS r1 8 + mov r10, r8 + GET_AFTER_BITS r8 24 + or r8, r1 + + GET_FRONT_BITS r10 8 + mov r1, r9 + GET_AFTER_BITS r9 24 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L12 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp , 16 + cmplti r4, 4 + bf .L10 + subi r3, 3 + br .L_copy_by_byte + +.L_dest_aligned_but_src_not_aligned_2bytes: + cmplti r4, 16 + bf .L21 +.L20: + GET_FRONT_BITS r1 16 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 16 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L20 + subi r3, 2 + br .L_copy_by_byte + rts + +.L21: /* n > 16 */ + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) + +.L22: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 16 + mov r10, r5 + GET_AFTER_BITS r5 16 + or r5, r1 + + GET_FRONT_BITS r10 16 + mov r1, r11 + GET_AFTER_BITS r11 16 + or r11, r10 + + GET_FRONT_BITS r1 16 + mov r10, r8 + GET_AFTER_BITS r8 16 + or r8, r1 + + GET_FRONT_BITS r10 16 + mov r1, r9 + GET_AFTER_BITS r9 16 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L22 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp, 16 + cmplti r4, 4 + bf .L20 + subi r3, 2 + br .L_copy_by_byte + + +.L_dest_aligned_but_src_not_aligned_3bytes: + cmplti r4, 16 + bf .L31 +.L30: + GET_FRONT_BITS r1 24 + mov r5, r1 + ldw r6, (r3, 0) + mov r1, r6 + GET_AFTER_BITS r6 8 + or r5, r6 + stw r5, (r7, 0) + subi r4, 4 + addi r3, 4 + addi r7, 4 + cmplti r4, 4 + bf .L30 + subi r3, 1 + br .L_copy_by_byte +.L31: + subi sp, 16 + stw r8, (sp, 0) + stw r9, (sp, 4) + stw r10, (sp, 8) + stw r11, (sp, 12) +.L32: + ldw r5, (r3, 0) + ldw r11, (r3, 4) + ldw r8, (r3, 8) + ldw r9, (r3, 12) + + GET_FRONT_BITS r1 24 + mov r10, r5 + GET_AFTER_BITS r5 8 + or r5, r1 + + GET_FRONT_BITS r10 24 + mov r1, r11 + GET_AFTER_BITS r11 8 + or r11, r10 + + GET_FRONT_BITS r1 24 + mov r10, r8 + GET_AFTER_BITS r8 8 + or r8, r1 + + GET_FRONT_BITS r10 24 + mov r1, r9 + GET_AFTER_BITS r9 8 + or r9, r10 + + stw r5, (r7, 0) + stw r11, (r7, 4) + stw r8, (r7, 8) + stw r9, (r7, 12) + subi r4, 16 + addi r3, 16 + addi r7, 16 + cmplti r4, 16 + jbf .L32 + ldw r8, (sp, 0) + ldw r9, (sp, 4) + ldw r10, (sp, 8) + ldw r11, (sp, 12) + addi sp, 16 + cmplti r4, 4 + bf .L30 + subi r3, 1 + br .L_copy_by_byte |