summaryrefslogtreecommitdiffstats
path: root/arch/csky/abiv1/memcpy.S
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2018-10-29 08:25:00 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2018-10-29 08:25:00 -0700
commitac435075892e3e651c667b4a9f2267cf3ef1d5a2 (patch)
treeff45694dd545392cc6553af1219b2bd840974307 /arch/csky/abiv1/memcpy.S
parent9f51ae62c84a23ade0ba86457d30a30c9db0c50f (diff)
parent2347e7e1aea410865e3c3f92014b07ff7d4c5b02 (diff)
downloadlinux-ac435075892e3e651c667b4a9f2267cf3ef1d5a2.tar.bz2
Merge tag 'csky-for-linus-4.20' of https://github.com/c-sky/csky-linux
Pull C-SKY architecture port from Guo Ren: "This contains the Linux port for C-SKY(csky) based on linux-4.19 Release, which has been through 10 rounds of review on mailing list. More information: http://en.c-sky.com The development repo: https://github.com/c-sky/csky-linux ABI Documentation: https://github.com/c-sky/csky-doc Here is the pre-built cross compiler for fast test from our CI: https://gitlab.com/c-sky/buildroot/-/jobs/101608095/artifacts/file/output/images/csky_toolchain_qemu_csky_ck807f_4.18_glibc_defconfig_482b221e52908be1c9b2ccb444255e1562bb7025.tar.xz We use buildroot as our CI-test enviornment. "LTP, Lmbench ..." will be tested for every commit. See here for more details: https://gitlab.com/c-sky/buildroot/pipelines We'll continouslly improve csky subsystem in future" Arnd acks, and adds the following notes: "I did a thorough review of the ABI, which as usual mainly consists of spotting any files that don't use the asm-generic ABI itself, and having it changed to it matches exactly what we do on other new architectures. I also looked at every other patch and commented on maybe half of them where I saw something that did not quite seem right. Others have reviewed specific patches in greater depth. I'm sure that one could fine more of the minor details, but as long as they are not ABI relevant, they can be fixed later. The only patch that is part of the ABI and that nobody reviewed is the signal handling. This is one of the areas I never worked on in much detail. I did not see anything wrong with it, but I also don't know what the problems with the other architectures are here, and we seem to be hitting issues occasionally, and we never managed to generalize this enough for new architectures to have a trivial implementation. I was originally hoping that we could have the 64-bit time_t interfaces ready in time to completely drop the 32-bit ones, but that did not happen. We might still remove them in the next merge window depending on whether the libc upstream people prefer to keep them or not. One more general comment: I think this may well be the last new CPU architecture we ever add to the kernel. Both nds32 and c-sky are made by companies that also work on risc-v, and generally speaking risc-v seems to be killing off any of the minor licensable instruction set projects, just like ARM has mostly killed off the custom vendor-specific instruction sets already. If we add another architecture in the future, it may instead be something like the LLVM bitcode or WebAssembly, who knows?" To which Geert Uytterhoeven pipes in about another architecture still in the pipeline: Kalray MPPA. * tag 'csky-for-linus-4.20' of https://github.com/c-sky/csky-linux: (24 commits) dt-bindings: interrupt-controller: C-SKY APB intc irqchip: add C-SKY APB bus interrupt controller dt-bindings: interrupt-controller: C-SKY SMP intc irqchip: add C-SKY SMP interrupt controller MAINTAINERS: Add csky dt-bindings: Add vendor prefix for csky dt-bindings: csky CPU Bindings csky: Misc headers csky: SMP support csky: Debug and Ptrace GDB csky: User access csky: Library functions csky: ELF and module probe csky: Atomic operations csky: IRQ handling csky: VDSO and rt_sigreturn csky: Process management and Signal csky: MMU and page table management csky: Cache and TLB routines csky: System Call ...
Diffstat (limited to 'arch/csky/abiv1/memcpy.S')
-rw-r--r--arch/csky/abiv1/memcpy.S347
1 files changed, 347 insertions, 0 deletions
diff --git a/arch/csky/abiv1/memcpy.S b/arch/csky/abiv1/memcpy.S
new file mode 100644
index 000000000000..5078eb5169fa
--- /dev/null
+++ b/arch/csky/abiv1/memcpy.S
@@ -0,0 +1,347 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+// Copyright (C) 2018 Hangzhou C-SKY Microsystems co.,ltd.
+
+#include <linux/linkage.h>
+
+.macro GET_FRONT_BITS rx y
+#ifdef __cskyLE__
+ lsri \rx, \y
+#else
+ lsli \rx, \y
+#endif
+.endm
+
+.macro GET_AFTER_BITS rx y
+#ifdef __cskyLE__
+ lsli \rx, \y
+#else
+ lsri \rx, \y
+#endif
+.endm
+
+/* void *memcpy(void *dest, const void *src, size_t n); */
+ENTRY(memcpy)
+ mov r7, r2
+ cmplti r4, 4
+ bt .L_copy_by_byte
+ mov r6, r2
+ andi r6, 3
+ cmpnei r6, 0
+ jbt .L_dest_not_aligned
+ mov r6, r3
+ andi r6, 3
+ cmpnei r6, 0
+ jbt .L_dest_aligned_but_src_not_aligned
+.L0:
+ cmplti r4, 16
+ jbt .L_aligned_and_len_less_16bytes
+ subi sp, 8
+ stw r8, (sp, 0)
+.L_aligned_and_len_larger_16bytes:
+ ldw r1, (r3, 0)
+ ldw r5, (r3, 4)
+ ldw r8, (r3, 8)
+ stw r1, (r7, 0)
+ ldw r1, (r3, 12)
+ stw r5, (r7, 4)
+ stw r8, (r7, 8)
+ stw r1, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L_aligned_and_len_larger_16bytes
+ ldw r8, (sp, 0)
+ addi sp, 8
+ cmpnei r4, 0
+ jbf .L_return
+
+.L_aligned_and_len_less_16bytes:
+ cmplti r4, 4
+ bt .L_copy_by_byte
+.L1:
+ ldw r1, (r3, 0)
+ stw r1, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ cmplti r4, 4
+ jbf .L1
+ br .L_copy_by_byte
+
+.L_return:
+ rts
+
+.L_copy_by_byte: /* len less than 4 bytes */
+ cmpnei r4, 0
+ jbf .L_return
+.L4:
+ ldb r1, (r3, 0)
+ stb r1, (r7, 0)
+ addi r3, 1
+ addi r7, 1
+ decne r4
+ jbt .L4
+ rts
+
+/*
+ * If dest is not aligned, just copying some bytes makes the dest align.
+ * Afther that, we judge whether the src is aligned.
+ */
+.L_dest_not_aligned:
+ mov r5, r3
+ rsub r5, r5, r7
+ abs r5, r5
+ cmplt r5, r4
+ bt .L_copy_by_byte
+ mov r5, r7
+ sub r5, r3
+ cmphs r5, r4
+ bf .L_copy_by_byte
+ mov r5, r6
+.L5:
+ ldb r1, (r3, 0) /* makes the dest align. */
+ stb r1, (r7, 0)
+ addi r5, 1
+ subi r4, 1
+ addi r3, 1
+ addi r7, 1
+ cmpnei r5, 4
+ jbt .L5
+ cmplti r4, 4
+ jbt .L_copy_by_byte
+ mov r6, r3 /* judge whether the src is aligned. */
+ andi r6, 3
+ cmpnei r6, 0
+ jbf .L0
+
+/* Judge the number of misaligned, 1, 2, 3? */
+.L_dest_aligned_but_src_not_aligned:
+ mov r5, r3
+ rsub r5, r5, r7
+ abs r5, r5
+ cmplt r5, r4
+ bt .L_copy_by_byte
+ bclri r3, 0
+ bclri r3, 1
+ ldw r1, (r3, 0)
+ addi r3, 4
+ cmpnei r6, 2
+ bf .L_dest_aligned_but_src_not_aligned_2bytes
+ cmpnei r6, 3
+ bf .L_dest_aligned_but_src_not_aligned_3bytes
+
+.L_dest_aligned_but_src_not_aligned_1byte:
+ mov r5, r7
+ sub r5, r3
+ cmphs r5, r4
+ bf .L_copy_by_byte
+ cmplti r4, 16
+ bf .L11
+.L10: /* If the len is less than 16 bytes */
+ GET_FRONT_BITS r1 8
+ mov r5, r1
+ ldw r6, (r3, 0)
+ mov r1, r6
+ GET_AFTER_BITS r6 24
+ or r5, r6
+ stw r5, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ cmplti r4, 4
+ bf .L10
+ subi r3, 3
+ br .L_copy_by_byte
+.L11:
+ subi sp, 16
+ stw r8, (sp, 0)
+ stw r9, (sp, 4)
+ stw r10, (sp, 8)
+ stw r11, (sp, 12)
+.L12:
+ ldw r5, (r3, 0)
+ ldw r11, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+
+ GET_FRONT_BITS r1 8 /* little or big endian? */
+ mov r10, r5
+ GET_AFTER_BITS r5 24
+ or r5, r1
+
+ GET_FRONT_BITS r10 8
+ mov r1, r11
+ GET_AFTER_BITS r11 24
+ or r11, r10
+
+ GET_FRONT_BITS r1 8
+ mov r10, r8
+ GET_AFTER_BITS r8 24
+ or r8, r1
+
+ GET_FRONT_BITS r10 8
+ mov r1, r9
+ GET_AFTER_BITS r9 24
+ or r9, r10
+
+ stw r5, (r7, 0)
+ stw r11, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L12
+ ldw r8, (sp, 0)
+ ldw r9, (sp, 4)
+ ldw r10, (sp, 8)
+ ldw r11, (sp, 12)
+ addi sp , 16
+ cmplti r4, 4
+ bf .L10
+ subi r3, 3
+ br .L_copy_by_byte
+
+.L_dest_aligned_but_src_not_aligned_2bytes:
+ cmplti r4, 16
+ bf .L21
+.L20:
+ GET_FRONT_BITS r1 16
+ mov r5, r1
+ ldw r6, (r3, 0)
+ mov r1, r6
+ GET_AFTER_BITS r6 16
+ or r5, r6
+ stw r5, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ cmplti r4, 4
+ bf .L20
+ subi r3, 2
+ br .L_copy_by_byte
+ rts
+
+.L21: /* n > 16 */
+ subi sp, 16
+ stw r8, (sp, 0)
+ stw r9, (sp, 4)
+ stw r10, (sp, 8)
+ stw r11, (sp, 12)
+
+.L22:
+ ldw r5, (r3, 0)
+ ldw r11, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+
+ GET_FRONT_BITS r1 16
+ mov r10, r5
+ GET_AFTER_BITS r5 16
+ or r5, r1
+
+ GET_FRONT_BITS r10 16
+ mov r1, r11
+ GET_AFTER_BITS r11 16
+ or r11, r10
+
+ GET_FRONT_BITS r1 16
+ mov r10, r8
+ GET_AFTER_BITS r8 16
+ or r8, r1
+
+ GET_FRONT_BITS r10 16
+ mov r1, r9
+ GET_AFTER_BITS r9 16
+ or r9, r10
+
+ stw r5, (r7, 0)
+ stw r11, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L22
+ ldw r8, (sp, 0)
+ ldw r9, (sp, 4)
+ ldw r10, (sp, 8)
+ ldw r11, (sp, 12)
+ addi sp, 16
+ cmplti r4, 4
+ bf .L20
+ subi r3, 2
+ br .L_copy_by_byte
+
+
+.L_dest_aligned_but_src_not_aligned_3bytes:
+ cmplti r4, 16
+ bf .L31
+.L30:
+ GET_FRONT_BITS r1 24
+ mov r5, r1
+ ldw r6, (r3, 0)
+ mov r1, r6
+ GET_AFTER_BITS r6 8
+ or r5, r6
+ stw r5, (r7, 0)
+ subi r4, 4
+ addi r3, 4
+ addi r7, 4
+ cmplti r4, 4
+ bf .L30
+ subi r3, 1
+ br .L_copy_by_byte
+.L31:
+ subi sp, 16
+ stw r8, (sp, 0)
+ stw r9, (sp, 4)
+ stw r10, (sp, 8)
+ stw r11, (sp, 12)
+.L32:
+ ldw r5, (r3, 0)
+ ldw r11, (r3, 4)
+ ldw r8, (r3, 8)
+ ldw r9, (r3, 12)
+
+ GET_FRONT_BITS r1 24
+ mov r10, r5
+ GET_AFTER_BITS r5 8
+ or r5, r1
+
+ GET_FRONT_BITS r10 24
+ mov r1, r11
+ GET_AFTER_BITS r11 8
+ or r11, r10
+
+ GET_FRONT_BITS r1 24
+ mov r10, r8
+ GET_AFTER_BITS r8 8
+ or r8, r1
+
+ GET_FRONT_BITS r10 24
+ mov r1, r9
+ GET_AFTER_BITS r9 8
+ or r9, r10
+
+ stw r5, (r7, 0)
+ stw r11, (r7, 4)
+ stw r8, (r7, 8)
+ stw r9, (r7, 12)
+ subi r4, 16
+ addi r3, 16
+ addi r7, 16
+ cmplti r4, 16
+ jbf .L32
+ ldw r8, (sp, 0)
+ ldw r9, (sp, 4)
+ ldw r10, (sp, 8)
+ ldw r11, (sp, 12)
+ addi sp, 16
+ cmplti r4, 4
+ bf .L30
+ subi r3, 1
+ br .L_copy_by_byte