diff options
author | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@ppc970.osdl.org> | 2005-04-16 15:20:36 -0700 |
commit | 1da177e4c3f41524e886b7f1b8a0c1fc7321cac2 (patch) | |
tree | 0bba044c4ce775e45a88a51686b5d9f90697ea9d /arch/alpha/lib/strlen.S | |
download | linux-1da177e4c3f41524e886b7f1b8a0c1fc7321cac2.tar.bz2 |
Linux-2.6.12-rc2v2.6.12-rc2
Initial git repository build. I'm not bothering with the full history,
even though we have it. We can create a separate "historical" git
archive of that later if we want to, and in the meantime it's about
3.2GB when imported into git - space that would just make the early
git days unnecessarily complicated, when we don't have a lot of good
infrastructure for it.
Let it rip!
Diffstat (limited to 'arch/alpha/lib/strlen.S')
-rw-r--r-- | arch/alpha/lib/strlen.S | 57 |
1 files changed, 57 insertions, 0 deletions
diff --git a/arch/alpha/lib/strlen.S b/arch/alpha/lib/strlen.S new file mode 100644 index 000000000000..fe63353de152 --- /dev/null +++ b/arch/alpha/lib/strlen.S @@ -0,0 +1,57 @@ +/* + * strlen.S (c) 1995 David Mosberger (davidm@cs.arizona.edu) + * + * Finds length of a 0-terminated string. Optimized for the + * Alpha architecture: + * + * - memory accessed as aligned quadwords only + * - uses bcmpge to compare 8 bytes in parallel + * - does binary search to find 0 byte in last + * quadword (HAKMEM needed 12 instructions to + * do this instead of the 9 instructions that + * binary search needs). + */ + + .set noreorder + .set noat + + .align 3 + + .globl strlen + .ent strlen + +strlen: + ldq_u $1, 0($16) # load first quadword ($16 may be misaligned) + lda $2, -1($31) + insqh $2, $16, $2 + andnot $16, 7, $0 + or $2, $1, $1 + cmpbge $31, $1, $2 # $2 <- bitmask: bit i == 1 <==> i-th byte == 0 + bne $2, found + +loop: ldq $1, 8($0) + addq $0, 8, $0 # addr += 8 + nop # helps dual issue last two insns + cmpbge $31, $1, $2 + beq $2, loop + +found: blbs $2, done # make aligned case fast + negq $2, $3 + and $2, $3, $2 + + and $2, 0x0f, $1 + addq $0, 4, $3 + cmoveq $1, $3, $0 + + and $2, 0x33, $1 + addq $0, 2, $3 + cmoveq $1, $3, $0 + + and $2, 0x55, $1 + addq $0, 1, $3 + cmoveq $1, $3, $0 + +done: subq $0, $16, $0 + ret $31, ($26) + + .end strlen |