summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-07-21 12:01:28 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2021-08-30 07:50:56 -0700
commit291d47ccad191322524d77e0769dadcc8a811630 (patch)
tree82d6a3be6e8672ea577cec01a26bcdcab6239b75
parent7d2a07b769330c34b4deabeed939325c77a7ec2f (diff)
downloadlinux-291d47ccad191322524d77e0769dadcc8a811630.tar.bz2
string: improve default out-of-line memcmp() implementation
This just does the "if the architecture does efficient unaligned handling, start the memcmp using 'unsigned long' accesses", since Nikolay Borisov found a load that cares. This is basically the minimal patch, and limited to architectures that are known to not have slow unaligned handling. We've had the stupid byte-at-a-time version forever, and nobody has ever even noticed before, so let's keep the fix minimal. A potential further improvement would be to align one of the sources in order to at least minimize unaligned cases, but the only real case of bigger memcmp() users seems to be the FIDEDUPERANGE ioctl(). As David Sterba says, the dedupe ioctl is typically called on ranges spanning many pages so the common case will all be page-aligned anyway. All the relevant architectures select HAVE_EFFICIENT_UNALIGNED_ACCESS, so I'm not going to worry about the combination of a very rare use-case and a rare architecture until somebody actually hits it. Particularly since Nikolay also tested the more complex patch with extra alignment handling code, and it only added overhead. Link: https://lore.kernel.org/lkml/20210721135926.602840-1-nborisov@suse.com/ Reported-by: Nikolay Borisov <nborisov@suse.com> Cc: David Sterba <dsterba@suse.cz> Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
-rw-r--r--lib/string.c16
1 files changed, 16 insertions, 0 deletions
diff --git a/lib/string.c b/lib/string.c
index 77bd0b1d3296..b2de45a581f4 100644
--- a/lib/string.c
+++ b/lib/string.c
@@ -29,6 +29,7 @@
#include <linux/errno.h>
#include <linux/slab.h>
+#include <asm/unaligned.h>
#include <asm/byteorder.h>
#include <asm/word-at-a-time.h>
#include <asm/page.h>
@@ -935,6 +936,21 @@ __visible int memcmp(const void *cs, const void *ct, size_t count)
const unsigned char *su1, *su2;
int res = 0;
+#ifdef CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS
+ if (count >= sizeof(unsigned long)) {
+ const unsigned long *u1 = cs;
+ const unsigned long *u2 = ct;
+ do {
+ if (get_unaligned(u1) != get_unaligned(u2))
+ break;
+ u1++;
+ u2++;
+ count -= sizeof(unsigned long);
+ } while (count >= sizeof(unsigned long));
+ cs = u1;
+ ct = u2;
+ }
+#endif
for (su1 = cs, su2 = ct; 0 < count; ++su1, ++su2, count--)
if ((res = *su1 - *su2) != 0)
break;