From fcfd2fbf22d2587196890103d41e3d554c47da0e Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@sciencehorizons.net>
Date: Fri, 20 May 2016 08:41:37 -0400
Subject: fs/namei.c: Add hashlen_string() function

We'd like to make more use of the highly-optimized dcache hash functions
throughout the kernel, rather than have every subsystem create its own,
and a function that hashes basic null-terminated strings is required
for that.

(The name is to emphasize that it returns both hash and length.)

It's actually useful in the dcache itself, specifically d_alloc_name().
Other uses in the next patch.

full_name_hash() is also tweaked to make it more generally useful:
1) Take a "char *" rather than "unsigned char *" argument, to
   be consistent with hash_name().
2) Handle zero-length inputs.  If we want more callers, we don't want
   to make them worry about corner cases.

Signed-off-by: George Spelvin <linux@sciencehorizons.net>
---
 fs/dcache.c |  3 +--
 fs/namei.c  | 51 ++++++++++++++++++++++++++++++++++++++++++++++-----
 2 files changed, 47 insertions(+), 7 deletions(-)

(limited to 'fs')

diff --git a/fs/dcache.c b/fs/dcache.c
index d5ecc6e477da..19b751806789 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1653,8 +1653,7 @@ struct dentry *d_alloc_name(struct dentry *parent, const char *name)
 	struct qstr q;
 
 	q.name = name;
-	q.len = strlen(name);
-	q.hash = full_name_hash(q.name, q.len);
+	q.hash_len = hashlen_string(name);
 	return d_alloc(parent, &q);
 }
 EXPORT_SYMBOL(d_alloc_name);
diff --git a/fs/namei.c b/fs/namei.c
index 42f8ca038254..dd98d43a54f8 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1822,19 +1822,20 @@ static inline unsigned long mix_hash(unsigned long hash)
 
 #endif
 
-unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+/* Return the hash of a string of known length */
+unsigned int full_name_hash(const char *name, unsigned int len)
 {
 	unsigned long a, hash = 0;
 
 	for (;;) {
+		if (!len)
+			goto done;
 		a = load_unaligned_zeropad(name);
 		if (len < sizeof(unsigned long))
 			break;
 		hash = mix_hash(hash + a);
 		name += sizeof(unsigned long);
 		len -= sizeof(unsigned long);
-		if (!len)
-			goto done;
 	}
 	hash += a & bytemask_from_count(len);
 done:
@@ -1842,6 +1843,29 @@ done:
 }
 EXPORT_SYMBOL(full_name_hash);
 
+/* Return the "hash_len" (hash and length) of a null-terminated string */
+u64 hashlen_string(const char *name)
+{
+	unsigned long a, adata, mask, hash, len;
+	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
+
+	hash = a = 0;
+	len = -sizeof(unsigned long);
+	do {
+		hash = mix_hash(hash + a);
+		len += sizeof(unsigned long);
+		a = load_unaligned_zeropad(name+len);
+	} while (!has_zero(a, &adata, &constants));
+
+	adata = prep_zero_mask(a, adata, &constants);
+	mask = create_zero_mask(adata);
+	hash += a & zero_bytemask(mask);
+	len += find_zero(mask);
+
+	return hashlen_create(fold_hash(hash), len);
+}
+EXPORT_SYMBOL(hashlen_string);
+
 /*
  * Calculate the length and hash of the path component, and
  * return the "hash_len" as the result.
@@ -1872,15 +1896,32 @@ static inline u64 hash_name(const char *name)
 
 #else
 
-unsigned int full_name_hash(const unsigned char *name, unsigned int len)
+/* Return the hash of a string of known length */
+unsigned int full_name_hash(const char *name, unsigned int len)
 {
 	unsigned long hash = init_name_hash();
 	while (len--)
-		hash = partial_name_hash(*name++, hash);
+		hash = partial_name_hash((unsigned char)*name++, hash);
 	return end_name_hash(hash);
 }
 EXPORT_SYMBOL(full_name_hash);
 
+/* Return the "hash_len" (hash and length) of a null-terminated string */
+u64 hash_string(const char *name)
+{
+	unsigned long hash = init_name_hash();
+	unsigned long len = 0, c;
+
+	c = (unsigned char)*name;
+	do {
+		len++;
+		hash = partial_name_hash(c, hash);
+		c = (unsigned char)name[len];
+	} while (c);
+	return hashlen_create(end_name_hash(hash), len);
+}
+EXPORT_SYMBOL(hash_string);
+
 /*
  * We know there's a real path component here of at least
  * one character.
-- 
cgit v1.2.3


From 2a18da7a9c7886f1c7307f8d3f23f24318583f03 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@sciencehorizons.net>
Date: Mon, 23 May 2016 07:43:58 -0400
Subject: fs/namei.c: Improve dcache hash function

Patch 0fed3ac866 improved the hash mixing, but the function is slower
than necessary; there's a 7-instruction dependency chain (10 on x86)
each loop iteration.

Word-at-a-time access is a very tight loop (which is good, because
link_path_walk() is one of the hottest code paths in the entire kernel),
and the hash mixing function must not have a longer latency to avoid
slowing it down.

There do not appear to be any published fast hash functions that:
1) Operate on the input a word at a time, and
2) Don't need to know the length of the input beforehand, and
3) Have a single iterated mixing function, not needing conditional
   branches or unrolling to distinguish different loop iterations.

One of the algorithms which comes closest is Yann Collet's xxHash, but
that's two dependent multiplies per word, which is too much.

The key insights in this design are:

1) Barring expensive ops like multiplies, to diffuse one input bit
   across 64 bits of hash state takes at least log2(64) = 6 sequentially
   dependent instructions.  That is more cycles than we'd like.
2) An operation like "hash ^= hash << 13" requires a second temporary
   register anyway, and on a 2-operand machine like x86, it's three
   instructions.
3) A better use of a second register is to hold a two-word hash state.
   With careful design, no temporaries are needed at all, so it doesn't
   increase register pressure.  And this gets rid of register copying
   on 2-operand machines, so the code is smaller and faster.
4) Using two words of state weakens the requirement for one-round mixing;
   we now have two rounds of mixing before cancellation is possible.
5) A two-word hash state also allows operations on both halves to be
   done in parallel, so on a superscalar processor we get more mixing
   in fewer cycles.

I ended up using a mixing function inspired by the ChaCha and Speck
round functions.  It is 6 simple instructions and 3 cycles per iteration
(assuming multiply by 9 can be done by an "lea" instruction):

		x ^= *input++;
	y ^= x;	x = ROL(x, K1);
	x += y;	y = ROL(y, K2);
	y *= 9;

Not only is this reversible, two consecutive rounds are reversible:
if you are given the initial and final states, but not the intermediate
state, it is possible to compute both input words.  This means that at
least 3 words of input are required to create a collision.

(It also has the property, used by hash_name() to avoid a branch, that
it hashes all-zero to all-zero.)

The rotate constants K1 and K2 were found by experiment.  The search took
a sample of random initial states (I used 1023) and considered the effect
of flipping each of the 64 input bits on each of the 128 output bits two
rounds later.  Each of the 8192 pairs can be considered a biased coin, and
adding up the Shannon entropy of all of them produces a score.

The best-scoring shifts also did well in other tests (flipping bits in y,
trying 3 or 4 rounds of mixing, flipping all 64*63/2 pairs of input bits),
so the choice was made with the additional constraint that the sum of the
shifts is odd and not too close to the word size.

The final state is then folded into a 32-bit hash value by a less carefully
optimized multiply-based scheme.  This also has to be fast, as pathname
components tend to be short (the most common case is one iteration!), but
there's some room for latency, as there is a fair bit of intervening logic
before the hash value is used for anything.

(Performance verified with "bonnie++ -s 0 -n 1536:-2" on tmpfs.  I need
a better benchmark; the numbers seem to show a slight dip in performance
between 4.6.0 and this patch, but they're too noisy to quote.)

Special thanks to Bruce fields for diligent testing which uncovered a
nasty fencepost error in an earlier version of this patch.

[checkpatch.pl formatting complaints noted and respectfully disagreed with.]

Signed-off-by: George Spelvin <linux@sciencehorizons.net>
Tested-by: J. Bruce Fields <bfields@redhat.com>
---
 fs/namei.c | 121 +++++++++++++++++++++++++++++++++++++++++--------------------
 1 file changed, 81 insertions(+), 40 deletions(-)

(limited to 'fs')

diff --git a/fs/namei.c b/fs/namei.c
index dd98d43a54f8..a49cbd7efcaa 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -35,6 +35,7 @@
 #include <linux/fs_struct.h>
 #include <linux/posix_acl.h>
 #include <linux/hash.h>
+#include <linux/bitops.h>
 #include <asm/uaccess.h>
 
 #include "internal.h"
@@ -1788,44 +1789,89 @@ static int walk_component(struct nameidata *nd, int flags)
 #include <asm/word-at-a-time.h>
 
 #ifdef CONFIG_64BIT
-
-static inline unsigned int fold_hash(unsigned long hash)
-{
-	return hash_64(hash, 32);
-}
+/*
+ * Register pressure in the mixing function is an issue, particularly
+ * on 32-bit x86, but almost any function requires one state value and
+ * one temporary.  Instead, use a function designed for two state values
+ * and no temporaries.
+ *
+ * This function cannot create a collision in only two iterations, so
+ * we have two iterations to achieve avalanche.  In those two iterations,
+ * we have six layers of mixing, which is enough to spread one bit's
+ * influence out to 2^6 = 64 state bits.
+ *
+ * Rotate constants are scored by considering either 64 one-bit input
+ * deltas or 64*63/2 = 2016 two-bit input deltas, and finding the
+ * probability of that delta causing a change to each of the 128 output
+ * bits, using a sample of random initial states.
+ *
+ * The Shannon entropy of the computed probabilities is then summed
+ * to produce a score.  Ideally, any input change has a 50% chance of
+ * toggling any given output bit.
+ *
+ * Mixing scores (in bits) for (12,45):
+ * Input delta: 1-bit      2-bit
+ * 1 round:     713.3    42542.6
+ * 2 rounds:   2753.7   140389.8
+ * 3 rounds:   5954.1   233458.2
+ * 4 rounds:   7862.6   256672.2
+ * Perfect:    8192     258048
+ *            (64*128) (64*63/2 * 128)
+ */
+#define HASH_MIX(x, y, a)	\
+	(	x ^= (a),	\
+	y ^= x,	x = rol64(x,12),\
+	x += y,	y = rol64(y,45),\
+	y *= 9			)
 
 /*
- * This is George Marsaglia's XORSHIFT generator.
- * It implements a maximum-period LFSR in only a few
- * instructions.  It also has the property (required
- * by hash_name()) that mix_hash(0) = 0.
+ * Fold two longs into one 32-bit hash value.  This must be fast, but
+ * latency isn't quite as critical, as there is a fair bit of additional
+ * work done before the hash value is used.
  */
-static inline unsigned long mix_hash(unsigned long hash)
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
 {
-	hash ^= hash << 13;
-	hash ^= hash >> 7;
-	hash ^= hash << 17;
-	return hash;
+	y ^= x * GOLDEN_RATIO_64;
+	y *= GOLDEN_RATIO_64;
+	return y >> 32;
 }
 
 #else	/* 32-bit case */
 
-#define fold_hash(x) (x)
+/*
+ * Mixing scores (in bits) for (7,20):
+ * Input delta: 1-bit      2-bit
+ * 1 round:     330.3     9201.6
+ * 2 rounds:   1246.4    25475.4
+ * 3 rounds:   1907.1    31295.1
+ * 4 rounds:   2042.3    31718.6
+ * Perfect:    2048      31744
+ *            (32*64)   (32*31/2 * 64)
+ */
+#define HASH_MIX(x, y, a)	\
+	(	x ^= (a),	\
+	y ^= x,	x = rol32(x, 7),\
+	x += y,	y = rol32(y,20),\
+	y *= 9			)
 
-static inline unsigned long mix_hash(unsigned long hash)
+static inline unsigned int fold_hash(unsigned long x, unsigned long y)
 {
-	hash ^= hash << 13;
-	hash ^= hash >> 17;
-	hash ^= hash << 5;
-	return hash;
+	/* Use arch-optimized multiply if one exists */
+	return __hash_32(y ^ __hash_32(x));
 }
 
 #endif
 
-/* Return the hash of a string of known length */
+/*
+ * Return the hash of a string of known length.  This is carfully
+ * designed to match hash_name(), which is the more critical function.
+ * In particular, we must end by hashing a final word containing 0..7
+ * payload bytes, to match the way that hash_name() iterates until it
+ * finds the delimiter after the name.
+ */
 unsigned int full_name_hash(const char *name, unsigned int len)
 {
-	unsigned long a, hash = 0;
+	unsigned long a, x = 0, y = 0;
 
 	for (;;) {
 		if (!len)
@@ -1833,36 +1879,34 @@ unsigned int full_name_hash(const char *name, unsigned int len)
 		a = load_unaligned_zeropad(name);
 		if (len < sizeof(unsigned long))
 			break;
-		hash = mix_hash(hash + a);
+		HASH_MIX(x, y, a);
 		name += sizeof(unsigned long);
 		len -= sizeof(unsigned long);
 	}
-	hash += a & bytemask_from_count(len);
+	x ^= a & bytemask_from_count(len);
 done:
-	return fold_hash(hash);
+	return fold_hash(x, y);
 }
 EXPORT_SYMBOL(full_name_hash);
 
 /* Return the "hash_len" (hash and length) of a null-terminated string */
 u64 hashlen_string(const char *name)
 {
-	unsigned long a, adata, mask, hash, len;
+	unsigned long a = 0, x = 0, y = 0, adata, mask, len;
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 
-	hash = a = 0;
 	len = -sizeof(unsigned long);
 	do {
-		hash = mix_hash(hash + a);
+		HASH_MIX(x, y, a);
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
 	} while (!has_zero(a, &adata, &constants));
 
 	adata = prep_zero_mask(a, adata, &constants);
 	mask = create_zero_mask(adata);
-	hash += a & zero_bytemask(mask);
-	len += find_zero(mask);
+	x ^= a & zero_bytemask(mask);
 
-	return hashlen_create(fold_hash(hash), len);
+	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
 }
 EXPORT_SYMBOL(hashlen_string);
 
@@ -1872,13 +1916,12 @@ EXPORT_SYMBOL(hashlen_string);
  */
 static inline u64 hash_name(const char *name)
 {
-	unsigned long a, b, adata, bdata, mask, hash, len;
+	unsigned long a = 0, b, x = 0, y = 0, adata, bdata, mask, len;
 	const struct word_at_a_time constants = WORD_AT_A_TIME_CONSTANTS;
 
-	hash = a = 0;
 	len = -sizeof(unsigned long);
 	do {
-		hash = mix_hash(hash + a);
+		HASH_MIX(x, y, a);
 		len += sizeof(unsigned long);
 		a = load_unaligned_zeropad(name+len);
 		b = a ^ REPEAT_BYTE('/');
@@ -1886,15 +1929,13 @@ static inline u64 hash_name(const char *name)
 
 	adata = prep_zero_mask(a, adata, &constants);
 	bdata = prep_zero_mask(b, bdata, &constants);
-
 	mask = create_zero_mask(adata | bdata);
+	x ^= a & zero_bytemask(mask);
 
-	hash += a & zero_bytemask(mask);
-	len += find_zero(mask);
-	return hashlen_create(fold_hash(hash), len);
+	return hashlen_create(fold_hash(x, y), len + find_zero(mask));
 }
 
-#else
+#else	/* !CONFIG_DCACHE_WORD_ACCESS: Slow, byte-at-a-time version */
 
 /* Return the hash of a string of known length */
 unsigned int full_name_hash(const char *name, unsigned int len)
@@ -1965,7 +2006,7 @@ static int link_path_walk(const char *name, struct nameidata *nd)
 		int type;
 
 		err = may_lookup(nd);
- 		if (err)
+		if (err)
 			return err;
 
 		hash_len = hash_name(name);
-- 
cgit v1.2.3


From 468a9428521e7d00fb21250af363eb94dc1d6861 Mon Sep 17 00:00:00 2001
From: George Spelvin <linux@sciencehorizons.net>
Date: Thu, 26 May 2016 22:11:51 -0400
Subject: <linux/hash.h>: Add support for architecture-specific functions

This is just the infrastructure; there are no users yet.

This is modelled on CONFIG_ARCH_RANDOM; a CONFIG_ symbol declares
the existence of <asm/hash.h>.

That file may define its own versions of various functions, and define
HAVE_* symbols (no CONFIG_ prefix!) to suppress the generic ones.

Included is a self-test (in lib/test_hash.c) that verifies the basics.
It is NOT in general required that the arch-specific functions compute
the same thing as the generic, but if a HAVE_* symbol is defined with
the value 1, then equality is tested.

Signed-off-by: George Spelvin <linux@sciencehorizons.net>
Cc: Geert Uytterhoeven <geert@linux-m68k.org>
Cc: Greg Ungerer <gerg@linux-m68k.org>
Cc: Andreas Schwab <schwab@linux-m68k.org>
Cc: Philippe De Muyter <phdm@macq.eu>
Cc: linux-m68k@lists.linux-m68k.org
Cc: Alistair Francis <alistai@xilinx.com>
Cc: Michal Simek <michal.simek@xilinx.com>
Cc: Yoshinori Sato <ysato@users.sourceforge.jp>
Cc: uclinux-h8-devel@lists.sourceforge.jp
---
 arch/Kconfig         |   8 ++
 fs/namei.c           |   6 +-
 include/linux/hash.h |  27 +++++-
 lib/Kconfig.debug    |  11 +++
 lib/Makefile         |   1 +
 lib/test_hash.c      | 250 +++++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 299 insertions(+), 4 deletions(-)
 create mode 100644 lib/test_hash.c

(limited to 'fs')

diff --git a/arch/Kconfig b/arch/Kconfig
index 81869a5e7e17..96406e4db995 100644
--- a/arch/Kconfig
+++ b/arch/Kconfig
@@ -589,6 +589,14 @@ config HAVE_STACK_VALIDATION
 	  Architecture supports the 'objtool check' host tool command, which
 	  performs compile-time stack metadata validation.
 
+config HAVE_ARCH_HASH
+	bool
+	default n
+	help
+	  If this is set, the architecture provides an <asm/hash.h>
+	  file which provides platform-specific implementations of some
+	  functions in <linux/hash.h> or fs/namei.c.
+
 #
 # ABI hall of shame
 #
diff --git a/fs/namei.c b/fs/namei.c
index a49cbd7efcaa..968dae025230 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1788,7 +1788,11 @@ static int walk_component(struct nameidata *nd, int flags)
 
 #include <asm/word-at-a-time.h>
 
-#ifdef CONFIG_64BIT
+#ifdef HASH_MIX
+
+/* Architecture provides HASH_MIX and fold_hash() in <asm/hash.h> */
+
+#elif defined(CONFIG_64BIT)
 /*
  * Register pressure in the mixing function is an issue, particularly
  * on 32-bit x86, but almost any function requires one state value and
diff --git a/include/linux/hash.h b/include/linux/hash.h
index 613cfde3a1e0..ad6fa21d977b 100644
--- a/include/linux/hash.h
+++ b/include/linux/hash.h
@@ -41,19 +41,40 @@
 #define GOLDEN_RATIO_32 0x61C88647
 #define GOLDEN_RATIO_64 0x61C8864680B583EBull
 
+#ifdef CONFIG_HAVE_ARCH_HASH
+/* This header may use the GOLDEN_RATIO_xx constants */
+#include <asm/hash.h>
+#endif
 
-static inline u32 __hash_32(u32 val)
+/*
+ * The _generic versions exist only so lib/test_hash.c can compare
+ * the arch-optimized versions with the generic.
+ *
+ * Note that if you change these, any <asm/hash.h> that aren't updated
+ * to match need to have their HAVE_ARCH_* define values updated so the
+ * self-test will not false-positive.
+ */
+#ifndef HAVE_ARCH__HASH_32
+#define __hash_32 __hash_32_generic
+#endif
+static inline u32 __hash_32_generic(u32 val)
 {
 	return val * GOLDEN_RATIO_32;
 }
 
-static inline u32 hash_32(u32 val, unsigned int bits)
+#ifndef HAVE_ARCH_HASH_32
+#define hash_32 hash_32_generic
+#endif
+static inline u32 hash_32_generic(u32 val, unsigned int bits)
 {
 	/* High bits are more random, so use them. */
 	return __hash_32(val) >> (32 - bits);
 }
 
-static __always_inline u32 hash_64(u64 val, unsigned int bits)
+#ifndef HAVE_ARCH_HASH_64
+#define hash_64 hash_64_generic
+#endif
+static __always_inline u32 hash_64_generic(u64 val, unsigned int bits)
 {
 #if BITS_PER_LONG == 64
 	/* 64x64-bit multiply is efficient on all 64-bit processors */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 1e9a607534ca..18ec69ba8eb6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1815,6 +1815,17 @@ config TEST_RHASHTABLE
 
 	  If unsure, say N.
 
+config TEST_HASH
+	tristate "Perform selftest on hash functions"
+	default n
+	help
+	  Enable this option to test the kernel's integer (<linux/hash,h>)
+	  and string (<linux/stringhash.h>) hash functions on boot
+	  (or module load).
+
+	  This is intended to help people writing architecture-specific
+	  optimized versions.  If unsure, say N.
+
 endmenu # runtime tests
 
 config PROVIDE_OHCI1394_DMA_INIT
diff --git a/lib/Makefile b/lib/Makefile
index 7bd6fd436c97..f80b1a1b3afd 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -48,6 +48,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
+obj-$(CONFIG_TEST_HASH) += test_hash.o
 obj-$(CONFIG_TEST_KASAN) += test_kasan.o
 obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o
 obj-$(CONFIG_TEST_LKM) += test_module.o
diff --git a/lib/test_hash.c b/lib/test_hash.c
new file mode 100644
index 000000000000..c9549c8b4909
--- /dev/null
+++ b/lib/test_hash.c
@@ -0,0 +1,250 @@
+/*
+ * Test cases for <linux/hash.h> and <linux/stringhash.h>
+ * This just verifies that various ways of computing a hash
+ * produce the same thing and, for cases where a k-bit hash
+ * value is requested, is of the requested size.
+ *
+ * We fill a buffer with a 255-byte null-terminated string,
+ * and use both full_name_hash() and hashlen_string() to hash the
+ * substrings from i to j, where 0 <= i < j < 256.
+ *
+ * The returned values are used to check that __hash_32() and
+ * __hash_32_generic() compute the same thing.  Likewise hash_32()
+ * and hash_64().
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt "\n"
+
+#include <linux/compiler.h>
+#include <linux/types.h>
+#include <linux/module.h>
+#include <linux/hash.h>
+#include <linux/stringhash.h>
+#include <linux/printk.h>
+
+/* 32-bit XORSHIFT generator.  Seed must not be zero. */
+static u32 __init __attribute_const__
+xorshift(u32 seed)
+{
+	seed ^= seed << 13;
+	seed ^= seed >> 17;
+	seed ^= seed << 5;
+	return seed;
+}
+
+/* Given a non-zero x, returns a non-zero byte. */
+static u8 __init __attribute_const__
+mod255(u32 x)
+{
+	x = (x & 0xffff) + (x >> 16);	/* 1 <= x <= 0x1fffe */
+	x = (x & 0xff) + (x >> 8);	/* 1 <= x <= 0x2fd */
+	x = (x & 0xff) + (x >> 8);	/* 1 <= x <= 0x100 */
+	x = (x & 0xff) + (x >> 8);	/* 1 <= x <= 0xff */
+	return x;
+}
+
+/* Fill the buffer with non-zero bytes. */
+static void __init
+fill_buf(char *buf, size_t len, u32 seed)
+{
+	size_t i;
+
+	for (i = 0; i < len; i++) {
+		seed = xorshift(seed);
+		buf[i] = mod255(seed);
+	}
+}
+
+/*
+ * Test the various integer hash functions.  h64 (or its low-order bits)
+ * is the integer to hash.  hash_or accumulates the OR of the hash values,
+ * which are later checked to see that they cover all the requested bits.
+ *
+ * Because these functions (as opposed to the string hashes) are all
+ * inline, the code being tested is actually in the module, and you can
+ * recompile and re-test the module without rebooting.
+ */
+static bool __init
+test_int_hash(unsigned long long h64, u32 hash_or[2][33])
+{
+	int k;
+	u32 h0 = (u32)h64, h1, h2;
+
+	/* Test __hash32 */
+	hash_or[0][0] |= h1 = __hash_32(h0);
+#ifdef HAVE_ARCH__HASH_32
+	hash_or[1][0] |= h2 = __hash_32_generic(h0);
+#if HAVE_ARCH__HASH_32 == 1
+	if (h1 != h2) {
+		pr_err("__hash_32(%#x) = %#x != __hash_32_generic() = %#x",
+			h0, h1, h2);
+		return false;
+	}
+#endif
+#endif
+
+	/* Test k = 1..32 bits */
+	for (k = 1; k <= 32; k++) {
+		u32 const m = ((u32)2 << (k-1)) - 1;	/* Low k bits set */
+
+		/* Test hash_32 */
+		hash_or[0][k] |= h1 = hash_32(h0, k);
+		if (h1 > m) {
+			pr_err("hash_32(%#x, %d) = %#x > %#x", h0, k, h1, m);
+			return false;
+		}
+#ifdef HAVE_ARCH_HASH_32
+		h2 = hash_32_generic(h0, k);
+#if HAVE_ARCH_HASH_32 == 1
+		if (h1 != h2) {
+			pr_err("hash_32(%#x, %d) = %#x != hash_32_generic() "
+				" = %#x", h0, k, h1, h2);
+			return false;
+		}
+#else
+		if (h2 > m) {
+			pr_err("hash_32_generic(%#x, %d) = %#x > %#x",
+				h0, k, h1, m);
+			return false;
+		}
+#endif
+#endif
+		/* Test hash_64 */
+		hash_or[1][k] |= h1 = hash_64(h64, k);
+		if (h1 > m) {
+			pr_err("hash_64(%#llx, %d) = %#x > %#x", h64, k, h1, m);
+			return false;
+		}
+#ifdef HAVE_ARCH_HASH_64
+		h2 = hash_64_generic(h64, k);
+#if HAVE_ARCH_HASH_64 == 1
+		if (h1 != h2) {
+			pr_err("hash_64(%#llx, %d) = %#x != hash_64_generic() "
+				"= %#x", h64, k, h1, h2);
+			return false;
+		}
+#else
+		if (h2 > m) {
+			pr_err("hash_64_generic(%#llx, %d) = %#x > %#x",
+				h64, k, h1, m);
+			return false;
+		}
+#endif
+#endif
+	}
+
+	(void)h2;	/* Suppress unused variable warning */
+	return true;
+}
+
+#define SIZE 256	/* Run time is cubic in SIZE */
+
+static int __init
+test_hash_init(void)
+{
+	char buf[SIZE+1];
+	u32 string_or = 0, hash_or[2][33] = { 0 };
+	unsigned tests = 0;
+	unsigned long long h64 = 0;
+	int i, j;
+
+	fill_buf(buf, SIZE, 1);
+
+	/* Test every possible non-empty substring in the buffer. */
+	for (j = SIZE; j > 0; --j) {
+		buf[j] = '\0';
+
+		for (i = 0; i <= j; i++) {
+			u64 hashlen = hashlen_string(buf+i);
+			u32 h0 = full_name_hash(buf+i, j-i);
+
+			/* Check that hashlen_string gets the length right */
+			if (hashlen_len(hashlen) != j-i) {
+				pr_err("hashlen_string(%d..%d) returned length"
+					" %u, expected %d",
+					i, j, hashlen_len(hashlen), j-i);
+				return -EINVAL;
+			}
+			/* Check that the hashes match */
+			if (hashlen_hash(hashlen) != h0) {
+				pr_err("hashlen_string(%d..%d) = %08x != "
+					"full_name_hash() = %08x",
+					i, j, hashlen_hash(hashlen), h0);
+				return -EINVAL;
+			}
+
+			string_or |= h0;
+			h64 = h64 << 32 | h0;	/* For use with hash_64 */
+			if (!test_int_hash(h64, hash_or))
+				return -EINVAL;
+			tests++;
+		} /* i */
+	} /* j */
+
+	/* The OR of all the hash values should cover all the bits */
+	if (~string_or) {
+		pr_err("OR of all string hash results = %#x != %#x",
+			string_or, -1u);
+		return -EINVAL;
+	}
+	if (~hash_or[0][0]) {
+		pr_err("OR of all __hash_32 results = %#x != %#x",
+			hash_or[0][0], -1u);
+		return -EINVAL;
+	}
+#ifdef HAVE_ARCH__HASH_32
+#if HAVE_ARCH__HASH_32 != 1	/* Test is pointless if results match */
+	if (~hash_or[1][0]) {
+		pr_err("OR of all __hash_32_generic results = %#x != %#x",
+			hash_or[1][0], -1u);
+		return -EINVAL;
+	}
+#endif
+#endif
+
+	/* Likewise for all the i-bit hash values */
+	for (i = 1; i <= 32; i++) {
+		u32 const m = ((u32)2 << (i-1)) - 1;	/* Low i bits set */
+
+		if (hash_or[0][i] != m) {
+			pr_err("OR of all hash_32(%d) results = %#x "
+				"(%#x expected)", i, hash_or[0][i], m);
+			return -EINVAL;
+		}
+		if (hash_or[1][i] != m) {
+			pr_err("OR of all hash_64(%d) results = %#x "
+				"(%#x expected)", i, hash_or[1][i], m);
+			return -EINVAL;
+		}
+	}
+
+	/* Issue notices about skipped tests. */
+#ifndef HAVE_ARCH__HASH_32
+	pr_info("__hash_32() has no arch implementation to test.");
+#elif HAVE_ARCH__HASH_32 != 1
+	pr_info("__hash_32() is arch-specific; not compared to generic.");
+#endif
+#ifndef HAVE_ARCH_HASH_32
+	pr_info("hash_32() has no arch implementation to test.");
+#elif HAVE_ARCH_HASH_32 != 1
+	pr_info("hash_32() is arch-specific; not compared to generic.");
+#endif
+#ifndef HAVE_ARCH_HASH_64
+	pr_info("hash_64() has no arch implementation to test.");
+#elif HAVE_ARCH_HASH_64 != 1
+	pr_info("hash_64() is arch-specific; not compared to generic.");
+#endif
+
+	pr_notice("%u tests passed.", tests);
+
+	return 0;
+}
+
+static void __exit test_hash_exit(void)
+{
+}
+
+module_init(test_hash_init);	/* Does everything */
+module_exit(test_hash_exit);	/* Does nothing */
+
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3