From 57417cebc96b57122a2207fc84a6077d20c84b4b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:13 -0700 Subject: XArray: add xa_get_order Patch series "Fix read-only THP for non-tmpfs filesystems". As described more verbosely in the [3/3] changelog, we can inadvertently put an order-0 page in the page cache which occupies 512 consecutive entries. Users are running into this if they enable the READ_ONLY_THP_FOR_FS config option; see https://bugzilla.kernel.org/show_bug.cgi?id=206569 and Qian Cai has also reported it here: https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/ This is a rather intrusive way of fixing the problem, but has the advantage that I've actually been testing it with the THP patches, which means that it sees far more use than it does upstream -- indeed, Song has been entirely unable to reproduce it. It also has the advantage that it removes a few patches from my gargantuan backlog of THP patches. This patch (of 3): This function returns the order of the entry at the index. We need this because there isn't space in the shadow entry to encode its order. [akpm@linux-foundation.org: export xa_get_order to modules] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: "Kirill A . Shutemov" Cc: Qian Cai Cc: Song Liu Link: https://lkml.kernel.org/r/20200903183029.14930-1-willy@infradead.org Link: https://lkml.kernel.org/r/20200903183029.14930-2-willy@infradead.org Signed-off-by: Linus Torvalds --- lib/test_xarray.c | 21 +++++++++++++++++++++ lib/xarray.c | 40 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 61 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index d4f97925dbd8..bdd4d7995f79 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1649,6 +1649,26 @@ static noinline void check_account(struct xarray *xa) #endif } +static noinline void check_get_order(struct xarray *xa) +{ + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j; + + for (i = 0; i < 3; i++) + XA_BUG_ON(xa, xa_get_order(xa, i) != 0); + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xa_store_order(xa, i << order, order, + xa_mk_index(i << order), GFP_KERNEL); + for (j = i << order; j < (i + 1) << order; j++) + XA_BUG_ON(xa, xa_get_order(xa, j) != order); + xa_erase(xa, i << order); + } + } +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -1697,6 +1717,7 @@ static int xarray_checks(void) check_reserve(&array); check_reserve(&xa0); check_multi_store(&array); + check_get_order(&array); check_xa_alloc(); check_find(&array); check_find_entry(&array); diff --git a/lib/xarray.c b/lib/xarray.c index e9e641d3c0c3..f24cb9a43af8 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1592,6 +1592,46 @@ unlock: return xas_result(&xas, NULL); } EXPORT_SYMBOL(xa_store_range); + +/** + * xa_get_order() - Get the order of an entry. + * @xa: XArray. + * @index: Index of the entry. + * + * Return: A number between 0 and 63 indicating the order of the entry. + */ +int xa_get_order(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *entry; + int order = 0; + + rcu_read_lock(); + entry = xas_load(&xas); + + if (!entry) + goto unlock; + + if (!xas.xa_node) + goto unlock; + + for (;;) { + unsigned int slot = xas.xa_offset + (1 << order); + + if (slot >= XA_CHUNK_SIZE) + break; + if (!xa_is_sibling(xas.xa_node->slots[slot])) + break; + order++; + } + + order += xas.xa_node->shift; +unlock: + rcu_read_unlock(); + + return order; +} +EXPORT_SYMBOL(xa_get_order); #endif /* CONFIG_XARRAY_MULTI */ /** -- cgit v1.2.3 From 8fc75643c5e14574c8be59b69182452ece28315a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:16 -0700 Subject: XArray: add xas_split In order to use multi-index entries for huge pages in the page cache, we need to be able to split a multi-index entry (eg if a file is truncated in the middle of a huge page entry). This version does not support splitting more than one level of the tree at a time. This is an acceptable limitation for the page cache as we do not expect to support order-12 pages in the near future. [akpm@linux-foundation.org: export xas_split_alloc() to modules] [willy@infradead.org: fix xarray split] Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org [willy@infradead.org: fix xarray] Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: "Kirill A . Shutemov" Cc: Qian Cai Cc: Song Liu Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org Signed-off-by: Linus Torvalds --- Documentation/core-api/xarray.rst | 16 ++-- include/linux/xarray.h | 13 +++ lib/test_xarray.c | 44 ++++++++++ lib/xarray.c | 168 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 225 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 640934b6f7b4..a137a0e6d068 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range. Each entry will only be returned once, no matter how many indices it occupies. -Using xas_next() or xas_prev() with a multi-index xa_state -is not supported. Using either of these functions on a multi-index entry -will reveal sibling entries; these should be skipped over by the caller. - -Storing ``NULL`` into any index of a multi-index entry will set the entry -at every index to ``NULL`` and dissolve the tie. Splitting a multi-index -entry into entries occupying smaller ranges is not yet supported. +Using xas_next() or xas_prev() with a multi-index xa_state is not +supported. Using either of these functions on a multi-index entry will +reveal sibling entries; these should be skipped over by the caller. + +Storing ``NULL`` into any index of a multi-index entry will set the +entry at every index to ``NULL`` and dissolve the tie. A multi-index +entry can be split into entries occupying smaller ranges by calling +xas_split_alloc() without the xa_lock held, followed by taking the lock +and calling xas_split(). Functions and structures ======================== diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5b7f4ebcf4ff..5cdf441f6377 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); +void xas_split(struct xa_state *, void *entry, unsigned int order); +void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { return 0; } + +static inline void xas_split(struct xa_state *xas, void *entry, + unsigned int order) +{ + xas_store(xas, entry); +} + +static inline void xas_split_alloc(struct xa_state *xas, void *entry, + unsigned int order, gfp_t gfp) +{ +} #endif /** diff --git a/lib/test_xarray.c b/lib/test_xarray.c index bdd4d7995f79..8262c3f05a5d 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1503,6 +1503,49 @@ static noinline void check_store_range(struct xarray *xa) } } +#ifdef CONFIG_XARRAY_MULTI +static void check_split_1(struct xarray *xa, unsigned long index, + unsigned int order) +{ + XA_STATE(xas, xa, index); + void *entry; + unsigned int i = 0; + + xa_store_order(xa, index, order, xa, GFP_KERNEL); + + xas_split_alloc(&xas, xa, order, GFP_KERNEL); + xas_lock(&xas); + xas_split(&xas, xa, order); + xas_unlock(&xas); + + xa_for_each(xa, index, entry) { + XA_BUG_ON(xa, entry != xa); + i++; + } + XA_BUG_ON(xa, i != 1 << order); + + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + xa_destroy(xa); +} + +static noinline void check_split(struct xarray *xa) +{ + unsigned int order; + + XA_BUG_ON(xa, !xa_empty(xa)); + + for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { + check_split_1(xa, 0, order); + check_split_1(xa, 1UL << order, order); + check_split_1(xa, 3UL << order, order); + } +} +#else +static void check_split(struct xarray *xa) { } +#endif + static void check_align_1(struct xarray *xa, char *name) { int i; @@ -1729,6 +1772,7 @@ static int xarray_checks(void) check_store_range(&array); check_store_iter(&array); check_align(&xa0); + check_split(&array); check_workingset(&array, 0); check_workingset(&array, 64); diff --git a/lib/xarray.c b/lib/xarray.c index f24cb9a43af8..b76eea7b314c 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node) */ static void xas_destroy(struct xa_state *xas) { - struct xa_node *node = xas->xa_alloc; + struct xa_node *next, *node = xas->xa_alloc; - if (!node) - return; - XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); - kmem_cache_free(radix_tree_node_cachep, node); - xas->xa_alloc = NULL; + while (node) { + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + next = rcu_dereference_raw(node->parent); + radix_tree_node_rcu_free(&node->rcu_head); + xas->xa_alloc = node = next; + } } /** @@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; + xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; @@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) } if (!xas->xa_alloc) return false; + xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; @@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas) /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. Because we cannot store a - * multiple-index entry at index 0, the calculation is a little more complex + * multi-index entry at index 0, the calculation is a little more complex * than you might expect. */ static unsigned long xas_max(struct xa_state *xas) @@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_init_marks); +#ifdef CONFIG_XARRAY_MULTI +static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) +{ + unsigned int marks = 0; + xa_mark_t mark = XA_MARK_0; + + for (;;) { + if (node_get_mark(node, offset, mark)) + marks |= 1 << (__force unsigned int)mark; + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } + + return marks; +} + +static void node_set_marks(struct xa_node *node, unsigned int offset, + struct xa_node *child, unsigned int marks) +{ + xa_mark_t mark = XA_MARK_0; + + for (;;) { + if (marks & (1 << (__force unsigned int)mark)) { + node_set_mark(node, offset, mark); + if (child) + node_mark_all(child, mark); + } + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } +} + +/** + * xas_split_alloc() - Allocate memory for splitting an entry. + * @xas: XArray operation state. + * @entry: New entry which will be stored in the array. + * @order: New entry order. + * @gfp: Memory allocation flags. + * + * This function should be called before calling xas_split(). + * If necessary, it will allocate new nodes (and fill them with @entry) + * to prepare for the upcoming split of an entry of @order size into + * entries of the order stored in the @xas. + * + * Context: May sleep if @gfp flags permit. + */ +void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, + gfp_t gfp) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int mask = xas->xa_sibs; + + /* XXX: no support for splitting really large entries yet */ + if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) + goto nomem; + if (xas->xa_shift + XA_CHUNK_SHIFT > order) + return; + + do { + unsigned int i; + void *sibling; + struct xa_node *node; + + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!node) + goto nomem; + node->array = xas->xa; + for (i = 0; i < XA_CHUNK_SIZE; i++) { + if ((i & mask) == 0) { + RCU_INIT_POINTER(node->slots[i], entry); + sibling = xa_mk_sibling(0); + } else { + RCU_INIT_POINTER(node->slots[i], sibling); + } + } + RCU_INIT_POINTER(node->parent, xas->xa_alloc); + xas->xa_alloc = node; + } while (sibs-- > 0); + + return; +nomem: + xas_destroy(xas); + xas_set_err(xas, -ENOMEM); +} +EXPORT_SYMBOL_GPL(xas_split_alloc); + +/** + * xas_split() - Split a multi-index entry into smaller entries. + * @xas: XArray operation state. + * @entry: New entry to store in the array. + * @order: New entry order. + * + * The value in the entry is copied to all the replacement entries. + * + * Context: Any context. The caller should hold the xa_lock. + */ +void xas_split(struct xa_state *xas, void *entry, unsigned int order) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int offset, marks; + struct xa_node *node; + void *curr = xas_load(xas); + int values = 0; + + node = xas->xa_node; + if (xas_top(node)) + return; + + marks = node_get_marks(node, xas->xa_offset); + + offset = xas->xa_offset + sibs; + do { + if (xas->xa_shift < node->shift) { + struct xa_node *child = xas->xa_alloc; + + xas->xa_alloc = rcu_dereference_raw(child->parent); + child->shift = node->shift - XA_CHUNK_SHIFT; + child->offset = offset; + child->count = XA_CHUNK_SIZE; + child->nr_values = xa_is_value(entry) ? + XA_CHUNK_SIZE : 0; + RCU_INIT_POINTER(child->parent, node); + node_set_marks(node, offset, child, marks); + rcu_assign_pointer(node->slots[offset], + xa_mk_node(child)); + if (xa_is_value(curr)) + values--; + } else { + unsigned int canon = offset - xas->xa_sibs; + + node_set_marks(node, canon, NULL, marks); + rcu_assign_pointer(node->slots[canon], entry); + while (offset > canon) + rcu_assign_pointer(node->slots[offset--], + xa_mk_sibling(canon)); + values += (xa_is_value(entry) - xa_is_value(curr)) * + (xas->xa_sibs + 1); + } + } while (offset-- > xas->xa_offset); + + node->nr_values += values; +} +EXPORT_SYMBOL_GPL(xas_split); +#endif + /** * xas_pause() - Pause a walk to drop a lock. * @xas: XArray operation state. @@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store); * @gfp: Memory allocation flags. * * After this function returns, loads from this index will return @entry. - * Storing into an existing multislot entry updates the entry of every index. + * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Any context. Takes and releases the xa_lock. @@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first, * * After this function returns, loads from any index between @first and @last, * inclusive will return @entry. - * Storing into an existing multislot entry updates the entry of every index. + * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Process context. Takes and releases the xa_lock. May sleep -- cgit v1.2.3 From b296a6d53339a79082c1d2c1761e948e8b3def69 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Oct 2020 20:10:21 -0700 Subject: kernel.h: split out min()/max() et al. helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out min()/max() et al. helpers. At the same time convert users in header and lib folder to use new header. Though for time being include new header back to kernel.h to avoid twisted indirected includes for other existing users. Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Rasmus Villemoes Cc: Joe Perches Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20200910164152.GA1891694@smile.fi.intel.com Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 1 + include/linux/bvec.h | 6 +- include/linux/jiffies.h | 3 +- include/linux/kernel.h | 150 +-------------------------------------------- include/linux/minmax.h | 153 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nodemask.h | 2 +- include/linux/uaccess.h | 1 + kernel/range.c | 3 +- lib/find_bit.c | 1 + lib/hexdump.c | 1 + lib/math/rational.c | 2 +- lib/math/reciprocal_div.c | 1 + 12 files changed, 170 insertions(+), 154 deletions(-) create mode 100644 include/linux/minmax.h (limited to 'lib') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c09375e0a0eb..639cae2c158b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/bvec.h b/include/linux/bvec.h index dd74503f7e5e..2efec10bf792 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -7,10 +7,14 @@ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H -#include #include #include +#include +#include #include +#include + +struct page; /** * struct bio_vec - a contiguous range of physical memory addresses diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fed6ba96c527..5e13f801c902 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -3,8 +3,9 @@ #define _LINUX_JIFFIES_H #include +#include #include -#include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e4aa29b1ad62..c629215fdad9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap) static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ -/* - * min()/max()/clamp() macros must accomplish three things: - * - * - avoid multiple evaluations of the arguments (so side-effects like - * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only - * constant expressions (to avoid tripping VLA warnings in stack - * allocation usage). - */ -#define __typecheck(x, y) \ - (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) - -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) - -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) - -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) - -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) - -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) - -/** - * min - return minimum of two values of the same or compatible types - * @x: first value - * @y: second value - */ -#define min(x, y) __careful_cmp(x, y, <) - -/** - * max - return maximum of two values of the same or compatible types - * @x: first value - * @y: second value - */ -#define max(x, y) __careful_cmp(x, y, >) - -/** - * min3 - return minimum of three values - * @x: first value - * @y: second value - * @z: third value - */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) - -/** - * max3 - return maximum of three values - * @x: first value - * @y: second value - * @z: third value - */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) - -/** - * min_not_zero - return the minimum that is _not_ zero, unless both are zero - * @x: value1 - * @y: value2 - */ -#define min_not_zero(x, y) ({ \ - typeof(x) __x = (x); \ - typeof(y) __y = (y); \ - __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) - -/** - * clamp - return a value clamped to a given range with strict typechecking - * @val: current value - * @lo: lowest allowable value - * @hi: highest allowable value - * - * This macro does strict typechecking of @lo/@hi to make sure they are of the - * same type as @val. See the unnecessary pointer comparisons. - */ -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) - -/* - * ..and if you can't take the strict - * types, you can specify one yourself. - * - * Or not use min/max/clamp at all, of course. - */ - -/** - * min_t - return minimum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value - */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) - -/** - * max_t - return maximum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value - */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) - -/** - * clamp_t - return a value clamped to a given range using a given type - * @type: the type of variable to use - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of type - * @type to make all the comparisons. - */ -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) - -/** - * clamp_val - return a value clamped to a given range using val's type - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of whatever - * type the input argument @val is. This is useful when @val is an unsigned - * type and @lo and @hi are literals that will otherwise be assigned a signed - * integer type. - */ -#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) - - -/** - * swap - swap values of @a and @b - * @a: first value - * @b: second value - */ -#define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) - /* This counts to 12. Any more, it will return 13th argument. */ #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) diff --git a/include/linux/minmax.h b/include/linux/minmax.h new file mode 100644 index 000000000000..c0f57b0c64d9 --- /dev/null +++ b/include/linux/minmax.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MINMAX_H +#define _LINUX_MINMAX_H + +/* + * min()/max()/clamp() macros must accomplish three things: + * + * - avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - perform strict type-checking (to generate warnings instead of + * nasty runtime surprises). See the "unnecessary" pointer comparison + * in __typecheck(). + * - retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +#define __no_side_effects(x, y) \ + (__is_constexpr(x) && __is_constexpr(y)) + +#define __safe_cmp(x, y) \ + (__typecheck(x, y) && __no_side_effects(x, y)) + +#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) + +#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + __cmp(unique_x, unique_y, op); }) + +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), \ + __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define min(x, y) __careful_cmp(x, y, <) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define max(x, y) __careful_cmp(x, y, >) + +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define max3(x, y, z) max((typeof(x))max(x, y), z) + +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @lo: lowest allowable value + * @hi: highest allowable value + * + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. + */ +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + +/* + * ..and if you can't take the strict + * types, you can specify one yourself. + * + * Or not use min/max/clamp at all, of course. + */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. + */ +#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) + +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) + +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 3334ce056335..ac398e143c9a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -90,9 +90,9 @@ * for such situations. See below and CPUMASK_ALLOC also. */ -#include #include #include +#include #include typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1ae36bc8db35..ef084eacaa7c 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -3,6 +3,7 @@ #define __LINUX_UACCESS_H__ #include +#include #include #include diff --git a/kernel/range.c b/kernel/range.c index d84de6766472..56435f96da73 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -2,8 +2,9 @@ /* * Range add and subtract */ -#include #include +#include +#include #include #include #include diff --git a/lib/find_bit.c b/lib/find_bit.c index 49f875f1baf7..4a8751010d59 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -16,6 +16,7 @@ #include #include #include +#include #if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \ diff --git a/lib/hexdump.c b/lib/hexdump.c index 147133f8eb2f..9301578f98e8 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/lib/math/rational.c b/lib/math/rational.c index df75c8809693..9781d521963d 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include /* * calculate best rational approximation for a given fraction diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c index bf043258fa00..32436dd4171e 100644 --- a/lib/math/reciprocal_div.c +++ b/lib/math/reciprocal_div.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * For a description of the algorithm please have a look at -- cgit v1.2.3 From 197d6c1dde4e19517e8cf843eec7fc6417241969 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:45 -0700 Subject: lib: bitmap: delete duplicated words Drop the repeated word "an". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040424.25760-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 61394890788e..75006c4036e9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -23,7 +23,7 @@ /** * DOC: bitmap introduction * - * bitmaps provide an array of bits, implemented using an an + * bitmaps provide an array of bits, implemented using an * array of unsigned longs. The number of valid bits in a * given bitmap does _not_ need to be an exact multiple of * BITS_PER_LONG. -- cgit v1.2.3 From f1e594acb1bdf29b583fe205793699a8e7c35fc0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:48 -0700 Subject: lib: libcrc32c: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040430.25807-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/libcrc32c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 77ab839644c5..5ca0d815a95d 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -12,7 +12,7 @@ * pages = {}, * month = {June}, *} - * Used by the iSCSI driver, possibly others, and derived from the + * Used by the iSCSI driver, possibly others, and derived from * the iscsi-crc.c module of the linux-iscsi driver at * http://linux-iscsi.sourceforge.net. * -- cgit v1.2.3 From 2f22385fb1211a90f53ee197e0f41ae0d35b391c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:51 -0700 Subject: lib: decompress_bunzip2: delete duplicated words Drop the repeated word "how". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040436.25852-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/decompress_bunzip2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index f9628f3924ce..c72c865032fa 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd) j = (bd->inbufBits >> bd->inbufBitCount)& ((1 << hufGroup->maxLen)-1); got_huff_bits: - /* Figure how how many bits are in next symbol and + /* Figure how many bits are in next symbol and * unget extras */ i = hufGroup->minLen; while (j > limit[i]) -- cgit v1.2.3 From dde57fe01a0a39ce131ad5611e0a4430cec66bd8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:57 -0700 Subject: lib: dynamic_queue_limits: delete duplicated words + fix typo Drop the repeated word "the". Fix spelling of "excess". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040449.25946-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/dynamic_queue_limits.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index e659a027036e..fde0aa244148 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count) * A decrease is only considered if the queue has been busy in * the whole interval (the check above). * - * If there is slack, the amount of execess data queued above - * the the amount needed to prevent starvation, the queue limit + * If there is slack, the amount of excess data queued above + * the amount needed to prevent starvation, the queue limit * can be decreased. To avoid hysteresis we consider the * minimum amount of slack found over several iterations of the * completion routine. -- cgit v1.2.3 From 4e20ace06f705404df09316b859c267e9ec99354 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:01 -0700 Subject: lib: earlycpio: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040455.25995-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/earlycpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/earlycpio.c b/lib/earlycpio.c index c001e084829e..e83628882001 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -42,7 +42,7 @@ enum cpio_fields { /** * cpio_data find_cpio_data - Search for files in an uncompressed cpio * @path: The directory to search for, including a slash at the end - * @data: Pointer to the the cpio archive or a header inside + * @data: Pointer to the cpio archive or a header inside * @len: Remaining length of the cpio based on data pointer * @nextoff: When a matching file is found, this is the offset from the * beginning of the cpio to the beginning of the next file, not the -- cgit v1.2.3 From e0656501a6192ebced36764a007f91cc014d896d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:04 -0700 Subject: lib: radix-tree: delete duplicated words Drop the repeated word "be". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040508.26086-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e4a3a4397f2..005ced92a4a9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -325,7 +325,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) int ret = -ENOMEM; /* - * Nodes preloaded by one cgroup can be be used by another cgroup, so + * Nodes preloaded by one cgroup can be used by another cgroup, so * they should never be accounted to any particular memory cgroup. */ gfp_mask &= ~__GFP_ACCOUNT; -- cgit v1.2.3 From 408a93a2bb4f276f28059b51fbe0bbd06be350fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:07 -0700 Subject: lib: syscall: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040514.26136-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/syscall.c b/lib/syscall.c index fb328e7ccb08..8533d2fea2d7 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -44,7 +44,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info * .data.instruction_pointer - filled with user PC * * If @target is blocked in a system call, returns zero with @info.data.nr - * set to the the call's number and @info.data.args filled in with its + * set to the call's number and @info.data.args filled in with its * arguments. Registers not used for system call arguments may not be available * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target -- cgit v1.2.3 From 2d0469814ade60efb5cd8ca9af93c43bc3ba831e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:10 -0700 Subject: lib: test_sysctl: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040520.1999-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/test_sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 98bc92a91662..3750323973f4 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -16,7 +16,7 @@ */ /* - * This module provides an interface to the the proc sysctl interfaces. This + * This module provides an interface to the proc sysctl interfaces. This * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the * system unless explicitly requested by name. You can also build this driver * into your kernel. -- cgit v1.2.3 From 8d8472cfdefa0f5c839f5c4a507af5e26ee6f5bf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:14 -0700 Subject: lib/mpi/mpi-bit.c: fix spello of "functions" Fix typo/spello of "functions". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/8df15173-a6df-9426-7cad-a2d279bf1170@infradead.org Signed-off-by: Linus Torvalds --- lib/mpi/mpi-bit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c index a5119a2bcdd4..142b680835df 100644 --- a/lib/mpi/mpi-bit.c +++ b/lib/mpi/mpi-bit.c @@ -1,4 +1,4 @@ -/* mpi-bit.c - MPI bit level fucntions +/* mpi-bit.c - MPI bit level functions * Copyright (C) 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. -- cgit v1.2.3 From 3b6742618ed9216dd6caad968fe8c83b32dff485 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 15 Oct 2020 20:11:17 -0700 Subject: lib/idr.c: document calling context for IDA APIs mustn't use locks The documentation for these functions indicates that callers don't need to hold a lock while calling them, but that documentation is only in one place under "IDA Usage". Let's state the same information on each IDA function so that it's clear what the calling context requires. Furthermore, let's document ida_simple_get() with the same information so that callers know how this API works. Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Reviewed-by: Greg Kroah-Hartman Cc: Tri Vo Cc: Jonathan Corbet Cc: Matthew Wilcox Link: https://lkml.kernel.org/r/20200910055246.2297797-1-swboyd@chromium.org Signed-off-by: Linus Torvalds --- include/linux/idr.h | 9 ++++++--- lib/idr.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index 3ade03e5c7af..b235ed987021 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida); * * Allocate an ID between 0 and %INT_MAX, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp) * * Allocate an ID between @min and %INT_MAX, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) * * Allocate an ID between 0 and @max, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ diff --git a/lib/idr.c b/lib/idr.c index c2cf2c52bbde..3fa8be43696f 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace); * Allocate an ID between @min and @max, inclusive. The allocated ID will * not exceed %INT_MAX, even if @max is larger. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -479,7 +480,8 @@ EXPORT_SYMBOL(ida_alloc_range); * @ida: IDA handle. * @id: Previously allocated ID. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. */ void ida_free(struct ida *ida, unsigned int id) { @@ -531,7 +533,8 @@ EXPORT_SYMBOL(ida_free); * or freed. If the IDA is already empty, there is no need to call this * function. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. */ void ida_destroy(struct ida *ida) { -- cgit v1.2.3 From 6ed9b92e290b530197c2dda2271f5312abc475e6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 15 Oct 2020 20:11:25 -0700 Subject: lib/scatterlist.c: avoid a double memset 'sgl' is zeroed a few lines below in 'sg_init_table()'. There is no need to clear it twice. Remove the redundant initialization. Signed-off-by: Christophe JAILLET Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200920071544.368841-1-christophe.jaillet@wanadoo.fr Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 5d63a8857f36..d94628fa3349 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -504,7 +504,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, nalloc++; } sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), - (gfp & ~GFP_DMA) | __GFP_ZERO); + gfp & ~GFP_DMA); if (!sgl) return NULL; -- cgit v1.2.3 From 1d339638a954edce06ffcaa15d883d322e9e8291 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 15 Oct 2020 20:11:28 -0700 Subject: lib/percpu_counter.c: use helper macro abs() Use helper macro abs() to simplify the "x >= t || x <= -t" cmp. Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: https://lkml.kernel.org/r/20200927122746.5964-1-linmiaohe@huawei.com Signed-off-by: Linus Torvalds --- lib/percpu_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index f61689a96e85..00f666d94486 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -85,7 +85,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) preempt_disable(); count = __this_cpu_read(*fbc->counters) + amount; - if (count >= batch || count <= -batch) { + if (abs(count) >= batch) { unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); fbc->count += count; -- cgit v1.2.3 From f3c9d0a3fe97a8b05548d27e9a8e7a5e6875004c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Oct 2020 20:11:34 -0700 Subject: lib/test_hmm.c: fix an error code in dmirror_allocate_chunk() This is supposed to return false on failure, not a negative error code. Fixes: 170e38548b81 ("mm/hmm/test: use after free in dmirror_allocate_chunk()") Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Reviewed-by: Ralph Campbell Cc: Jerome Glisse Cc: Stephen Rothwell Cc: Jason Gunthorpe Cc: Dan Williams Link: https://lkml.kernel.org/r/20201010200812.GA1886610@mwanda Signed-off-by: Linus Torvalds --- lib/test_hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_hmm.c b/lib/test_hmm.c index e151a7f10519..80a78877bd93 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -461,7 +461,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); if (!devmem) - return -ENOMEM; + return false; res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, "hmm_dmirror"); -- cgit v1.2.3 From 904542dc56524f921a6bab0639ff6249c01e775f Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 15 Oct 2020 20:11:38 -0700 Subject: lib/crc32.c: fix trivial typo in preprocessor condition Whether crc32_be needs a lookup table is chosen based on CRC_LE_BITS. Obviously, the _be function should be governed by the _BE_ define. This probably never pops up as it's hard to come up with a configuration where CRC_BE_BITS isn't the same as CRC_LE_BITS and as nobody is using bitwise CRC anyway. Fixes: 46c5801eaf86 ("crc32: bolt on crc32c") Signed-off-by: Tobias Jordan Signed-off-by: Andrew Morton Cc: Krzysztof Kozlowski Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Link: https://lkml.kernel.org/r/20200923182122.GA3338@agrajag.zerfleddert.de Signed-off-by: Linus Torvalds --- lib/crc32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index 35a03d03f973..2a68dfd3b96c 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, return crc; } -#if CRC_LE_BITS == 1 +#if CRC_BE_BITS == 1 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); -- cgit v1.2.3 From 6a6155f664e31c9be43cd690541a9a682ba3dc22 Mon Sep 17 00:00:00 2001 From: George Popescu Date: Thu, 15 Oct 2020 20:13:38 -0700 Subject: ubsan: introduce CONFIG_UBSAN_LOCAL_BOUNDS for Clang When the kernel is compiled with Clang, -fsanitize=bounds expands to -fsanitize=array-bounds and -fsanitize=local-bounds. Enabling -fsanitize=local-bounds with Clang has the unfortunate side-effect of inserting traps; this goes back to its original intent, which was as a hardening and not a debugging feature [1]. The same feature made its way into -fsanitize=bounds, but the traps remained. For that reason, -fsanitize=bounds was split into 'array-bounds' and 'local-bounds' [2]. Since 'local-bounds' doesn't behave like a normal sanitizer, enable it with Clang only if trapping behaviour was requested by CONFIG_UBSAN_TRAP=y. Add the UBSAN_BOUNDS_LOCAL config to Kconfig.ubsan to enable the 'local-bounds' option by default when UBSAN_TRAP is enabled. [1] http://lists.llvm.org/pipermail/llvm-dev/2012-May/049972.html [2] http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20131021/091536.html Suggested-by: Marco Elver Signed-off-by: George Popescu Signed-off-by: Andrew Morton Reviewed-by: David Brazdil Reviewed-by: Marco Elver Cc: Masahiro Yamada Cc: Michal Marek Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Kees Cook Cc: Dmitry Vyukov Cc: Arnd Bergmann Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20200922074330.2549523-1-georgepope@google.com Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 14 ++++++++++++++ scripts/Makefile.ubsan | 10 +++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 774315de555a..58f8d03d037b 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -47,6 +47,20 @@ config UBSAN_BOUNDS to the {str,mem}*cpy() family of functions (that is addressed by CONFIG_FORTIFY_SOURCE). +config UBSAN_LOCAL_BOUNDS + bool "Perform array local bounds checking" + depends on UBSAN_TRAP + depends on CC_IS_CLANG + depends on !UBSAN_KCOV_BROKEN + help + This option enables -fsanitize=local-bounds which traps when an + exception/error is detected. Therefore, it should be enabled only + if trapping is expected. + Enabling this option detects errors due to accesses through a + pointer that is derived from an object of a statically-known size, + where an added offset (which may not be known statically) is + out-of-bounds. + config UBSAN_MISC bool "Enable all other Undefined Behavior sanity checks" default UBSAN diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 27348029b2b8..4e3fff0745e8 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -4,7 +4,15 @@ ifdef CONFIG_UBSAN_ALIGNMENT endif ifdef CONFIG_UBSAN_BOUNDS - CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) + ifdef CONFIG_CC_IS_CLANG + CFLAGS_UBSAN += -fsanitize=array-bounds + else + CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) + endif +endif + +ifdef CONFIG_UBSAN_LOCAL_BOUNDS + CFLAGS_UBSAN += -fsanitize=local-bounds endif ifdef CONFIG_UBSAN_MISC -- cgit v1.2.3 From 2c739ced5886cd8c8361faa79a9522ec05174ed0 Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:46 -0700 Subject: lib, include/linux: add usercopy failure capability Patch series "add fault injection to user memory access", v3. The goal of this series is to improve testing of fault-tolerance in usages of user memory access functions, by adding support for fault injection. syzkaller/syzbot are using the existing fault injection modes and will use this particular feature also. The first patch adds failure injection capability for usercopy functions. The second changes usercopy functions to use this new failure capability (copy_from_user, ...). The third patch adds get/put/clear_user failures to x86. This patch (of 3): Add a failure injection capability to improve testing of fault-tolerance in usages of user memory access functions. Add CONFIG_FAULT_INJECTION_USERCOPY to enable faults in usercopy functions. The should_fail_usercopy function is to be called by these functions (copy_from_user, get_user, ...) in order to fail or not. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Borislav Petkov Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Peter Zijlstra (Intel) Cc: "H. Peter Anvin" Cc: Al Viro Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Marco Elver Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-1-alinde@google.com Link: http://lkml.kernel.org/r/20200831171733.955393-2-alinde@google.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 1 + Documentation/fault-injection/fault-injection.rst | 7 +++- include/linux/fault-inject-usercopy.h | 22 +++++++++++++ lib/Kconfig.debug | 7 ++++ lib/Makefile | 1 + lib/fault-inject-usercopy.c | 39 +++++++++++++++++++++++ 6 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 include/linux/fault-inject-usercopy.h create mode 100644 lib/fault-inject-usercopy.c (limited to 'lib') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f7ac0e663976..6d9afb221ff7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1343,6 +1343,7 @@ current integrity status. failslab= + fail_usercopy= fail_page_alloc= fail_make_request=[KNL] General fault injection mechanism. diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index f850ad018b70..31ecfe44e5b4 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -16,6 +16,10 @@ Available fault injection capabilities injects page allocation failures. (alloc_pages(), get_free_pages(), ...) +- fail_usercopy + + injects failures in user memory access functions. (copy_from_user(), get_user(), ...) + - fail_futex injects futex deadlock and uaddr fault errors. @@ -177,6 +181,7 @@ use the boot option:: failslab= fail_page_alloc= + fail_usercopy= fail_make_request= fail_futex= mmc_core.fail_request=,,, @@ -222,7 +227,7 @@ How to add new fault injection capability - debugfs entries - failslab, fail_page_alloc, and fail_make_request use this way. + failslab, fail_page_alloc, fail_usercopy, and fail_make_request use this way. Helper functions: fault_create_debugfs_attr(name, parent, attr); diff --git a/include/linux/fault-inject-usercopy.h b/include/linux/fault-inject-usercopy.h new file mode 100644 index 000000000000..56c3a693fdd9 --- /dev/null +++ b/include/linux/fault-inject-usercopy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__ +#define __LINUX_FAULT_INJECT_USERCOPY_H__ + +/* + * This header provides a wrapper for injecting failures to user space memory + * access functions. + */ + +#include + +#ifdef CONFIG_FAULT_INJECTION_USERCOPY + +bool should_fail_usercopy(void); + +#else + +static inline bool should_fail_usercopy(void) { return false; } + +#endif /* CONFIG_FAULT_INJECTION_USERCOPY */ + +#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 491789a793ae..ebe5ab111e65 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1768,6 +1768,13 @@ config FAIL_PAGE_ALLOC help Provide fault-injection capability for alloc_pages(). +config FAULT_INJECTION_USERCOPY + bool "Fault injection capability for usercopy functions" + depends on FAULT_INJECTION + help + Provides fault-injection capability to inject failures + in usercopy functions (copy_from_user(), get_user(), ...). + config FAIL_MAKE_REQUEST bool "Fault-injection capability for disk IO" depends on FAULT_INJECTION && BLOCK diff --git a/lib/Makefile b/lib/Makefile index 49a2a9e36224..1c7577b2e86a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -210,6 +210,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c new file mode 100644 index 000000000000..77558b6c29ca --- /dev/null +++ b/lib/fault-inject-usercopy.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +static struct { + struct fault_attr attr; +} fail_usercopy = { + .attr = FAULT_ATTR_INITIALIZER, +}; + +static int __init setup_fail_usercopy(char *str) +{ + return setup_fault_attr(&fail_usercopy.attr, str); +} +__setup("fail_usercopy=", setup_fail_usercopy); + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static int __init fail_usercopy_debugfs(void) +{ + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_usercopy", NULL, + &fail_usercopy.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + return 0; +} + +late_initcall(fail_usercopy_debugfs); + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ + +bool should_fail_usercopy(void) +{ + return should_fail(&fail_usercopy.attr, 1); +} +EXPORT_SYMBOL_GPL(should_fail_usercopy); -- cgit v1.2.3 From 4d0e9df5e43dba52d38b251e3b909df8fa1110be Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:50 -0700 Subject: lib, uaccess: add failure injection to usercopy functions To test fault-tolerance of user memory access functions, introduce fault injection to usercopy functions. If a failure is expected return either -EFAULT or the total amount of bytes that were not copied. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Al Viro Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Marco Elver Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-3-alinde@google.com Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 11 ++++++++++- lib/iov_iter.c | 5 +++++ lib/strncpy_from_user.c | 3 +++ lib/usercopy.c | 5 ++++- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ef084eacaa7c..1b8c9d6162bc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,6 +2,7 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include #include #include #include @@ -84,6 +85,8 @@ static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); @@ -105,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + if (should_fail_usercopy()) + return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); @@ -114,6 +119,8 @@ static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); @@ -125,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); - if (likely(access_ok(from, n))) { + if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } @@ -143,6 +150,8 @@ static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 14cae2584db4..1635111c5bd2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +141,8 @@ static int copyout(void __user *to, const void *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); @@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n) static int copyin(void *to, const void __user *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(from, n)) { instrument_copy_from_user(to, from, n); n = raw_copy_from_user(to, from, n); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 34696a348864..e6d5fcc2cdf3 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -99,6 +100,8 @@ long strncpy_from_user(char *dst, const char __user *src, long count) unsigned long max_addr, src_addr; might_fault(); + if (should_fail_usercopy()) + return -EFAULT; if (unlikely(count <= 0)) return 0; diff --git a/lib/usercopy.c b/lib/usercopy.c index b26509f112f9..7413dd300516 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include @@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n { unsigned long res = n; might_fault(); - if (likely(access_ok(from, n))) { + if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } @@ -25,6 +26,8 @@ EXPORT_SYMBOL(_copy_from_user); unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; if (likely(access_ok(to, n))) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); -- cgit v1.2.3