From 3cb989501c2688cacbb7dc4b0d353faf838f53a1 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Sep 2013 10:35:17 +0100 Subject: Add a generic associative array implementation. Add a generic associative array implementation that can be used as the container for keyrings, thereby massively increasing the capacity available whilst also speeding up searching in keyrings that contain a lot of keys. This may also be useful in FS-Cache for tracking cookies. Documentation is added into Documentation/associative_array.txt Some of the properties of the implementation are: (1) Objects are opaque pointers. The implementation does not care where they point (if anywhere) or what they point to (if anything). [!] NOTE: Pointers to objects _must_ be zero in the two least significant bits. (2) Objects do not need to contain linkage blocks for use by the array. This permits an object to be located in multiple arrays simultaneously. Rather, the array is made up of metadata blocks that point to objects. (3) Objects are labelled as being one of two types (the type is a bool value). This information is stored in the array, but has no consequence to the array itself or its algorithms. (4) Objects require index keys to locate them within the array. (5) Index keys must be unique. Inserting an object with the same key as one already in the array will replace the old object. (6) Index keys can be of any length and can be of different lengths. (7) Index keys should encode the length early on, before any variation due to length is seen. (8) Index keys can include a hash to scatter objects throughout the array. (9) The array can iterated over. The objects will not necessarily come out in key order. (10) The array can be iterated whilst it is being modified, provided the RCU readlock is being held by the iterator. Note, however, under these circumstances, some objects may be seen more than once. If this is a problem, the iterator should lock against modification. Objects will not be missed, however, unless deleted. (11) Objects in the array can be looked up by means of their index key. (12) Objects can be looked up whilst the array is being modified, provided the RCU readlock is being held by the thread doing the look up. The implementation uses a tree of 16-pointer nodes internally that are indexed on each level by nibbles from the index key. To improve memory efficiency, shortcuts can be emplaced to skip over what would otherwise be a series of single-occupancy nodes. Further, nodes pack leaf object pointers into spare space in the node rather than making an extra branch until as such time an object needs to be added to a full node. Signed-off-by: David Howells --- lib/Kconfig | 14 + lib/Makefile | 1 + lib/assoc_array.c | 1745 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 1760 insertions(+) create mode 100644 lib/assoc_array.c (limited to 'lib') diff --git a/lib/Kconfig b/lib/Kconfig index b3c8be0da17f..3cb879b1f282 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -322,6 +322,20 @@ config TEXTSEARCH_FSM config BTREE boolean +config ASSOCIATIVE_ARRAY + bool + help + Generic associative array. Can be searched and iterated over whilst + it is being modified. It is also reasonably quick to search and + modify. The algorithms are non-recursive, and the trees are highly + capacious. + + See: + + Documentation/assoc_array.txt + + for more information. + config HAS_IOMEM boolean depends on !NO_IOMEM diff --git a/lib/Makefile b/lib/Makefile index f3bb2cb98adf..1e806477e472 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -51,6 +51,7 @@ CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o obj-$(CONFIG_BTREE) += btree.o +obj-$(CONFIG_ASSOCIATIVE_ARRAY) += assoc_array.o obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c new file mode 100644 index 000000000000..a0952818f938 --- /dev/null +++ b/lib/assoc_array.c @@ -0,0 +1,1745 @@ +/* Generic associative array implementation. + * + * See Documentation/assoc_array.txt for information. + * + * Copyright (C) 2013 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +//#define DEBUG +#include +#include + +/* + * Iterate over an associative array. The caller must hold the RCU read lock + * or better. + */ +static int assoc_array_subtree_iterate(const struct assoc_array_ptr *root, + const struct assoc_array_ptr *stop, + int (*iterator)(const void *leaf, + void *iterator_data), + void *iterator_data) +{ + const struct assoc_array_shortcut *shortcut; + const struct assoc_array_node *node; + const struct assoc_array_ptr *cursor, *ptr, *parent; + unsigned long has_meta; + int slot, ret; + + cursor = root; + +begin_node: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + cursor = ACCESS_ONCE(shortcut->next_node); + } + + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + slot = 0; + + /* We perform two passes of each node. + * + * The first pass does all the leaves in this node. This means we + * don't miss any leaves if the node is split up by insertion whilst + * we're iterating over the branches rooted here (we may, however, see + * some leaves twice). + */ + has_meta = 0; + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + has_meta |= (unsigned long)ptr; + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + smp_read_barrier_depends(); + + /* Invoke the callback */ + ret = iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data); + if (ret) + return ret; + } + } + + /* The second pass attends to all the metadata pointers. If we follow + * one of these we may find that we don't come back here, but rather go + * back to a replacement node with the leaves in a different layout. + * + * We are guaranteed to make progress, however, as the slot number for + * a particular portion of the key space cannot change - and we + * continue at the back pointer + 1. + */ + if (!(has_meta & ASSOC_ARRAY_PTR_META_TYPE)) + goto finished_node; + slot = 0; + +continue_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (assoc_array_ptr_is_meta(ptr)) { + cursor = ptr; + goto begin_node; + } + } + +finished_node: + /* Move up to the parent (may need to skip back over a shortcut) */ + parent = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + if (parent == stop) + return 0; + + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + smp_read_barrier_depends(); + cursor = parent; + parent = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + if (parent == stop) + return 0; + } + + /* Ascend to next slot in parent node */ + cursor = parent; + slot++; + goto continue_node; +} + +/** + * assoc_array_iterate - Pass all objects in the array to a callback + * @array: The array to iterate over. + * @iterator: The callback function. + * @iterator_data: Private data for the callback function. + * + * Iterate over all the objects in an associative array. Each one will be + * presented to the iterator function. + * + * If the array is being modified concurrently with the iteration then it is + * possible that some objects in the array will be passed to the iterator + * callback more than once - though every object should be passed at least + * once. If this is undesirable then the caller must lock against modification + * for the duration of this function. + * + * The function will return 0 if no objects were in the array or else it will + * return the result of the last iterator function called. Iteration stops + * immediately if any call to the iteration function results in a non-zero + * return. + * + * The caller should hold the RCU read lock or better if concurrent + * modification is possible. + */ +int assoc_array_iterate(const struct assoc_array *array, + int (*iterator)(const void *object, + void *iterator_data), + void *iterator_data) +{ + struct assoc_array_ptr *root = ACCESS_ONCE(array->root); + + if (!root) + return 0; + return assoc_array_subtree_iterate(root, NULL, iterator, iterator_data); +} + +enum assoc_array_walk_status { + assoc_array_walk_tree_empty, + assoc_array_walk_found_terminal_node, + assoc_array_walk_found_wrong_shortcut, +} status; + +struct assoc_array_walk_result { + struct { + struct assoc_array_node *node; /* Node in which leaf might be found */ + int level; + int slot; + } terminal_node; + struct { + struct assoc_array_shortcut *shortcut; + int level; + int sc_level; + unsigned long sc_segments; + unsigned long dissimilarity; + } wrong_shortcut; +}; + +/* + * Navigate through the internal tree looking for the closest node to the key. + */ +static enum assoc_array_walk_status +assoc_array_walk(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *ptr; + unsigned long sc_segments, dissimilarity; + unsigned long segments; + int level, sc_level, next_sc_level; + int slot; + + pr_devel("-->%s()\n", __func__); + + cursor = ACCESS_ONCE(array->root); + if (!cursor) + return assoc_array_walk_tree_empty; + + level = 0; + + /* Use segments from the key for the new leaf to navigate through the + * internal tree, skipping through nodes and shortcuts that are on + * route to the destination. Eventually we'll come to a slot that is + * either empty or contains a leaf at which point we've found a node in + * which the leaf we're looking for might be found or into which it + * should be inserted. + */ +jumped: + segments = ops->get_key_chunk(index_key, level); + pr_devel("segments[%d]: %lx\n", level, segments); + + if (assoc_array_ptr_is_shortcut(cursor)) + goto follow_shortcut; + +consider_node: + node = assoc_array_ptr_to_node(cursor); + smp_read_barrier_depends(); + + slot = segments >> (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + slot &= ASSOC_ARRAY_FAN_MASK; + ptr = ACCESS_ONCE(node->slots[slot]); + + pr_devel("consider slot %x [ix=%d type=%lu]\n", + slot, level, (unsigned long)ptr & 3); + + if (!assoc_array_ptr_is_meta(ptr)) { + /* The node doesn't have a node/shortcut pointer in the slot + * corresponding to the index key that we have to follow. + */ + result->terminal_node.node = node; + result->terminal_node.level = level; + result->terminal_node.slot = slot; + pr_devel("<--%s() = terminal_node\n", __func__); + return assoc_array_walk_found_terminal_node; + } + + if (assoc_array_ptr_is_node(ptr)) { + /* There is a pointer to a node in the slot corresponding to + * this index key segment, so we need to follow it. + */ + cursor = ptr; + level += ASSOC_ARRAY_LEVEL_STEP; + if ((level & ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) + goto consider_node; + goto jumped; + } + + /* There is a shortcut in the slot corresponding to the index key + * segment. We follow the shortcut if its partial index key matches + * this leaf's. Otherwise we need to split the shortcut. + */ + cursor = ptr; +follow_shortcut: + shortcut = assoc_array_ptr_to_shortcut(cursor); + smp_read_barrier_depends(); + pr_devel("shortcut to %d\n", shortcut->skip_to_level); + sc_level = level + ASSOC_ARRAY_LEVEL_STEP; + BUG_ON(sc_level > shortcut->skip_to_level); + + do { + /* Check the leaf against the shortcut's index key a word at a + * time, trimming the final word (the shortcut stores the index + * key completely from the root to the shortcut's target). + */ + if ((sc_level & ASSOC_ARRAY_KEY_CHUNK_MASK) == 0) + segments = ops->get_key_chunk(index_key, sc_level); + + sc_segments = shortcut->index_key[sc_level >> ASSOC_ARRAY_KEY_CHUNK_SHIFT]; + dissimilarity = segments ^ sc_segments; + + if (round_up(sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE) > shortcut->skip_to_level) { + /* Trim segments that are beyond the shortcut */ + int shift = shortcut->skip_to_level & ASSOC_ARRAY_KEY_CHUNK_MASK; + dissimilarity &= ~(ULONG_MAX << shift); + next_sc_level = shortcut->skip_to_level; + } else { + next_sc_level = sc_level + ASSOC_ARRAY_KEY_CHUNK_SIZE; + next_sc_level = round_down(next_sc_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + } + + if (dissimilarity != 0) { + /* This shortcut points elsewhere */ + result->wrong_shortcut.shortcut = shortcut; + result->wrong_shortcut.level = level; + result->wrong_shortcut.sc_level = sc_level; + result->wrong_shortcut.sc_segments = sc_segments; + result->wrong_shortcut.dissimilarity = dissimilarity; + return assoc_array_walk_found_wrong_shortcut; + } + + sc_level = next_sc_level; + } while (sc_level < shortcut->skip_to_level); + + /* The shortcut matches the leaf's index to this point. */ + cursor = ACCESS_ONCE(shortcut->next_node); + if (((level ^ sc_level) & ~ASSOC_ARRAY_KEY_CHUNK_MASK) != 0) { + level = sc_level; + goto jumped; + } else { + level = sc_level; + goto consider_node; + } +} + +/** + * assoc_array_find - Find an object by index key + * @array: The associative array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Find an object in an associative array by walking through the internal tree + * to the node that should contain the object and then searching the leaves + * there. NULL is returned if the requested object was not found in the array. + * + * The caller must hold the RCU read lock or better. + */ +void *assoc_array_find(const struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_walk_result result; + const struct assoc_array_node *node; + const struct assoc_array_ptr *ptr; + const void *leaf; + int slot; + + if (assoc_array_walk(array, ops, index_key, &result) != + assoc_array_walk_found_terminal_node) + return NULL; + + node = result.terminal_node.node; + smp_read_barrier_depends(); + + /* If the target key is available to us, it's has to be pointed to by + * the terminal node. + */ + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + if (ptr && assoc_array_ptr_is_leaf(ptr)) { + /* We need a barrier between the read of the pointer + * and dereferencing the pointer - but only if we are + * actually going to dereference it. + */ + leaf = assoc_array_ptr_to_leaf(ptr); + smp_read_barrier_depends(); + if (ops->compare_object(leaf, index_key)) + return (void *)leaf; + } + } + + return NULL; +} + +/* + * Destructively iterate over an associative array. The caller must prevent + * other simultaneous accesses. + */ +static void assoc_array_destroy_subtree(struct assoc_array_ptr *root, + const struct assoc_array_ops *ops) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *cursor, *parent = NULL; + int slot = -1; + + pr_devel("-->%s()\n", __func__); + + cursor = root; + if (!cursor) { + pr_devel("empty\n"); + return; + } + +move_to_meta: + if (assoc_array_ptr_is_shortcut(cursor)) { + /* Descend through a shortcut */ + pr_devel("[%d] shortcut\n", slot); + BUG_ON(!assoc_array_ptr_is_shortcut(cursor)); + shortcut = assoc_array_ptr_to_shortcut(cursor); + BUG_ON(shortcut->back_pointer != parent); + BUG_ON(slot != -1 && shortcut->parent_slot != slot); + parent = cursor; + cursor = shortcut->next_node; + slot = -1; + BUG_ON(!assoc_array_ptr_is_node(cursor)); + } + + pr_devel("[%d] node\n", slot); + node = assoc_array_ptr_to_node(cursor); + BUG_ON(node->back_pointer != parent); + BUG_ON(slot != -1 && node->parent_slot != slot); + slot = 0; + +continue_node: + pr_devel("Node %p [back=%p]\n", node, node->back_pointer); + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_ptr *ptr = node->slots[slot]; + if (!ptr) + continue; + if (assoc_array_ptr_is_meta(ptr)) { + parent = cursor; + cursor = ptr; + goto move_to_meta; + } + + if (ops) { + pr_devel("[%d] free leaf\n", slot); + ops->free_object(assoc_array_ptr_to_leaf(ptr)); + } + } + + parent = node->back_pointer; + slot = node->parent_slot; + pr_devel("free node\n"); + kfree(node); + if (!parent) + return; /* Done */ + + /* Move back up to the parent (may need to free a shortcut on + * the way up) */ + if (assoc_array_ptr_is_shortcut(parent)) { + shortcut = assoc_array_ptr_to_shortcut(parent); + BUG_ON(shortcut->next_node != cursor); + cursor = parent; + parent = shortcut->back_pointer; + slot = shortcut->parent_slot; + pr_devel("free shortcut\n"); + kfree(shortcut); + if (!parent) + return; + + BUG_ON(!assoc_array_ptr_is_node(parent)); + } + + /* Ascend to next slot in parent node */ + pr_devel("ascend to %p[%d]\n", parent, slot); + cursor = parent; + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; +} + +/** + * assoc_array_destroy - Destroy an associative array + * @array: The array to destroy. + * @ops: The operations to use. + * + * Discard all metadata and free all objects in an associative array. The + * array will be empty and ready to use again upon completion. This function + * cannot fail. + * + * The caller must prevent all other accesses whilst this takes place as no + * attempt is made to adjust pointers gracefully to permit RCU readlock-holding + * accesses to continue. On the other hand, no memory allocation is required. + */ +void assoc_array_destroy(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + assoc_array_destroy_subtree(array->root, ops); + array->root = NULL; +} + +/* + * Handle insertion into an empty tree. + */ +static bool assoc_array_insert_in_empty_tree(struct assoc_array_edit *edit) +{ + struct assoc_array_node *new_n0; + + pr_devel("-->%s()\n", __func__); + + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[0]; + edit->adjust_count_on = new_n0; + edit->set[0].ptr = &edit->array->root; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + + pr_devel("<--%s() = ok [no root]\n", __func__); + return true; +} + +/* + * Handle insertion into a terminal node. + */ +static bool assoc_array_insert_into_terminal_node(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + const void *index_key, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0; + struct assoc_array_node *node, *new_n0, *new_n1, *side; + struct assoc_array_ptr *ptr; + unsigned long dissimilarity, base_seg, blank; + size_t keylen; + bool have_meta; + int level, diff; + int slot, next_slot, free_slot, i, j; + + node = result->terminal_node.node; + level = result->terminal_node.level; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = result->terminal_node.slot; + + pr_devel("-->%s()\n", __func__); + + /* We arrived at a node which doesn't have an onward node or shortcut + * pointer that we have to follow. This means that (a) the leaf we + * want must go here (either by insertion or replacement) or (b) we + * need to split this node and insert in one of the fragments. + */ + free_slot = -1; + + /* Firstly, we have to check the leaves in this node to see if there's + * a matching one we should replace in place. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (!ptr) { + free_slot = i; + continue; + } + if (ops->compare_object(assoc_array_ptr_to_leaf(ptr), index_key)) { + pr_devel("replace in slot %d\n", i); + edit->leaf_p = &node->slots[i]; + edit->dead_leaf = node->slots[i]; + pr_devel("<--%s() = ok [replace]\n", __func__); + return true; + } + } + + /* If there is a free slot in this node then we can just insert the + * leaf here. + */ + if (free_slot >= 0) { + pr_devel("insert in free slot %d\n", free_slot); + edit->leaf_p = &node->slots[free_slot]; + edit->adjust_count_on = node; + pr_devel("<--%s() = ok [insert]\n", __func__); + return true; + } + + /* The node has no spare slots - so we're either going to have to split + * it or insert another node before it. + * + * Whatever, we're going to need at least two new nodes - so allocate + * those now. We may also need a new shortcut, but we deal with that + * when we need it. + */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + new_n1 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n1) + return false; + edit->new_meta[1] = assoc_array_node_to_ptr(new_n1); + + /* We need to find out how similar the leaves are. */ + pr_devel("no spare slots\n"); + have_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + edit->segment_cache[i] = 0xff; + have_meta = true; + continue; + } + base_seg = ops->get_object_key_chunk( + assoc_array_ptr_to_leaf(ptr), level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + if (have_meta) { + pr_devel("have meta\n"); + goto split_node; + } + + /* The node contains only leaves */ + dissimilarity = 0; + base_seg = edit->segment_cache[0]; + for (i = 1; i < ASSOC_ARRAY_FAN_OUT; i++) + dissimilarity |= edit->segment_cache[i] ^ base_seg; + + pr_devel("only leaves; dissimilarity=%lx\n", dissimilarity); + + if ((dissimilarity & ASSOC_ARRAY_FAN_MASK) == 0) { + /* The old leaves all cluster in the same slot. We will need + * to insert a shortcut if the new node wants to cluster with them. + */ + if ((edit->segment_cache[ASSOC_ARRAY_FAN_OUT] ^ base_seg) == 0) + goto all_leaves_cluster_together; + + /* Otherwise we can just insert a new node ahead of the old + * one. + */ + goto present_leaves_cluster_but_not_new_leaf; + } + +split_node: + pr_devel("split node\n"); + + /* We need to split the current node; we know that the node doesn't + * simply contain a full set of leaves that cluster together (it + * contains meta pointers and/or non-clustering leaves). + * + * We need to expel at least two leaves out of a set consisting of the + * leaves in the node and the new leaf. + * + * We need a new node (n0) to replace the current one and a new node to + * take the expelled nodes (n1). + */ + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + +do_split_node: + pr_devel("do_split_node\n"); + + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->nr_leaves_on_branch = 0; + + /* Begin by finding two matching leaves. There have to be at least two + * that match - even if there are meta pointers - because any leaf that + * would match a slot with a meta pointer in it must be somewhere + * behind that meta pointer and cannot be here. Further, given N + * remaining leaf slots, we now have N+1 leaves to go in them. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + slot = edit->segment_cache[i]; + if (slot != 0xff) + for (j = i + 1; j < ASSOC_ARRAY_FAN_OUT + 1; j++) + if (edit->segment_cache[j] == slot) + goto found_slot_for_multiple_occupancy; + } +found_slot_for_multiple_occupancy: + pr_devel("same slot: %x %x [%02x]\n", i, j, slot); + BUG_ON(i >= ASSOC_ARRAY_FAN_OUT); + BUG_ON(j >= ASSOC_ARRAY_FAN_OUT + 1); + BUG_ON(slot >= ASSOC_ARRAY_FAN_OUT); + + new_n1->parent_slot = slot; + + /* Metadata pointers cannot change slot */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + if (assoc_array_ptr_is_meta(node->slots[i])) + new_n0->slots[i] = node->slots[i]; + else + new_n0->slots[i] = NULL; + BUG_ON(new_n0->slots[slot] != NULL); + new_n0->slots[slot] = assoc_array_node_to_ptr(new_n1); + + /* Filter the leaf pointers between the new nodes */ + free_slot = -1; + next_slot = 0; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (assoc_array_ptr_is_meta(node->slots[i])) + continue; + if (edit->segment_cache[i] == slot) { + new_n1->slots[next_slot++] = node->slots[i]; + new_n1->nr_leaves_on_branch++; + } else { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + new_n0->slots[free_slot] = node->slots[i]; + } + } + + pr_devel("filtered: f=%x n=%x\n", free_slot, next_slot); + + if (edit->segment_cache[ASSOC_ARRAY_FAN_OUT] != slot) { + do { + free_slot++; + } while (new_n0->slots[free_slot] != NULL); + edit->leaf_p = &new_n0->slots[free_slot]; + edit->adjust_count_on = new_n0; + } else { + edit->leaf_p = &new_n1->slots[next_slot++]; + edit->adjust_count_on = new_n1; + } + + BUG_ON(next_slot <= 1); + + edit->set_backpointers_to = assoc_array_node_to_ptr(new_n0); + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + if (edit->segment_cache[i] == 0xff) { + ptr = node->slots[i]; + BUG_ON(assoc_array_ptr_is_leaf(ptr)); + if (assoc_array_ptr_is_node(ptr)) { + side = assoc_array_ptr_to_node(ptr); + edit->set_backpointers[i] = &side->back_pointer; + } else { + shortcut = assoc_array_ptr_to_shortcut(ptr); + edit->set_backpointers[i] = &shortcut->back_pointer; + } + } + } + + ptr = node->back_pointer; + if (!ptr) + edit->set[0].ptr = &edit->array->root; + else if (assoc_array_ptr_is_node(ptr)) + edit->set[0].ptr = &assoc_array_ptr_to_node(ptr)->slots[node->parent_slot]; + else + edit->set[0].ptr = &assoc_array_ptr_to_shortcut(ptr)->next_node; + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [split node]\n", __func__); + return true; + +present_leaves_cluster_but_not_new_leaf: + /* All the old leaves cluster in the same slot, but the new leaf wants + * to go into a different slot, so we create a new node to hold the new + * leaf and a pointer to a new node holding all the old leaves. + */ + pr_devel("present leaves cluster but not new leaf\n"); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = edit->segment_cache[0]; + new_n1->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) + new_n1->slots[i] = node->slots[i]; + + new_n0->slots[edit->segment_cache[0]] = assoc_array_node_to_ptr(new_n0); + edit->leaf_p = &new_n0->slots[edit->segment_cache[ASSOC_ARRAY_FAN_OUT]]; + + edit->set[0].ptr = &assoc_array_ptr_to_node(node->back_pointer)->slots[node->parent_slot]; + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + edit->excised_meta[0] = assoc_array_node_to_ptr(node); + pr_devel("<--%s() = ok [insert node before]\n", __func__); + return true; + +all_leaves_cluster_together: + /* All the leaves, new and old, want to cluster together in this node + * in the same slot, so we have to replace this node with a shortcut to + * skip over the identical parts of the key and then place a pair of + * nodes, one inside the other, at the end of the shortcut and + * distribute the keys between them. + * + * Firstly we need to work out where the leaves start diverging as a + * bit position into their keys so that we know how big the shortcut + * needs to be. + * + * We only need to make a single pass of N of the N+1 leaves because if + * any keys differ between themselves at bit X then at least one of + * them must also differ with the base key at bit X or before. + */ + pr_devel("all leaves cluster together\n"); + diff = INT_MAX; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + int x = ops->diff_objects(assoc_array_ptr_to_leaf(edit->leaf), + assoc_array_ptr_to_leaf(node->slots[i])); + if (x < diff) { + BUG_ON(x < 0); + diff = x; + } + } + BUG_ON(diff == INT_MAX); + BUG_ON(diff < level + ASSOC_ARRAY_LEVEL_STEP); + + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s0); + + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = node->back_pointer; + new_s0->parent_slot = node->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + new_n1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_n1->parent_slot = -1; /* Need to calculate this */ + + new_s0->skip_to_level = level = diff & ~ASSOC_ARRAY_LEVEL_STEP_MASK; + pr_devel("skip_to_level = %d [diff %d]\n", level, diff); + BUG_ON(level <= 0); + + for (i = 0; i < keylen; i++) + new_s0->index_key[i] = + ops->get_key_chunk(index_key, i * ASSOC_ARRAY_KEY_CHUNK_SIZE); + + blank = ULONG_MAX << (level & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, level, blank); + new_s0->index_key[keylen - 1] &= ~blank; + + /* This now reduces to a node splitting exercise for which we'll need + * to regenerate the disparity table. + */ + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + base_seg = ops->get_object_key_chunk(assoc_array_ptr_to_leaf(ptr), + level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[i] = base_seg & ASSOC_ARRAY_FAN_MASK; + } + + base_seg = ops->get_key_chunk(index_key, level); + base_seg >>= level & ASSOC_ARRAY_KEY_CHUNK_MASK; + edit->segment_cache[ASSOC_ARRAY_FAN_OUT] = base_seg & ASSOC_ARRAY_FAN_MASK; + goto do_split_node; +} + +/* + * Handle insertion into the middle of a shortcut. + */ +static bool assoc_array_insert_mid_shortcut(struct assoc_array_edit *edit, + const struct assoc_array_ops *ops, + struct assoc_array_walk_result *result) +{ + struct assoc_array_shortcut *shortcut, *new_s0, *new_s1; + struct assoc_array_node *node, *new_n0, *side; + unsigned long sc_segments, dissimilarity, blank; + size_t keylen; + int level, sc_level, diff; + int sc_slot; + + shortcut = result->wrong_shortcut.shortcut; + level = result->wrong_shortcut.level; + sc_level = result->wrong_shortcut.sc_level; + sc_segments = result->wrong_shortcut.sc_segments; + dissimilarity = result->wrong_shortcut.dissimilarity; + + pr_devel("-->%s(ix=%d dis=%lx scix=%d)\n", + __func__, level, dissimilarity, sc_level); + + /* We need to split a shortcut and insert a node between the two + * pieces. Zero-length pieces will be dispensed with entirely. + * + * First of all, we need to find out in which level the first + * difference was. + */ + diff = __ffs(dissimilarity); + diff &= ~ASSOC_ARRAY_LEVEL_STEP_MASK; + diff += sc_level & ~ASSOC_ARRAY_KEY_CHUNK_MASK; + pr_devel("diff=%d\n", diff); + + if (!shortcut->back_pointer) { + edit->set[0].ptr = &edit->array->root; + } else if (assoc_array_ptr_is_node(shortcut->back_pointer)) { + node = assoc_array_ptr_to_node(shortcut->back_pointer); + edit->set[0].ptr = &node->slots[shortcut->parent_slot]; + } else { + BUG(); + } + + edit->excised_meta[0] = assoc_array_shortcut_to_ptr(shortcut); + + /* Create a new node now since we're going to need it anyway */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + return false; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + edit->adjust_count_on = new_n0; + + /* Insert a new shortcut before the new node if this segment isn't of + * zero length - otherwise we just connect the new node directly to the + * parent. + */ + level += ASSOC_ARRAY_LEVEL_STEP; + if (diff > level) { + pr_devel("pre-shortcut %d...%d\n", level, diff); + keylen = round_up(diff, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s0 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s0) + return false; + edit->new_meta[1] = assoc_array_shortcut_to_ptr(new_s0); + edit->set[0].to = assoc_array_shortcut_to_ptr(new_s0); + new_s0->back_pointer = shortcut->back_pointer; + new_s0->parent_slot = shortcut->parent_slot; + new_s0->next_node = assoc_array_node_to_ptr(new_n0); + new_s0->skip_to_level = diff; + + new_n0->back_pointer = assoc_array_shortcut_to_ptr(new_s0); + new_n0->parent_slot = 0; + + memcpy(new_s0->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + blank = ULONG_MAX << (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + pr_devel("blank off [%zu] %d: %lx\n", keylen - 1, diff, blank); + new_s0->index_key[keylen - 1] &= ~blank; + } else { + pr_devel("no pre-shortcut\n"); + edit->set[0].to = assoc_array_node_to_ptr(new_n0); + new_n0->back_pointer = shortcut->back_pointer; + new_n0->parent_slot = shortcut->parent_slot; + } + + side = assoc_array_ptr_to_node(shortcut->next_node); + new_n0->nr_leaves_on_branch = side->nr_leaves_on_branch; + + /* We need to know which slot in the new node is going to take a + * metadata pointer. + */ + sc_slot = sc_segments >> (diff & ASSOC_ARRAY_KEY_CHUNK_MASK); + sc_slot &= ASSOC_ARRAY_FAN_MASK; + + pr_devel("new slot %lx >> %d -> %d\n", + sc_segments, diff & ASSOC_ARRAY_KEY_CHUNK_MASK, sc_slot); + + /* Determine whether we need to follow the new node with a replacement + * for the current shortcut. We could in theory reuse the current + * shortcut if its parent slot number doesn't change - but that's a + * 1-in-16 chance so not worth expending the code upon. + */ + level = diff + ASSOC_ARRAY_LEVEL_STEP; + if (level < shortcut->skip_to_level) { + pr_devel("post-shortcut %d...%d\n", level, shortcut->skip_to_level); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + + new_s1 = kzalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s1) + return false; + edit->new_meta[2] = assoc_array_shortcut_to_ptr(new_s1); + + new_s1->back_pointer = assoc_array_node_to_ptr(new_n0); + new_s1->parent_slot = sc_slot; + new_s1->next_node = shortcut->next_node; + new_s1->skip_to_level = shortcut->skip_to_level; + + new_n0->slots[sc_slot] = assoc_array_shortcut_to_ptr(new_s1); + + memcpy(new_s1->index_key, shortcut->index_key, + keylen * sizeof(unsigned long)); + + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_shortcut_to_ptr(new_s1); + } else { + pr_devel("no post-shortcut\n"); + + /* We don't have to replace the pointed-to node as long as we + * use memory barriers to make sure the parent slot number is + * changed before the back pointer (the parent slot number is + * irrelevant to the old parent shortcut). + */ + new_n0->slots[sc_slot] = shortcut->next_node; + edit->set_parent_slot[0].p = &side->parent_slot; + edit->set_parent_slot[0].to = sc_slot; + edit->set[1].ptr = &side->back_pointer; + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + } + + /* Install the new leaf in a spare slot in the new node. */ + if (sc_slot == 0) + edit->leaf_p = &new_n0->slots[1]; + else + edit->leaf_p = &new_n0->slots[0]; + + pr_devel("<--%s() = ok [split shortcut]\n", __func__); + return edit; +} + +/** + * assoc_array_insert - Script insertion of an object into an associative array + * @array: The array to insert into. + * @ops: The operations to use. + * @index_key: The key to insert at. + * @object: The object to insert. + * + * Precalculate and preallocate a script for the insertion or replacement of an + * object in an associative array. This results in an edit script that can + * either be applied or cancelled. + * + * The function returns a pointer to an edit script or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_insert(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key, + void *object) +{ + struct assoc_array_walk_result result; + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + /* The leaf pointer we're given must not have the bottom bit set as we + * use those for type-marking the pointer. NULL pointers are also not + * allowed as they indicate an empty slot but we have to allow them + * here as they can be updated later. + */ + BUG_ON(assoc_array_ptr_is_meta(object)); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->leaf = assoc_array_leaf_to_ptr(object); + edit->adjust_count_by = 1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_tree_empty: + /* Allocate a root node if there isn't one yet */ + if (!assoc_array_insert_in_empty_tree(edit)) + goto enomem; + return edit; + + case assoc_array_walk_found_terminal_node: + /* We found a node that doesn't have a node/shortcut pointer in + * the slot corresponding to the index key that we have to + * follow. + */ + if (!assoc_array_insert_into_terminal_node(edit, ops, index_key, + &result)) + goto enomem; + return edit; + + case assoc_array_walk_found_wrong_shortcut: + /* We found a shortcut that didn't match our key in a slot we + * needed to follow. + */ + if (!assoc_array_insert_mid_shortcut(edit, ops, &result)) + goto enomem; + return edit; + } + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_insert_set_object - Set the new object pointer in an edit script + * @edit: The edit script to modify. + * @object: The object pointer to set. + * + * Change the object to be inserted in an edit script. The object pointed to + * by the old object is not freed. This must be done prior to applying the + * script. + */ +void assoc_array_insert_set_object(struct assoc_array_edit *edit, void *object) +{ + BUG_ON(!object); + edit->leaf = assoc_array_leaf_to_ptr(object); +} + +struct assoc_array_delete_collapse_context { + struct assoc_array_node *node; + const void *skip_leaf; + int slot; +}; + +/* + * Subtree collapse to node iterator. + */ +static int assoc_array_delete_collapse_iterator(const void *leaf, + void *iterator_data) +{ + struct assoc_array_delete_collapse_context *collapse = iterator_data; + + if (leaf == collapse->skip_leaf) + return 0; + + BUG_ON(collapse->slot >= ASSOC_ARRAY_FAN_OUT); + + collapse->node->slots[collapse->slot++] = assoc_array_leaf_to_ptr(leaf); + return 0; +} + +/** + * assoc_array_delete - Script deletion of an object from an associative array + * @array: The array to search. + * @ops: The operations to use. + * @index_key: The key to the object. + * + * Precalculate and preallocate a script for the deletion of an object from an + * associative array. This results in an edit script that can either be + * applied or cancelled. + * + * The function returns a pointer to an edit script if the object was found, + * NULL if the object was not found or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_delete(struct assoc_array *array, + const struct assoc_array_ops *ops, + const void *index_key) +{ + struct assoc_array_delete_collapse_context collapse; + struct assoc_array_walk_result result; + struct assoc_array_node *node, *new_n0; + struct assoc_array_edit *edit; + struct assoc_array_ptr *ptr; + bool has_meta; + int slot, i; + + pr_devel("-->%s()\n", __func__); + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->adjust_count_by = -1; + + switch (assoc_array_walk(array, ops, index_key, &result)) { + case assoc_array_walk_found_terminal_node: + /* We found a node that should contain the leaf we've been + * asked to remove - *if* it's in the tree. + */ + pr_devel("terminal_node\n"); + node = result.terminal_node.node; + + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (ptr && + assoc_array_ptr_is_leaf(ptr) && + ops->compare_object(assoc_array_ptr_to_leaf(ptr), + index_key)) + goto found_leaf; + } + case assoc_array_walk_tree_empty: + case assoc_array_walk_found_wrong_shortcut: + default: + assoc_array_cancel_edit(edit); + pr_devel("not found\n"); + return NULL; + } + +found_leaf: + BUG_ON(array->nr_leaves_on_tree <= 0); + + /* In the simplest form of deletion we just clear the slot and release + * the leaf after a suitable interval. + */ + edit->dead_leaf = node->slots[slot]; + edit->set[0].ptr = &node->slots[slot]; + edit->set[0].to = NULL; + edit->adjust_count_on = node; + + /* If that concludes erasure of the last leaf, then delete the entire + * internal array. + */ + if (array->nr_leaves_on_tree == 1) { + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->adjust_count_on = NULL; + edit->excised_subtree = array->root; + pr_devel("all gone\n"); + return edit; + } + + /* However, we'd also like to clear up some metadata blocks if we + * possibly can. + * + * We go for a simple algorithm of: if this node has FAN_OUT or fewer + * leaves in it, then attempt to collapse it - and attempt to + * recursively collapse up the tree. + * + * We could also try and collapse in partially filled subtrees to take + * up space in this node. + */ + if (node->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + struct assoc_array_node *parent, *grandparent; + struct assoc_array_ptr *ptr; + + /* First of all, we need to know if this node has metadata so + * that we don't try collapsing if all the leaves are already + * here. + */ + has_meta = false; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + ptr = node->slots[i]; + if (assoc_array_ptr_is_meta(ptr)) { + has_meta = true; + break; + } + } + + pr_devel("leaves: %ld [m=%d]\n", + node->nr_leaves_on_branch - 1, has_meta); + + /* Look further up the tree to see if we can collapse this node + * into a more proximal node too. + */ + parent = node; + collapse_up: + pr_devel("collapse subtree: %ld\n", parent->nr_leaves_on_branch); + + ptr = parent->back_pointer; + if (!ptr) + goto do_collapse; + if (assoc_array_ptr_is_shortcut(ptr)) { + struct assoc_array_shortcut *s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->back_pointer; + if (!ptr) + goto do_collapse; + } + + grandparent = assoc_array_ptr_to_node(ptr); + if (grandparent->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT + 1) { + parent = grandparent; + goto collapse_up; + } + + do_collapse: + /* There's no point collapsing if the original node has no meta + * pointers to discard and if we didn't merge into one of that + * node's ancestry. + */ + if (has_meta || parent != node) { + node = parent; + + /* Create a new node to collapse into */ + new_n0 = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n0) + goto enomem; + edit->new_meta[0] = assoc_array_node_to_ptr(new_n0); + + new_n0->back_pointer = node->back_pointer; + new_n0->parent_slot = node->parent_slot; + new_n0->nr_leaves_on_branch = node->nr_leaves_on_branch; + edit->adjust_count_on = new_n0; + + collapse.node = new_n0; + collapse.skip_leaf = assoc_array_ptr_to_leaf(edit->dead_leaf); + collapse.slot = 0; + assoc_array_subtree_iterate(assoc_array_node_to_ptr(node), + node->back_pointer, + assoc_array_delete_collapse_iterator, + &collapse); + pr_devel("collapsed %d,%lu\n", collapse.slot, new_n0->nr_leaves_on_branch); + BUG_ON(collapse.slot != new_n0->nr_leaves_on_branch - 1); + + if (!node->back_pointer) { + edit->set[1].ptr = &array->root; + } else if (assoc_array_ptr_is_leaf(node->back_pointer)) { + BUG(); + } else if (assoc_array_ptr_is_node(node->back_pointer)) { + struct assoc_array_node *p = + assoc_array_ptr_to_node(node->back_pointer); + edit->set[1].ptr = &p->slots[node->parent_slot]; + } else if (assoc_array_ptr_is_shortcut(node->back_pointer)) { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(node->back_pointer); + edit->set[1].ptr = &s->next_node; + } + edit->set[1].to = assoc_array_node_to_ptr(new_n0); + edit->excised_subtree = assoc_array_node_to_ptr(node); + } + } + + return edit; + +enomem: + /* Clean up after an out of memory error */ + pr_devel("enomem\n"); + assoc_array_cancel_edit(edit); + return ERR_PTR(-ENOMEM); +} + +/** + * assoc_array_clear - Script deletion of all objects from an associative array + * @array: The array to clear. + * @ops: The operations to use. + * + * Precalculate and preallocate a script for the deletion of all the objects + * from an associative array. This results in an edit script that can either + * be applied or cancelled. + * + * The function returns a pointer to an edit script if there are objects to be + * deleted, NULL if there are no objects in the array or -ENOMEM. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +struct assoc_array_edit *assoc_array_clear(struct assoc_array *array, + const struct assoc_array_ops *ops) +{ + struct assoc_array_edit *edit; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return NULL; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return ERR_PTR(-ENOMEM); + edit->array = array; + edit->ops = ops; + edit->set[1].ptr = &array->root; + edit->set[1].to = NULL; + edit->excised_subtree = array->root; + edit->ops_for_excised_subtree = ops; + pr_devel("all gone\n"); + return edit; +} + +/* + * Handle the deferred destruction after an applied edit. + */ +static void assoc_array_rcu_cleanup(struct rcu_head *head) +{ + struct assoc_array_edit *edit = + container_of(head, struct assoc_array_edit, rcu); + int i; + + pr_devel("-->%s()\n", __func__); + + if (edit->dead_leaf) + edit->ops->free_object(assoc_array_ptr_to_leaf(edit->dead_leaf)); + for (i = 0; i < ARRAY_SIZE(edit->excised_meta); i++) + if (edit->excised_meta[i]) + kfree(assoc_array_ptr_to_node(edit->excised_meta[i])); + + if (edit->excised_subtree) { + BUG_ON(assoc_array_ptr_is_leaf(edit->excised_subtree)); + if (assoc_array_ptr_is_node(edit->excised_subtree)) { + struct assoc_array_node *n = + assoc_array_ptr_to_node(edit->excised_subtree); + n->back_pointer = NULL; + } else { + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(edit->excised_subtree); + s->back_pointer = NULL; + } + assoc_array_destroy_subtree(edit->excised_subtree, + edit->ops_for_excised_subtree); + } + + kfree(edit); +} + +/** + * assoc_array_apply_edit - Apply an edit script to an associative array + * @edit: The script to apply. + * + * Apply an edit script to an associative array to effect an insertion, + * deletion or clearance. As the edit script includes preallocated memory, + * this is guaranteed not to fail. + * + * The edit script, dead objects and dead metadata will be scheduled for + * destruction after an RCU grace period to permit those doing read-only + * accesses on the array to continue to do so under the RCU read lock whilst + * the edit is taking place. + */ +void assoc_array_apply_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + smp_wmb(); + if (edit->leaf_p) + *edit->leaf_p = edit->leaf; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_parent_slot); i++) + if (edit->set_parent_slot[i].p) + *edit->set_parent_slot[i].p = edit->set_parent_slot[i].to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set_backpointers); i++) + if (edit->set_backpointers[i]) + *edit->set_backpointers[i] = edit->set_backpointers_to; + + smp_wmb(); + for (i = 0; i < ARRAY_SIZE(edit->set); i++) + if (edit->set[i].ptr) + *edit->set[i].ptr = edit->set[i].to; + + if (edit->array->root == NULL) { + edit->array->nr_leaves_on_tree = 0; + } else if (edit->adjust_count_on) { + node = edit->adjust_count_on; + for (;;) { + node->nr_leaves_on_branch += edit->adjust_count_by; + + ptr = node->back_pointer; + if (!ptr) + break; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + ptr = shortcut->back_pointer; + if (!ptr) + break; + } + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + + edit->array->nr_leaves_on_tree += edit->adjust_count_by; + } + + call_rcu(&edit->rcu, assoc_array_rcu_cleanup); +} + +/** + * assoc_array_cancel_edit - Discard an edit script. + * @edit: The script to discard. + * + * Free an edit script and all the preallocated data it holds without making + * any changes to the associative array it was intended for. + * + * NOTE! In the case of an insertion script, this does _not_ release the leaf + * that was to be inserted. That is left to the caller. + */ +void assoc_array_cancel_edit(struct assoc_array_edit *edit) +{ + struct assoc_array_ptr *ptr; + int i; + + pr_devel("-->%s()\n", __func__); + + /* Clean up after an out of memory error */ + for (i = 0; i < ARRAY_SIZE(edit->new_meta); i++) { + ptr = edit->new_meta[i]; + if (ptr) { + if (assoc_array_ptr_is_node(ptr)) + kfree(assoc_array_ptr_to_node(ptr)); + else + kfree(assoc_array_ptr_to_shortcut(ptr)); + } + } + kfree(edit); +} + +/** + * assoc_array_gc - Garbage collect an associative array. + * @array: The array to clean. + * @ops: The operations to use. + * @iterator: A callback function to pass judgement on each object. + * @iterator_data: Private data for the callback function. + * + * Collect garbage from an associative array and pack down the internal tree to + * save memory. + * + * The iterator function is asked to pass judgement upon each object in the + * array. If it returns false, the object is discard and if it returns true, + * the object is kept. If it returns true, it must increment the object's + * usage count (or whatever it needs to do to retain it) before returning. + * + * This function returns 0 if successful or -ENOMEM if out of memory. In the + * latter case, the array is not changed. + * + * The caller should lock against other modifications and must continue to hold + * the lock until assoc_array_apply_edit() has been called. + * + * Accesses to the tree may take place concurrently with this function, + * provided they hold the RCU read lock. + */ +int assoc_array_gc(struct assoc_array *array, + const struct assoc_array_ops *ops, + bool (*iterator)(void *object, void *iterator_data), + void *iterator_data) +{ + struct assoc_array_shortcut *shortcut, *new_s; + struct assoc_array_node *node, *new_n; + struct assoc_array_edit *edit; + struct assoc_array_ptr *cursor, *ptr; + struct assoc_array_ptr *new_root, *new_parent, **new_ptr_pp; + unsigned long nr_leaves_on_tree; + int keylen, slot, nr_free, next_slot, i; + + pr_devel("-->%s()\n", __func__); + + if (!array->root) + return 0; + + edit = kzalloc(sizeof(struct assoc_array_edit), GFP_KERNEL); + if (!edit) + return -ENOMEM; + edit->array = array; + edit->ops = ops; + edit->ops_for_excised_subtree = ops; + edit->set[0].ptr = &array->root; + edit->excised_subtree = array->root; + + new_root = new_parent = NULL; + new_ptr_pp = &new_root; + cursor = array->root; + +descend: + /* If this point is a shortcut, then we need to duplicate it and + * advance the target cursor. + */ + if (assoc_array_ptr_is_shortcut(cursor)) { + shortcut = assoc_array_ptr_to_shortcut(cursor); + keylen = round_up(shortcut->skip_to_level, ASSOC_ARRAY_KEY_CHUNK_SIZE); + keylen >>= ASSOC_ARRAY_KEY_CHUNK_SHIFT; + new_s = kmalloc(sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long), GFP_KERNEL); + if (!new_s) + goto enomem; + pr_devel("dup shortcut %p -> %p\n", shortcut, new_s); + memcpy(new_s, shortcut, (sizeof(struct assoc_array_shortcut) + + keylen * sizeof(unsigned long))); + new_s->back_pointer = new_parent; + new_s->parent_slot = shortcut->parent_slot; + *new_ptr_pp = new_parent = assoc_array_shortcut_to_ptr(new_s); + new_ptr_pp = &new_s->next_node; + cursor = shortcut->next_node; + } + + /* Duplicate the node at this position */ + node = assoc_array_ptr_to_node(cursor); + new_n = kzalloc(sizeof(struct assoc_array_node), GFP_KERNEL); + if (!new_n) + goto enomem; + pr_devel("dup node %p -> %p\n", node, new_n); + new_n->back_pointer = new_parent; + new_n->parent_slot = node->parent_slot; + *new_ptr_pp = new_parent = assoc_array_node_to_ptr(new_n); + new_ptr_pp = NULL; + slot = 0; + +continue_node: + /* Filter across any leaves and gc any subtrees */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = node->slots[slot]; + if (!ptr) + continue; + + if (assoc_array_ptr_is_leaf(ptr)) { + if (iterator(assoc_array_ptr_to_leaf(ptr), + iterator_data)) + /* The iterator will have done any reference + * counting on the object for us. + */ + new_n->slots[slot] = ptr; + continue; + } + + new_ptr_pp = &new_n->slots[slot]; + cursor = ptr; + goto descend; + } + + pr_devel("-- compress node %p --\n", new_n); + + /* Count up the number of empty slots in this node and work out the + * subtree leaf count. + */ + new_n->nr_leaves_on_branch = 0; + nr_free = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = new_n->slots[slot]; + if (!ptr) + nr_free++; + else if (assoc_array_ptr_is_leaf(ptr)) + new_n->nr_leaves_on_branch++; + } + pr_devel("free=%d, leaves=%lu\n", nr_free, new_n->nr_leaves_on_branch); + + /* See what we can fold in */ + next_slot = 0; + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + struct assoc_array_shortcut *s; + struct assoc_array_node *child; + + ptr = new_n->slots[slot]; + if (!ptr || assoc_array_ptr_is_leaf(ptr)) + continue; + + s = NULL; + if (assoc_array_ptr_is_shortcut(ptr)) { + s = assoc_array_ptr_to_shortcut(ptr); + ptr = s->next_node; + } + + child = assoc_array_ptr_to_node(ptr); + new_n->nr_leaves_on_branch += child->nr_leaves_on_branch; + + if (child->nr_leaves_on_branch <= nr_free + 1) { + /* Fold the child node into this one */ + pr_devel("[%d] fold node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + + /* We would already have reaped an intervening shortcut + * on the way back up the tree. + */ + BUG_ON(s); + + new_n->slots[slot] = NULL; + nr_free++; + if (slot < next_slot) + next_slot = slot; + for (i = 0; i < ASSOC_ARRAY_FAN_OUT; i++) { + struct assoc_array_ptr *p = child->slots[i]; + if (!p) + continue; + BUG_ON(assoc_array_ptr_is_meta(p)); + while (new_n->slots[next_slot]) + next_slot++; + BUG_ON(next_slot >= ASSOC_ARRAY_FAN_OUT); + new_n->slots[next_slot++] = p; + nr_free--; + } + kfree(child); + } else { + pr_devel("[%d] retain node %lu/%d [nx %d]\n", + slot, child->nr_leaves_on_branch, nr_free + 1, + next_slot); + } + } + + pr_devel("after: %lu\n", new_n->nr_leaves_on_branch); + + nr_leaves_on_tree = new_n->nr_leaves_on_branch; + + /* Excise this node if it is singly occupied by a shortcut */ + if (nr_free == ASSOC_ARRAY_FAN_OUT - 1) { + for (slot = 0; slot < ASSOC_ARRAY_FAN_OUT; slot++) + if ((ptr = new_n->slots[slot])) + break; + + if (assoc_array_ptr_is_meta(ptr) && + assoc_array_ptr_is_shortcut(ptr)) { + pr_devel("excise node %p with 1 shortcut\n", new_n); + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_n->back_pointer; + slot = new_n->parent_slot; + kfree(new_n); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + + if (assoc_array_ptr_is_shortcut(new_parent)) { + /* We can discard any preceding shortcut also */ + struct assoc_array_shortcut *s = + assoc_array_ptr_to_shortcut(new_parent); + + pr_devel("excise preceding shortcut\n"); + + new_parent = new_s->back_pointer = s->back_pointer; + slot = new_s->parent_slot = s->parent_slot; + kfree(s); + if (!new_parent) { + new_s->back_pointer = NULL; + new_s->parent_slot = 0; + new_root = ptr; + goto gc_complete; + } + } + + new_s->back_pointer = new_parent; + new_s->parent_slot = slot; + new_n = assoc_array_ptr_to_node(new_parent); + new_n->slots[slot] = ptr; + goto ascend_old_tree; + } + } + + /* Excise any shortcuts we might encounter that point to nodes that + * only contain leaves. + */ + ptr = new_n->back_pointer; + if (!ptr) + goto gc_complete; + + if (assoc_array_ptr_is_shortcut(ptr)) { + new_s = assoc_array_ptr_to_shortcut(ptr); + new_parent = new_s->back_pointer; + slot = new_s->parent_slot; + + if (new_n->nr_leaves_on_branch <= ASSOC_ARRAY_FAN_OUT) { + struct assoc_array_node *n; + + pr_devel("excise shortcut\n"); + new_n->back_pointer = new_parent; + new_n->parent_slot = slot; + kfree(new_s); + if (!new_parent) { + new_root = assoc_array_node_to_ptr(new_n); + goto gc_complete; + } + + n = assoc_array_ptr_to_node(new_parent); + n->slots[slot] = assoc_array_node_to_ptr(new_n); + } + } else { + new_parent = ptr; + } + new_n = assoc_array_ptr_to_node(new_parent); + +ascend_old_tree: + ptr = node->back_pointer; + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + slot = shortcut->parent_slot; + cursor = shortcut->back_pointer; + } else { + slot = node->parent_slot; + cursor = ptr; + } + BUG_ON(!ptr); + node = assoc_array_ptr_to_node(cursor); + slot++; + goto continue_node; + +gc_complete: + edit->set[0].to = new_root; + assoc_array_apply_edit(edit); + edit->array->nr_leaves_on_tree = nr_leaves_on_tree; + return 0; + +enomem: + pr_devel("enomem\n"); + assoc_array_destroy_subtree(new_root, edit->ops); + kfree(edit); + return -ENOMEM; +} -- cgit v1.2.3 From b2a4df200d570b2c33a57e1ebfa5896e4bc81b69 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 24 Sep 2013 10:35:18 +0100 Subject: KEYS: Expand the capacity of a keyring Expand the capacity of a keyring to be able to hold a lot more keys by using the previously added associative array implementation. Currently the maximum capacity is: (PAGE_SIZE - sizeof(header)) / sizeof(struct key *) which, on a 64-bit system, is a little more 500. However, since this is being used for the NFS uid mapper, we need more than that. The new implementation gives us effectively unlimited capacity. With some alterations, the keyutils testsuite runs successfully to completion after this patch is applied. The alterations are because (a) keyrings that are simply added to no longer appear ordered and (b) some of the errors have changed a bit. Signed-off-by: David Howells --- include/keys/keyring-type.h | 17 +- include/linux/key.h | 13 +- lib/assoc_array.c | 1 + security/keys/Kconfig | 1 + security/keys/gc.c | 33 +- security/keys/internal.h | 17 +- security/keys/key.c | 35 +- security/keys/keyring.c | 1436 ++++++++++++++++++++++--------------------- security/keys/request_key.c | 12 +- 9 files changed, 803 insertions(+), 762 deletions(-) (limited to 'lib') diff --git a/include/keys/keyring-type.h b/include/keys/keyring-type.h index cf49159b0e3a..fca5c62340a4 100644 --- a/include/keys/keyring-type.h +++ b/include/keys/keyring-type.h @@ -1,6 +1,6 @@ /* Keyring key type * - * Copyright (C) 2008 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -13,19 +13,6 @@ #define _KEYS_KEYRING_TYPE_H #include -#include - -/* - * the keyring payload contains a list of the keys to which the keyring is - * subscribed - */ -struct keyring_list { - struct rcu_head rcu; /* RCU deletion hook */ - unsigned short maxkeys; /* max keys this list can hold */ - unsigned short nkeys; /* number of keys currently held */ - unsigned short delkey; /* key to be unlinked by RCU */ - struct key __rcu *keys[0]; -}; - +#include #endif /* _KEYS_KEYRING_TYPE_H */ diff --git a/include/linux/key.h b/include/linux/key.h index ef596c7af585..2417f789d29b 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -22,6 +22,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include @@ -196,11 +197,13 @@ struct key { * whatever */ union { - unsigned long value; - void __rcu *rcudata; - void *data; - struct keyring_list __rcu *subscriptions; - } payload; + union { + unsigned long value; + void __rcu *rcudata; + void *data; + } payload; + struct assoc_array keys; + }; }; extern struct key *key_alloc(struct key_type *type, diff --git a/lib/assoc_array.c b/lib/assoc_array.c index a0952818f938..17edeaf19180 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -12,6 +12,7 @@ */ //#define DEBUG #include +#include #include /* diff --git a/security/keys/Kconfig b/security/keys/Kconfig index a90d6d300dbd..15e0dfe8c80f 100644 --- a/security/keys/Kconfig +++ b/security/keys/Kconfig @@ -4,6 +4,7 @@ config KEYS bool "Enable access key retention support" + select ASSOCIATIVE_ARRAY help This option provides support for retaining authentication tokens and access keys in the kernel. diff --git a/security/keys/gc.c b/security/keys/gc.c index d67c97bb1025..cce621c33dce 100644 --- a/security/keys/gc.c +++ b/security/keys/gc.c @@ -130,6 +130,13 @@ void key_gc_keytype(struct key_type *ktype) kleave(""); } +static int key_gc_keyring_func(const void *object, void *iterator_data) +{ + const struct key *key = object; + time_t *limit = iterator_data; + return key_is_dead(key, *limit); +} + /* * Garbage collect pointers from a keyring. * @@ -138,10 +145,9 @@ void key_gc_keytype(struct key_type *ktype) */ static void key_gc_keyring(struct key *keyring, time_t limit) { - struct keyring_list *klist; - int loop; + int result; - kenter("%x", key_serial(keyring)); + kenter("%x{%s}", keyring->serial, keyring->description ?: ""); if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | (1 << KEY_FLAG_REVOKED))) @@ -149,27 +155,17 @@ static void key_gc_keyring(struct key *keyring, time_t limit) /* scan the keyring looking for dead keys */ rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (!klist) - goto unlock_dont_gc; - - loop = klist->nkeys; - smp_rmb(); - for (loop--; loop >= 0; loop--) { - struct key *key = rcu_dereference(klist->keys[loop]); - if (key_is_dead(key, limit)) - goto do_gc; - } - -unlock_dont_gc: + result = assoc_array_iterate(&keyring->keys, + key_gc_keyring_func, &limit); rcu_read_unlock(); + if (result == true) + goto do_gc; + dont_gc: kleave(" [no gc]"); return; do_gc: - rcu_read_unlock(); - keyring_gc(keyring, limit); kleave(" [gc]"); } @@ -392,7 +388,6 @@ found_unreferenced_key: */ found_keyring: spin_unlock(&key_serial_lock); - kdebug("scan keyring %d", key->serial); key_gc_keyring(key, limit); goto maybe_resched; diff --git a/security/keys/internal.h b/security/keys/internal.h index 73950bf8f875..581c6f688352 100644 --- a/security/keys/internal.h +++ b/security/keys/internal.h @@ -90,20 +90,23 @@ extern void key_type_put(struct key_type *ktype); extern int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, - unsigned long *_prealloc); + struct assoc_array_edit **_edit); extern int __key_link_check_live_key(struct key *keyring, struct key *key); -extern void __key_link(struct key *keyring, struct key *key, - unsigned long *_prealloc); +extern void __key_link(struct key *key, struct assoc_array_edit **_edit); extern void __key_link_end(struct key *keyring, const struct keyring_index_key *index_key, - unsigned long prealloc); + struct assoc_array_edit *edit); -extern key_ref_t __keyring_search_one(key_ref_t keyring_ref, - const struct keyring_index_key *index_key); +extern key_ref_t find_key_to_update(key_ref_t keyring_ref, + const struct keyring_index_key *index_key); extern struct key *keyring_search_instkey(struct key *keyring, key_serial_t target_id); +extern int iterate_over_keyring(const struct key *keyring, + int (*func)(const struct key *key, void *data), + void *data); + typedef int (*key_match_func_t)(const struct key *, const void *); struct keyring_search_context { @@ -119,6 +122,8 @@ struct keyring_search_context { #define KEYRING_SEARCH_NO_CHECK_PERM 0x0010 /* Don't check permissions */ #define KEYRING_SEARCH_DETECT_TOO_DEEP 0x0020 /* Give an error on excessive depth */ + int (*iterator)(const void *object, void *iterator_data); + /* Internal stuff */ int skipped_ret; bool possessed; diff --git a/security/keys/key.c b/security/keys/key.c index 7d716b82a61e..a819b5c7d4ec 100644 --- a/security/keys/key.c +++ b/security/keys/key.c @@ -409,7 +409,7 @@ static int __key_instantiate_and_link(struct key *key, struct key_preparsed_payload *prep, struct key *keyring, struct key *authkey, - unsigned long *_prealloc) + struct assoc_array_edit **_edit) { int ret, awaken; @@ -436,7 +436,7 @@ static int __key_instantiate_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring) - __key_link(keyring, key, _prealloc); + __key_link(key, _edit); /* disable the authorisation key */ if (authkey) @@ -476,7 +476,7 @@ int key_instantiate_and_link(struct key *key, struct key *authkey) { struct key_preparsed_payload prep; - unsigned long prealloc; + struct assoc_array_edit *edit; int ret; memset(&prep, 0, sizeof(prep)); @@ -490,16 +490,15 @@ int key_instantiate_and_link(struct key *key, } if (keyring) { - ret = __key_link_begin(keyring, &key->index_key, &prealloc); + ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret < 0) goto error_free_preparse; } - ret = __key_instantiate_and_link(key, &prep, keyring, authkey, - &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, authkey, &edit); if (keyring) - __key_link_end(keyring, &key->index_key, prealloc); + __key_link_end(keyring, &key->index_key, edit); error_free_preparse: if (key->type->preparse) @@ -537,7 +536,7 @@ int key_reject_and_link(struct key *key, struct key *keyring, struct key *authkey) { - unsigned long prealloc; + struct assoc_array_edit *edit; struct timespec now; int ret, awaken, link_ret = 0; @@ -548,7 +547,7 @@ int key_reject_and_link(struct key *key, ret = -EBUSY; if (keyring) - link_ret = __key_link_begin(keyring, &key->index_key, &prealloc); + link_ret = __key_link_begin(keyring, &key->index_key, &edit); mutex_lock(&key_construction_mutex); @@ -570,7 +569,7 @@ int key_reject_and_link(struct key *key, /* and link it into the destination keyring */ if (keyring && link_ret == 0) - __key_link(keyring, key, &prealloc); + __key_link(key, &edit); /* disable the authorisation key */ if (authkey) @@ -580,7 +579,7 @@ int key_reject_and_link(struct key *key, mutex_unlock(&key_construction_mutex); if (keyring) - __key_link_end(keyring, &key->index_key, prealloc); + __key_link_end(keyring, &key->index_key, edit); /* wake up anyone waiting for a key to be constructed */ if (awaken) @@ -783,8 +782,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, .description = description, }; struct key_preparsed_payload prep; + struct assoc_array_edit *edit; const struct cred *cred = current_cred(); - unsigned long prealloc; struct key *keyring, *key = NULL; key_ref_t key_ref; int ret; @@ -828,7 +827,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } index_key.desc_len = strlen(index_key.description); - ret = __key_link_begin(keyring, &index_key, &prealloc); + ret = __key_link_begin(keyring, &index_key, &edit); if (ret < 0) { key_ref = ERR_PTR(ret); goto error_free_prep; @@ -847,8 +846,8 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, * update that instead if possible */ if (index_key.type->update) { - key_ref = __keyring_search_one(keyring_ref, &index_key); - if (!IS_ERR(key_ref)) + key_ref = find_key_to_update(keyring_ref, &index_key); + if (key_ref) goto found_matching_key; } @@ -874,7 +873,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, } /* instantiate it and link it into the target keyring */ - ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &prealloc); + ret = __key_instantiate_and_link(key, &prep, keyring, NULL, &edit); if (ret < 0) { key_put(key); key_ref = ERR_PTR(ret); @@ -884,7 +883,7 @@ key_ref_t key_create_or_update(key_ref_t keyring_ref, key_ref = make_key_ref(key, is_key_possessed(keyring_ref)); error_link_end: - __key_link_end(keyring, &index_key, prealloc); + __key_link_end(keyring, &index_key, edit); error_free_prep: if (index_key.type->preparse) index_key.type->free_preparse(&prep); @@ -897,7 +896,7 @@ error: /* we found a matching key, so we're going to try to update it * - we can drop the locks first as we have the key pinned */ - __key_link_end(keyring, &index_key, prealloc); + __key_link_end(keyring, &index_key, edit); key_ref = __key_update(key_ref, &prep); goto error_free_prep; diff --git a/security/keys/keyring.c b/security/keys/keyring.c index eeef1a073db4..f7cdea22214f 100644 --- a/security/keys/keyring.c +++ b/security/keys/keyring.c @@ -1,6 +1,6 @@ /* Keyring handling * - * Copyright (C) 2004-2005, 2008 Red Hat, Inc. All Rights Reserved. + * Copyright (C) 2004-2005, 2008, 2013 Red Hat, Inc. All Rights Reserved. * Written by David Howells (dhowells@redhat.com) * * This program is free software; you can redistribute it and/or @@ -17,25 +17,11 @@ #include #include #include +#include +#include #include #include "internal.h" -#define rcu_dereference_locked_keyring(keyring) \ - (rcu_dereference_protected( \ - (keyring)->payload.subscriptions, \ - rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) - -#define rcu_deref_link_locked(klist, index, keyring) \ - (rcu_dereference_protected( \ - (klist)->keys[index], \ - rwsem_is_locked((struct rw_semaphore *)&(keyring)->sem))) - -#define MAX_KEYRING_LINKS \ - min_t(size_t, USHRT_MAX - 1, \ - ((PAGE_SIZE - sizeof(struct keyring_list)) / sizeof(struct key *))) - -#define KEY_LINK_FIXQUOTA 1UL - /* * When plumbing the depths of the key tree, this sets a hard limit * set on how deep we're willing to go. @@ -47,6 +33,28 @@ */ #define KEYRING_NAME_HASH_SIZE (1 << 5) +/* + * We mark pointers we pass to the associative array with bit 1 set if + * they're keyrings and clear otherwise. + */ +#define KEYRING_PTR_SUBTYPE 0x2UL + +static inline bool keyring_ptr_is_keyring(const struct assoc_array_ptr *x) +{ + return (unsigned long)x & KEYRING_PTR_SUBTYPE; +} +static inline struct key *keyring_ptr_to_key(const struct assoc_array_ptr *x) +{ + void *object = assoc_array_ptr_to_leaf(x); + return (struct key *)((unsigned long)object & ~KEYRING_PTR_SUBTYPE); +} +static inline void *keyring_key_to_ptr(struct key *key) +{ + if (key->type == &key_type_keyring) + return (void *)((unsigned long)key | KEYRING_PTR_SUBTYPE); + return key; +} + static struct list_head keyring_name_hash[KEYRING_NAME_HASH_SIZE]; static DEFINE_RWLOCK(keyring_name_lock); @@ -67,7 +75,6 @@ static inline unsigned keyring_hash(const char *desc) */ static int keyring_instantiate(struct key *keyring, struct key_preparsed_payload *prep); -static int keyring_match(const struct key *keyring, const void *criterion); static void keyring_revoke(struct key *keyring); static void keyring_destroy(struct key *keyring); static void keyring_describe(const struct key *keyring, struct seq_file *m); @@ -76,9 +83,9 @@ static long keyring_read(const struct key *keyring, struct key_type key_type_keyring = { .name = "keyring", - .def_datalen = sizeof(struct keyring_list), + .def_datalen = 0, .instantiate = keyring_instantiate, - .match = keyring_match, + .match = user_match, .revoke = keyring_revoke, .destroy = keyring_destroy, .describe = keyring_describe, @@ -127,6 +134,7 @@ static int keyring_instantiate(struct key *keyring, ret = -EINVAL; if (prep->datalen == 0) { + assoc_array_init(&keyring->keys); /* make the keyring available by name if it has one */ keyring_publish_name(keyring); ret = 0; @@ -136,14 +144,225 @@ static int keyring_instantiate(struct key *keyring, } /* - * Match keyrings on their name + * Multiply 64-bits by 32-bits to 96-bits and fold back to 64-bit. Ideally we'd + * fold the carry back too, but that requires inline asm. + */ +static u64 mult_64x32_and_fold(u64 x, u32 y) +{ + u64 hi = (u64)(u32)(x >> 32) * y; + u64 lo = (u64)(u32)(x) * y; + return lo + ((u64)(u32)hi << 32) + (u32)(hi >> 32); +} + +/* + * Hash a key type and description. + */ +static unsigned long hash_key_type_and_desc(const struct keyring_index_key *index_key) +{ + const unsigned level_shift = ASSOC_ARRAY_LEVEL_STEP; + const unsigned long level_mask = ASSOC_ARRAY_LEVEL_STEP_MASK; + const char *description = index_key->description; + unsigned long hash, type; + u32 piece; + u64 acc; + int n, desc_len = index_key->desc_len; + + type = (unsigned long)index_key->type; + + acc = mult_64x32_and_fold(type, desc_len + 13); + acc = mult_64x32_and_fold(acc, 9207); + for (;;) { + n = desc_len; + if (n <= 0) + break; + if (n > 4) + n = 4; + piece = 0; + memcpy(&piece, description, n); + description += n; + desc_len -= n; + acc = mult_64x32_and_fold(acc, piece); + acc = mult_64x32_and_fold(acc, 9207); + } + + /* Fold the hash down to 32 bits if need be. */ + hash = acc; + if (ASSOC_ARRAY_KEY_CHUNK_SIZE == 32) + hash ^= acc >> 32; + + /* Squidge all the keyrings into a separate part of the tree to + * ordinary keys by making sure the lowest level segment in the hash is + * zero for keyrings and non-zero otherwise. + */ + if (index_key->type != &key_type_keyring && (hash & level_mask) == 0) + return hash | (hash >> (ASSOC_ARRAY_KEY_CHUNK_SIZE - level_shift)) | 1; + if (index_key->type == &key_type_keyring && (hash & level_mask) != 0) + return (hash + (hash << level_shift)) & ~level_mask; + return hash; +} + +/* + * Build the next index key chunk. + * + * On 32-bit systems the index key is laid out as: + * + * 0 4 5 9... + * hash desclen typeptr desc[] + * + * On 64-bit systems: + * + * 0 8 9 17... + * hash desclen typeptr desc[] + * + * We return it one word-sized chunk at a time. */ -static int keyring_match(const struct key *keyring, const void *description) +static unsigned long keyring_get_key_chunk(const void *data, int level) +{ + const struct keyring_index_key *index_key = data; + unsigned long chunk = 0; + long offset = 0; + int desc_len = index_key->desc_len, n = sizeof(chunk); + + level /= ASSOC_ARRAY_KEY_CHUNK_SIZE; + switch (level) { + case 0: + return hash_key_type_and_desc(index_key); + case 1: + return ((unsigned long)index_key->type << 8) | desc_len; + case 2: + if (desc_len == 0) + return (u8)((unsigned long)index_key->type >> + (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); + n--; + offset = 1; + default: + offset += sizeof(chunk) - 1; + offset += (level - 3) * sizeof(chunk); + if (offset >= desc_len) + return 0; + desc_len -= offset; + if (desc_len > n) + desc_len = n; + offset += desc_len; + do { + chunk <<= 8; + chunk |= ((u8*)index_key->description)[--offset]; + } while (--desc_len > 0); + + if (level == 2) { + chunk <<= 8; + chunk |= (u8)((unsigned long)index_key->type >> + (ASSOC_ARRAY_KEY_CHUNK_SIZE - 8)); + } + return chunk; + } +} + +static unsigned long keyring_get_object_key_chunk(const void *object, int level) +{ + const struct key *key = keyring_ptr_to_key(object); + return keyring_get_key_chunk(&key->index_key, level); +} + +static bool keyring_compare_object(const void *object, const void *data) { - return keyring->description && - strcmp(keyring->description, description) == 0; + const struct keyring_index_key *index_key = data; + const struct key *key = keyring_ptr_to_key(object); + + return key->index_key.type == index_key->type && + key->index_key.desc_len == index_key->desc_len && + memcmp(key->index_key.description, index_key->description, + index_key->desc_len) == 0; } +/* + * Compare the index keys of a pair of objects and determine the bit position + * at which they differ - if they differ. + */ +static int keyring_diff_objects(const void *_a, const void *_b) +{ + const struct key *key_a = keyring_ptr_to_key(_a); + const struct key *key_b = keyring_ptr_to_key(_b); + const struct keyring_index_key *a = &key_a->index_key; + const struct keyring_index_key *b = &key_b->index_key; + unsigned long seg_a, seg_b; + int level, i; + + level = 0; + seg_a = hash_key_type_and_desc(a); + seg_b = hash_key_type_and_desc(b); + if ((seg_a ^ seg_b) != 0) + goto differ; + + /* The number of bits contributed by the hash is controlled by a + * constant in the assoc_array headers. Everything else thereafter we + * can deal with as being machine word-size dependent. + */ + level += ASSOC_ARRAY_KEY_CHUNK_SIZE / 8; + seg_a = a->desc_len; + seg_b = b->desc_len; + if ((seg_a ^ seg_b) != 0) + goto differ; + + /* The next bit may not work on big endian */ + level++; + seg_a = (unsigned long)a->type; + seg_b = (unsigned long)b->type; + if ((seg_a ^ seg_b) != 0) + goto differ; + + level += sizeof(unsigned long); + if (a->desc_len == 0) + goto same; + + i = 0; + if (((unsigned long)a->description | (unsigned long)b->description) & + (sizeof(unsigned long) - 1)) { + do { + seg_a = *(unsigned long *)(a->description + i); + seg_b = *(unsigned long *)(b->description + i); + if ((seg_a ^ seg_b) != 0) + goto differ_plus_i; + i += sizeof(unsigned long); + } while (i < (a->desc_len & (sizeof(unsigned long) - 1))); + } + + for (; i < a->desc_len; i++) { + seg_a = *(unsigned char *)(a->description + i); + seg_b = *(unsigned char *)(b->description + i); + if ((seg_a ^ seg_b) != 0) + goto differ_plus_i; + } + +same: + return -1; + +differ_plus_i: + level += i; +differ: + i = level * 8 + __ffs(seg_a ^ seg_b); + return i; +} + +/* + * Free an object after stripping the keyring flag off of the pointer. + */ +static void keyring_free_object(void *object) +{ + key_put(keyring_ptr_to_key(object)); +} + +/* + * Operations for keyring management by the index-tree routines. + */ +static const struct assoc_array_ops keyring_assoc_array_ops = { + .get_key_chunk = keyring_get_key_chunk, + .get_object_key_chunk = keyring_get_object_key_chunk, + .compare_object = keyring_compare_object, + .diff_objects = keyring_diff_objects, + .free_object = keyring_free_object, +}; + /* * Clean up a keyring when it is destroyed. Unpublish its name if it had one * and dispose of its data. @@ -155,9 +374,6 @@ static int keyring_match(const struct key *keyring, const void *description) */ static void keyring_destroy(struct key *keyring) { - struct keyring_list *klist; - int loop; - if (keyring->description) { write_lock(&keyring_name_lock); @@ -168,12 +384,7 @@ static void keyring_destroy(struct key *keyring) write_unlock(&keyring_name_lock); } - klist = rcu_access_pointer(keyring->payload.subscriptions); - if (klist) { - for (loop = klist->nkeys - 1; loop >= 0; loop--) - key_put(rcu_access_pointer(klist->keys[loop])); - kfree(klist); - } + assoc_array_destroy(&keyring->keys, &keyring_assoc_array_ops); } /* @@ -181,76 +392,88 @@ static void keyring_destroy(struct key *keyring) */ static void keyring_describe(const struct key *keyring, struct seq_file *m) { - struct keyring_list *klist; - if (keyring->description) seq_puts(m, keyring->description); else seq_puts(m, "[anon]"); if (key_is_instantiated(keyring)) { - rcu_read_lock(); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) - seq_printf(m, ": %u/%u", klist->nkeys, klist->maxkeys); + if (keyring->keys.nr_leaves_on_tree != 0) + seq_printf(m, ": %lu", keyring->keys.nr_leaves_on_tree); else seq_puts(m, ": empty"); - rcu_read_unlock(); } } +struct keyring_read_iterator_context { + size_t qty; + size_t count; + key_serial_t __user *buffer; +}; + +static int keyring_read_iterator(const void *object, void *data) +{ + struct keyring_read_iterator_context *ctx = data; + const struct key *key = keyring_ptr_to_key(object); + int ret; + + kenter("{%s,%d},,{%zu/%zu}", + key->type->name, key->serial, ctx->count, ctx->qty); + + if (ctx->count >= ctx->qty) + return 1; + + ret = put_user(key->serial, ctx->buffer); + if (ret < 0) + return ret; + ctx->buffer++; + ctx->count += sizeof(key->serial); + return 0; +} + /* * Read a list of key IDs from the keyring's contents in binary form * - * The keyring's semaphore is read-locked by the caller. + * The keyring's semaphore is read-locked by the caller. This prevents someone + * from modifying it under us - which could cause us to read key IDs multiple + * times. */ static long keyring_read(const struct key *keyring, char __user *buffer, size_t buflen) { - struct keyring_list *klist; - struct key *key; - size_t qty, tmp; - int loop, ret; + struct keyring_read_iterator_context ctx; + unsigned long nr_keys; + int ret; - ret = 0; - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* calculate how much data we could return */ - qty = klist->nkeys * sizeof(key_serial_t); - - if (buffer && buflen > 0) { - if (buflen > qty) - buflen = qty; - - /* copy the IDs of the subscribed keys into the - * buffer */ - ret = -EFAULT; - - for (loop = 0; loop < klist->nkeys; loop++) { - key = rcu_deref_link_locked(klist, loop, - keyring); - - tmp = sizeof(key_serial_t); - if (tmp > buflen) - tmp = buflen; - - if (copy_to_user(buffer, - &key->serial, - tmp) != 0) - goto error; - - buflen -= tmp; - if (buflen == 0) - break; - buffer += tmp; - } - } + kenter("{%d},,%zu", key_serial(keyring), buflen); + + if (buflen & (sizeof(key_serial_t) - 1)) + return -EINVAL; + + nr_keys = keyring->keys.nr_leaves_on_tree; + if (nr_keys == 0) + return 0; - ret = qty; + /* Calculate how much data we could return */ + ctx.qty = nr_keys * sizeof(key_serial_t); + + if (!buffer || !buflen) + return ctx.qty; + + if (buflen > ctx.qty) + ctx.qty = buflen; + + /* Copy the IDs of the subscribed keys into the buffer */ + ctx.buffer = (key_serial_t __user *)buffer; + ctx.count = 0; + ret = assoc_array_iterate(&keyring->keys, keyring_read_iterator, &ctx); + if (ret < 0) { + kleave(" = %d [iterate]", ret); + return ret; } -error: - return ret; + kleave(" = %zu [ok]", ctx.count); + return ctx.count; } /* @@ -277,219 +500,360 @@ struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid, } EXPORT_SYMBOL(keyring_alloc); -/** - * keyring_search_aux - Search a keyring tree for a key matching some criteria - * @keyring_ref: A pointer to the keyring with possession indicator. - * @ctx: The keyring search context. - * - * Search the supplied keyring tree for a key that matches the criteria given. - * The root keyring and any linked keyrings must grant Search permission to the - * caller to be searchable and keys can only be found if they too grant Search - * to the caller. The possession flag on the root keyring pointer controls use - * of the possessor bits in permissions checking of the entire tree. In - * addition, the LSM gets to forbid keyring searches and key matches. - * - * The search is performed as a breadth-then-depth search up to the prescribed - * limit (KEYRING_SEARCH_MAX_DEPTH). - * - * Keys are matched to the type provided and are then filtered by the match - * function, which is given the description to use in any way it sees fit. The - * match function may use any attributes of a key that it wishes to to - * determine the match. Normally the match function from the key type would be - * used. - * - * RCU is used to prevent the keyring key lists from disappearing without the - * need to take lots of locks. - * - * Returns a pointer to the found key and increments the key usage count if - * successful; -EAGAIN if no matching keys were found, or if expired or revoked - * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the - * specified keyring wasn't a keyring. - * - * In the case of a successful return, the possession attribute from - * @keyring_ref is propagated to the returned key reference. +/* + * Iteration function to consider each key found. */ -key_ref_t keyring_search_aux(key_ref_t keyring_ref, - struct keyring_search_context *ctx) +static int keyring_search_iterator(const void *object, void *iterator_data) { - struct { - /* Need a separate keylist pointer for RCU purposes */ - struct key *keyring; - struct keyring_list *keylist; - int kix; - } stack[KEYRING_SEARCH_MAX_DEPTH]; - - struct keyring_list *keylist; - unsigned long kflags; - struct key *keyring, *key; - key_ref_t key_ref; - long err; - int sp, nkeys, kix; + struct keyring_search_context *ctx = iterator_data; + const struct key *key = keyring_ptr_to_key(object); + unsigned long kflags = key->flags; - keyring = key_ref_to_ptr(keyring_ref); - ctx->possessed = is_key_possessed(keyring_ref); - key_check(keyring); + kenter("{%d}", key->serial); - /* top keyring must have search permission to begin the search */ - err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); - if (err < 0) { - key_ref = ERR_PTR(err); - goto error; + /* ignore keys not of this type */ + if (key->type != ctx->index_key.type) { + kleave(" = 0 [!type]"); + return 0; } - key_ref = ERR_PTR(-ENOTDIR); - if (keyring->type != &key_type_keyring) - goto error; + /* skip invalidated, revoked and expired keys */ + if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + if (kflags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) { + ctx->result = ERR_PTR(-EKEYREVOKED); + kleave(" = %d [invrev]", ctx->skipped_ret); + goto skipped; + } - rcu_read_lock(); + if (key->expiry && ctx->now.tv_sec >= key->expiry) { + ctx->result = ERR_PTR(-EKEYEXPIRED); + kleave(" = %d [expire]", ctx->skipped_ret); + goto skipped; + } + } - ctx->now = current_kernel_time(); - err = -EAGAIN; - sp = 0; - - /* firstly we should check to see if this top-level keyring is what we - * are looking for */ - key_ref = ERR_PTR(-EAGAIN); - kflags = keyring->flags; - if (keyring->type == ctx->index_key.type && - ctx->match(keyring, ctx->match_data)) { - key = keyring; - if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) - goto found; + /* keys that don't match */ + if (!ctx->match(key, ctx->match_data)) { + kleave(" = 0 [!match]"); + return 0; + } - /* check it isn't negative and hasn't expired or been - * revoked */ - if (kflags & (1 << KEY_FLAG_REVOKED)) - goto error_2; - if (key->expiry && ctx->now.tv_sec >= key->expiry) - goto error_2; - key_ref = ERR_PTR(key->type_data.reject_error); - if (kflags & (1 << KEY_FLAG_NEGATIVE)) - goto error_2; - goto found; + /* key must have search permissions */ + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && + key_task_permission(make_key_ref(key, ctx->possessed), + ctx->cred, KEY_SEARCH) < 0) { + ctx->result = ERR_PTR(-EACCES); + kleave(" = %d [!perm]", ctx->skipped_ret); + goto skipped; } - /* otherwise, the top keyring must not be revoked, expired, or - * negatively instantiated if we are to search it */ - key_ref = ERR_PTR(-EAGAIN); - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED) | - (1 << KEY_FLAG_NEGATIVE)) || - (keyring->expiry && ctx->now.tv_sec >= keyring->expiry)) - goto error_2; - - /* start processing a new keyring */ -descend: - kflags = keyring->flags; - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - goto not_this_keyring; + if (ctx->flags & KEYRING_SEARCH_DO_STATE_CHECK) { + /* we set a different error code if we pass a negative key */ + if (kflags & (1 << KEY_FLAG_NEGATIVE)) { + ctx->result = ERR_PTR(key->type_data.reject_error); + kleave(" = %d [neg]", ctx->skipped_ret); + goto skipped; + } + } - keylist = rcu_dereference(keyring->payload.subscriptions); - if (!keylist) - goto not_this_keyring; + /* Found */ + ctx->result = make_key_ref(key, ctx->possessed); + kleave(" = 1 [found]"); + return 1; - /* iterate through the keys in this keyring first */ - nkeys = keylist->nkeys; - smp_rmb(); - for (kix = 0; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - kflags = key->flags; +skipped: + return ctx->skipped_ret; +} - /* ignore keys not of this type */ - if (key->type != ctx->index_key.type) - continue; +/* + * Search inside a keyring for a key. We can search by walking to it + * directly based on its index-key or we can iterate over the entire + * tree looking for it, based on the match function. + */ +static int search_keyring(struct key *keyring, struct keyring_search_context *ctx) +{ + if ((ctx->flags & KEYRING_SEARCH_LOOKUP_TYPE) == + KEYRING_SEARCH_LOOKUP_DIRECT) { + const void *object; + + object = assoc_array_find(&keyring->keys, + &keyring_assoc_array_ops, + &ctx->index_key); + return object ? ctx->iterator(object, ctx) : 0; + } + return assoc_array_iterate(&keyring->keys, ctx->iterator, ctx); +} - /* skip invalidated, revoked and expired keys */ - if (!(ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK)) { - if (kflags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - continue; +/* + * Search a tree of keyrings that point to other keyrings up to the maximum + * depth. + */ +static bool search_nested_keyrings(struct key *keyring, + struct keyring_search_context *ctx) +{ + struct { + struct key *keyring; + struct assoc_array_node *node; + int slot; + } stack[KEYRING_SEARCH_MAX_DEPTH]; - if (key->expiry && ctx->now.tv_sec >= key->expiry) - continue; - } + struct assoc_array_shortcut *shortcut; + struct assoc_array_node *node; + struct assoc_array_ptr *ptr; + struct key *key; + int sp = 0, slot; - /* keys that don't match */ - if (!ctx->match(key, ctx->match_data)) - continue; + kenter("{%d},{%s,%s}", + keyring->serial, + ctx->index_key.type->name, + ctx->index_key.description); - /* key must have search permissions */ - if (key_task_permission(make_key_ref(key, ctx->possessed), - ctx->cred, KEY_SEARCH) < 0) - continue; + if (ctx->index_key.description) + ctx->index_key.desc_len = strlen(ctx->index_key.description); - if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) + /* Check to see if this top-level keyring is what we are looking for + * and whether it is valid or not. + */ + if (ctx->flags & KEYRING_SEARCH_LOOKUP_ITERATE || + keyring_compare_object(keyring, &ctx->index_key)) { + ctx->skipped_ret = 2; + ctx->flags |= KEYRING_SEARCH_DO_STATE_CHECK; + switch (ctx->iterator(keyring_key_to_ptr(keyring), ctx)) { + case 1: goto found; - - /* we set a different error code if we pass a negative key */ - if (kflags & (1 << KEY_FLAG_NEGATIVE)) { - err = key->type_data.reject_error; - continue; + case 2: + return false; + default: + break; } + } + ctx->skipped_ret = 0; + if (ctx->flags & KEYRING_SEARCH_NO_STATE_CHECK) + ctx->flags &= ~KEYRING_SEARCH_DO_STATE_CHECK; + + /* Start processing a new keyring */ +descend_to_keyring: + kdebug("descend to %d", keyring->serial); + if (keyring->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) + goto not_this_keyring; + + /* Search through the keys in this keyring before its searching its + * subtrees. + */ + if (search_keyring(keyring, ctx)) goto found; - } - /* search through the keyrings nested in this one */ - kix = 0; -ascend: - nkeys = keylist->nkeys; - smp_rmb(); - for (; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - if (key->type != &key_type_keyring) - continue; + /* Then manually iterate through the keyrings nested in this one. + * + * Start from the root node of the index tree. Because of the way the + * hash function has been set up, keyrings cluster on the leftmost + * branch of the root node (root slot 0) or in the root node itself. + * Non-keyrings avoid the leftmost branch of the root entirely (root + * slots 1-15). + */ + ptr = ACCESS_ONCE(keyring->keys.root); + if (!ptr) + goto not_this_keyring; - /* recursively search nested keyrings - * - only search keyrings for which we have search permission + if (assoc_array_ptr_is_shortcut(ptr)) { + /* If the root is a shortcut, either the keyring only contains + * keyring pointers (everything clusters behind root slot 0) or + * doesn't contain any keyring pointers. */ - if (sp >= KEYRING_SEARCH_MAX_DEPTH) + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + if ((shortcut->index_key[0] & ASSOC_ARRAY_FAN_MASK) != 0) + goto not_this_keyring; + + ptr = ACCESS_ONCE(shortcut->next_node); + node = assoc_array_ptr_to_node(ptr); + goto begin_node; + } + + node = assoc_array_ptr_to_node(ptr); + smp_read_barrier_depends(); + + ptr = node->slots[0]; + if (!assoc_array_ptr_is_meta(ptr)) + goto begin_node; + +descend_to_node: + /* Descend to a more distal node in this keyring's content tree and go + * through that. + */ + kdebug("descend"); + if (assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + ptr = ACCESS_ONCE(shortcut->next_node); + BUG_ON(!assoc_array_ptr_is_node(ptr)); + node = assoc_array_ptr_to_node(ptr); + } + +begin_node: + kdebug("begin_node"); + smp_read_barrier_depends(); + slot = 0; +ascend_to_node: + /* Go through the slots in a node */ + for (; slot < ASSOC_ARRAY_FAN_OUT; slot++) { + ptr = ACCESS_ONCE(node->slots[slot]); + + if (assoc_array_ptr_is_meta(ptr) && node->back_pointer) + goto descend_to_node; + + if (!keyring_ptr_is_keyring(ptr)) continue; - if (key_task_permission(make_key_ref(key, ctx->possessed), + key = keyring_ptr_to_key(ptr); + + if (sp >= KEYRING_SEARCH_MAX_DEPTH) { + if (ctx->flags & KEYRING_SEARCH_DETECT_TOO_DEEP) { + ctx->result = ERR_PTR(-ELOOP); + return false; + } + goto not_this_keyring; + } + + /* Search a nested keyring */ + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM) && + key_task_permission(make_key_ref(key, ctx->possessed), ctx->cred, KEY_SEARCH) < 0) continue; /* stack the current position */ stack[sp].keyring = keyring; - stack[sp].keylist = keylist; - stack[sp].kix = kix; + stack[sp].node = node; + stack[sp].slot = slot; sp++; /* begin again with the new keyring */ keyring = key; - goto descend; + goto descend_to_keyring; + } + + /* We've dealt with all the slots in the current node, so now we need + * to ascend to the parent and continue processing there. + */ + ptr = ACCESS_ONCE(node->back_pointer); + slot = node->parent_slot; + + if (ptr && assoc_array_ptr_is_shortcut(ptr)) { + shortcut = assoc_array_ptr_to_shortcut(ptr); + smp_read_barrier_depends(); + ptr = ACCESS_ONCE(shortcut->back_pointer); + slot = shortcut->parent_slot; + } + if (!ptr) + goto not_this_keyring; + node = assoc_array_ptr_to_node(ptr); + smp_read_barrier_depends(); + slot++; + + /* If we've ascended to the root (zero backpointer), we must have just + * finished processing the leftmost branch rather than the root slots - + * so there can't be any more keyrings for us to find. + */ + if (node->back_pointer) { + kdebug("ascend %d", slot); + goto ascend_to_node; } - /* the keyring we're looking at was disqualified or didn't contain a - * matching key */ + /* The keyring we're looking at was disqualified or didn't contain a + * matching key. + */ not_this_keyring: - if (sp > 0) { - /* resume the processing of a keyring higher up in the tree */ - sp--; - keyring = stack[sp].keyring; - keylist = stack[sp].keylist; - kix = stack[sp].kix + 1; - goto ascend; + kdebug("not_this_keyring %d", sp); + if (sp <= 0) { + kleave(" = false"); + return false; } - key_ref = ERR_PTR(err); - goto error_2; + /* Resume the processing of a keyring higher up in the tree */ + sp--; + keyring = stack[sp].keyring; + node = stack[sp].node; + slot = stack[sp].slot + 1; + kdebug("ascend to %d [%d]", keyring->serial, slot); + goto ascend_to_node; - /* we found a viable match */ + /* We found a viable match */ found: - __key_get(key); - key->last_used_at = ctx->now.tv_sec; - keyring->last_used_at = ctx->now.tv_sec; - while (sp > 0) - stack[--sp].keyring->last_used_at = ctx->now.tv_sec; + key = key_ref_to_ptr(ctx->result); key_check(key); - key_ref = make_key_ref(key, ctx->possessed); -error_2: + if (!(ctx->flags & KEYRING_SEARCH_NO_UPDATE_TIME)) { + key->last_used_at = ctx->now.tv_sec; + keyring->last_used_at = ctx->now.tv_sec; + while (sp > 0) + stack[--sp].keyring->last_used_at = ctx->now.tv_sec; + } + kleave(" = true"); + return true; +} + +/** + * keyring_search_aux - Search a keyring tree for a key matching some criteria + * @keyring_ref: A pointer to the keyring with possession indicator. + * @ctx: The keyring search context. + * + * Search the supplied keyring tree for a key that matches the criteria given. + * The root keyring and any linked keyrings must grant Search permission to the + * caller to be searchable and keys can only be found if they too grant Search + * to the caller. The possession flag on the root keyring pointer controls use + * of the possessor bits in permissions checking of the entire tree. In + * addition, the LSM gets to forbid keyring searches and key matches. + * + * The search is performed as a breadth-then-depth search up to the prescribed + * limit (KEYRING_SEARCH_MAX_DEPTH). + * + * Keys are matched to the type provided and are then filtered by the match + * function, which is given the description to use in any way it sees fit. The + * match function may use any attributes of a key that it wishes to to + * determine the match. Normally the match function from the key type would be + * used. + * + * RCU can be used to prevent the keyring key lists from disappearing without + * the need to take lots of locks. + * + * Returns a pointer to the found key and increments the key usage count if + * successful; -EAGAIN if no matching keys were found, or if expired or revoked + * keys were found; -ENOKEY if only negative keys were found; -ENOTDIR if the + * specified keyring wasn't a keyring. + * + * In the case of a successful return, the possession attribute from + * @keyring_ref is propagated to the returned key reference. + */ +key_ref_t keyring_search_aux(key_ref_t keyring_ref, + struct keyring_search_context *ctx) +{ + struct key *keyring; + long err; + + ctx->iterator = keyring_search_iterator; + ctx->possessed = is_key_possessed(keyring_ref); + ctx->result = ERR_PTR(-EAGAIN); + + keyring = key_ref_to_ptr(keyring_ref); + key_check(keyring); + + if (keyring->type != &key_type_keyring) + return ERR_PTR(-ENOTDIR); + + if (!(ctx->flags & KEYRING_SEARCH_NO_CHECK_PERM)) { + err = key_task_permission(keyring_ref, ctx->cred, KEY_SEARCH); + if (err < 0) + return ERR_PTR(err); + } + + rcu_read_lock(); + ctx->now = current_kernel_time(); + if (search_nested_keyrings(keyring, ctx)) + __key_get(key_ref_to_ptr(ctx->result)); rcu_read_unlock(); -error: - return key_ref; + return ctx->result; } /** @@ -499,7 +863,7 @@ error: * @description: The name of the keyring we want to find. * * As keyring_search_aux() above, but using the current task's credentials and - * type's default matching function. + * type's default matching function and preferred search method. */ key_ref_t keyring_search(key_ref_t keyring, struct key_type *type, @@ -523,58 +887,49 @@ key_ref_t keyring_search(key_ref_t keyring, EXPORT_SYMBOL(keyring_search); /* - * Search the given keyring only (no recursion). + * Search the given keyring for a key that might be updated. * * The caller must guarantee that the keyring is a keyring and that the - * permission is granted to search the keyring as no check is made here. - * - * RCU is used to make it unnecessary to lock the keyring key list here. + * permission is granted to modify the keyring as no check is made here. The + * caller must also hold a lock on the keyring semaphore. * * Returns a pointer to the found key with usage count incremented if - * successful and returns -ENOKEY if not found. Revoked and invalidated keys - * are skipped over. + * successful and returns NULL if not found. Revoked and invalidated keys are + * skipped over. * * If successful, the possession indicator is propagated from the keyring ref * to the returned key reference. */ -key_ref_t __keyring_search_one(key_ref_t keyring_ref, - const struct keyring_index_key *index_key) +key_ref_t find_key_to_update(key_ref_t keyring_ref, + const struct keyring_index_key *index_key) { - struct keyring_list *klist; struct key *keyring, *key; - bool possessed; - int nkeys, loop; + const void *object; keyring = key_ref_to_ptr(keyring_ref); - possessed = is_key_possessed(keyring_ref); - rcu_read_lock(); + kenter("{%d},{%s,%s}", + keyring->serial, index_key->type->name, index_key->description); - klist = rcu_dereference(keyring->payload.subscriptions); - if (klist) { - nkeys = klist->nkeys; - smp_rmb(); - for (loop = 0; loop < nkeys ; loop++) { - key = rcu_dereference(klist->keys[loop]); - if (key->type == index_key->type && - (!key->type->match || - key->type->match(key, index_key->description)) && - !(key->flags & ((1 << KEY_FLAG_INVALIDATED) | - (1 << KEY_FLAG_REVOKED))) - ) - goto found; - } - } + object = assoc_array_find(&keyring->keys, &keyring_assoc_array_ops, + index_key); - rcu_read_unlock(); - return ERR_PTR(-ENOKEY); + if (object) + goto found; + + kleave(" = NULL"); + return NULL; found: + key = keyring_ptr_to_key(object); + if (key->flags & ((1 << KEY_FLAG_INVALIDATED) | + (1 << KEY_FLAG_REVOKED))) { + kleave(" = NULL [x]"); + return NULL; + } __key_get(key); - keyring->last_used_at = key->last_used_at = - current_kernel_time().tv_sec; - rcu_read_unlock(); - return make_key_ref(key, possessed); + kleave(" = {%d}", key->serial); + return make_key_ref(key, is_key_possessed(keyring_ref)); } /* @@ -637,6 +992,19 @@ out: return keyring; } +static int keyring_detect_cycle_iterator(const void *object, + void *iterator_data) +{ + struct keyring_search_context *ctx = iterator_data; + const struct key *key = keyring_ptr_to_key(object); + + kenter("{%d}", key->serial); + + BUG_ON(key != ctx->match_data); + ctx->result = ERR_PTR(-EDEADLK); + return 1; +} + /* * See if a cycle will will be created by inserting acyclic tree B in acyclic * tree A at the topmost level (ie: as a direct child of A). @@ -646,117 +1014,39 @@ out: */ static int keyring_detect_cycle(struct key *A, struct key *B) { - struct { - struct keyring_list *keylist; - int kix; - } stack[KEYRING_SEARCH_MAX_DEPTH]; - - struct keyring_list *keylist; - struct key *subtree, *key; - int sp, nkeys, kix, ret; + struct keyring_search_context ctx = { + .index_key = A->index_key, + .match_data = A, + .iterator = keyring_detect_cycle_iterator, + .flags = (KEYRING_SEARCH_LOOKUP_DIRECT | + KEYRING_SEARCH_NO_STATE_CHECK | + KEYRING_SEARCH_NO_UPDATE_TIME | + KEYRING_SEARCH_NO_CHECK_PERM | + KEYRING_SEARCH_DETECT_TOO_DEEP), + }; rcu_read_lock(); - - ret = -EDEADLK; - if (A == B) - goto cycle_detected; - - subtree = B; - sp = 0; - - /* start processing a new keyring */ -descend: - if (test_bit(KEY_FLAG_REVOKED, &subtree->flags)) - goto not_this_keyring; - - keylist = rcu_dereference(subtree->payload.subscriptions); - if (!keylist) - goto not_this_keyring; - kix = 0; - -ascend: - /* iterate through the remaining keys in this keyring */ - nkeys = keylist->nkeys; - smp_rmb(); - for (; kix < nkeys; kix++) { - key = rcu_dereference(keylist->keys[kix]); - - if (key == A) - goto cycle_detected; - - /* recursively check nested keyrings */ - if (key->type == &key_type_keyring) { - if (sp >= KEYRING_SEARCH_MAX_DEPTH) - goto too_deep; - - /* stack the current position */ - stack[sp].keylist = keylist; - stack[sp].kix = kix; - sp++; - - /* begin again with the new keyring */ - subtree = key; - goto descend; - } - } - - /* the keyring we're looking at was disqualified or didn't contain a - * matching key */ -not_this_keyring: - if (sp > 0) { - /* resume the checking of a keyring higher up in the tree */ - sp--; - keylist = stack[sp].keylist; - kix = stack[sp].kix + 1; - goto ascend; - } - - ret = 0; /* no cycles detected */ - -error: + search_nested_keyrings(B, &ctx); rcu_read_unlock(); - return ret; - -too_deep: - ret = -ELOOP; - goto error; - -cycle_detected: - ret = -EDEADLK; - goto error; -} - -/* - * Dispose of a keyring list after the RCU grace period, freeing the unlinked - * key - */ -static void keyring_unlink_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist = - container_of(rcu, struct keyring_list, rcu); - - if (klist->delkey != USHRT_MAX) - key_put(rcu_access_pointer(klist->keys[klist->delkey])); - kfree(klist); + return PTR_ERR(ctx.result) == -EAGAIN ? 0 : PTR_ERR(ctx.result); } /* * Preallocate memory so that a key can be linked into to a keyring. */ -int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_key, - unsigned long *_prealloc) +int __key_link_begin(struct key *keyring, + const struct keyring_index_key *index_key, + struct assoc_array_edit **_edit) __acquires(&keyring->sem) __acquires(&keyring_serialise_link_sem) { - struct keyring_list *klist, *nklist; - unsigned long prealloc; - unsigned max; - time_t lowest_lru; - size_t size; - int loop, lru, ret; + struct assoc_array_edit *edit; + int ret; kenter("%d,%s,%s,", - key_serial(keyring), index_key->type->name, index_key->description); + keyring->serial, index_key->type->name, index_key->description); + + BUG_ON(index_key->desc_len == 0); if (keyring->type != &key_type_keyring) return -ENOTDIR; @@ -772,88 +1062,25 @@ int __key_link_begin(struct key *keyring, const struct keyring_index_key *index_ if (index_key->type == &key_type_keyring) down_write(&keyring_serialise_link_sem); - klist = rcu_dereference_locked_keyring(keyring); - - /* see if there's a matching key we can displace */ - lru = -1; - if (klist && klist->nkeys > 0) { - lowest_lru = TIME_T_MAX; - for (loop = klist->nkeys - 1; loop >= 0; loop--) { - struct key *key = rcu_deref_link_locked(klist, loop, - keyring); - if (key->type == index_key->type && - strcmp(key->description, index_key->description) == 0) { - /* Found a match - we'll replace the link with - * one to the new key. We record the slot - * position. - */ - klist->delkey = loop; - prealloc = 0; - goto done; - } - if (key->last_used_at < lowest_lru) { - lowest_lru = key->last_used_at; - lru = loop; - } - } - } - - /* If the keyring is full then do an LRU discard */ - if (klist && - klist->nkeys == klist->maxkeys && - klist->maxkeys >= MAX_KEYRING_LINKS) { - kdebug("LRU discard %d\n", lru); - klist->delkey = lru; - prealloc = 0; - goto done; - } - /* check that we aren't going to overrun the user's quota */ ret = key_payload_reserve(keyring, keyring->datalen + KEYQUOTA_LINK_BYTES); if (ret < 0) goto error_sem; - if (klist && klist->nkeys < klist->maxkeys) { - /* there's sufficient slack space to append directly */ - klist->delkey = klist->nkeys; - prealloc = KEY_LINK_FIXQUOTA; - } else { - /* grow the key list */ - max = 4; - if (klist) { - max += klist->maxkeys; - if (max > MAX_KEYRING_LINKS) - max = MAX_KEYRING_LINKS; - BUG_ON(max <= klist->maxkeys); - } - - size = sizeof(*klist) + sizeof(struct key *) * max; - - ret = -ENOMEM; - nklist = kmalloc(size, GFP_KERNEL); - if (!nklist) - goto error_quota; - - nklist->maxkeys = max; - if (klist) { - memcpy(nklist->keys, klist->keys, - sizeof(struct key *) * klist->nkeys); - nklist->delkey = klist->nkeys; - nklist->nkeys = klist->nkeys + 1; - klist->delkey = USHRT_MAX; - } else { - nklist->nkeys = 1; - nklist->delkey = 0; - } - - /* add the key into the new space */ - RCU_INIT_POINTER(nklist->keys[nklist->delkey], NULL); - prealloc = (unsigned long)nklist | KEY_LINK_FIXQUOTA; + /* Create an edit script that will insert/replace the key in the + * keyring tree. + */ + edit = assoc_array_insert(&keyring->keys, + &keyring_assoc_array_ops, + index_key, + NULL); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + goto error_quota; } -done: - *_prealloc = prealloc; + *_edit = edit; kleave(" = 0"); return 0; @@ -893,60 +1120,12 @@ int __key_link_check_live_key(struct key *keyring, struct key *key) * holds at most one link to any given key of a particular type+description * combination. */ -void __key_link(struct key *keyring, struct key *key, - unsigned long *_prealloc) +void __key_link(struct key *key, struct assoc_array_edit **_edit) { - struct keyring_list *klist, *nklist; - struct key *discard; - - nklist = (struct keyring_list *)(*_prealloc & ~KEY_LINK_FIXQUOTA); - *_prealloc = 0; - - kenter("%d,%d,%p", keyring->serial, key->serial, nklist); - - klist = rcu_dereference_locked_keyring(keyring); - __key_get(key); - keyring->last_used_at = key->last_used_at = - current_kernel_time().tv_sec; - - /* there's a matching key we can displace or an empty slot in a newly - * allocated list we can fill */ - if (nklist) { - kdebug("reissue %hu/%hu/%hu", - nklist->delkey, nklist->nkeys, nklist->maxkeys); - - RCU_INIT_POINTER(nklist->keys[nklist->delkey], key); - - rcu_assign_pointer(keyring->payload.subscriptions, nklist); - - /* dispose of the old keyring list and, if there was one, the - * displaced key */ - if (klist) { - kdebug("dispose %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); - } - } else if (klist->delkey < klist->nkeys) { - kdebug("replace %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - - discard = rcu_dereference_protected( - klist->keys[klist->delkey], - rwsem_is_locked(&keyring->sem)); - rcu_assign_pointer(klist->keys[klist->delkey], key); - /* The garbage collector will take care of RCU - * synchronisation */ - key_put(discard); - } else { - /* there's sufficient slack space to append directly */ - kdebug("append %hu/%hu/%hu", - klist->delkey, klist->nkeys, klist->maxkeys); - - RCU_INIT_POINTER(klist->keys[klist->delkey], key); - smp_wmb(); - klist->nkeys++; - } + assoc_array_insert_set_object(*_edit, keyring_key_to_ptr(key)); + assoc_array_apply_edit(*_edit); + *_edit = NULL; } /* @@ -956,23 +1135,20 @@ void __key_link(struct key *keyring, struct key *key, */ void __key_link_end(struct key *keyring, const struct keyring_index_key *index_key, - unsigned long prealloc) + struct assoc_array_edit *edit) __releases(&keyring->sem) __releases(&keyring_serialise_link_sem) { BUG_ON(index_key->type == NULL); - BUG_ON(index_key->type->name == NULL); - kenter("%d,%s,%lx", keyring->serial, index_key->type->name, prealloc); + kenter("%d,%s,", keyring->serial, index_key->type->name); if (index_key->type == &key_type_keyring) up_write(&keyring_serialise_link_sem); - if (prealloc) { - if (prealloc & KEY_LINK_FIXQUOTA) - key_payload_reserve(keyring, - keyring->datalen - - KEYQUOTA_LINK_BYTES); - kfree((struct keyring_list *)(prealloc & ~KEY_LINK_FIXQUOTA)); + if (edit) { + key_payload_reserve(keyring, + keyring->datalen - KEYQUOTA_LINK_BYTES); + assoc_array_cancel_edit(edit); } up_write(&keyring->sem); } @@ -999,20 +1175,24 @@ void __key_link_end(struct key *keyring, */ int key_link(struct key *keyring, struct key *key) { - unsigned long prealloc; + struct assoc_array_edit *edit; int ret; + kenter("{%d,%d}", keyring->serial, atomic_read(&keyring->usage)); + key_check(keyring); key_check(key); - ret = __key_link_begin(keyring, &key->index_key, &prealloc); + ret = __key_link_begin(keyring, &key->index_key, &edit); if (ret == 0) { + kdebug("begun {%d,%d}", keyring->serial, atomic_read(&keyring->usage)); ret = __key_link_check_live_key(keyring, key); if (ret == 0) - __key_link(keyring, key, &prealloc); - __key_link_end(keyring, &key->index_key, prealloc); + __key_link(key, &edit); + __key_link_end(keyring, &key->index_key, edit); } + kleave(" = %d {%d,%d}", ret, keyring->serial, atomic_read(&keyring->usage)); return ret; } EXPORT_SYMBOL(key_link); @@ -1036,90 +1216,36 @@ EXPORT_SYMBOL(key_link); */ int key_unlink(struct key *keyring, struct key *key) { - struct keyring_list *klist, *nklist; - int loop, ret; + struct assoc_array_edit *edit; + int ret; key_check(keyring); key_check(key); - ret = -ENOTDIR; if (keyring->type != &key_type_keyring) - goto error; + return -ENOTDIR; down_write(&keyring->sem); - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* search the keyring for the key */ - for (loop = 0; loop < klist->nkeys; loop++) - if (rcu_access_pointer(klist->keys[loop]) == key) - goto key_is_present; + edit = assoc_array_delete(&keyring->keys, &keyring_assoc_array_ops, + &key->index_key); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + goto error; } - - up_write(&keyring->sem); ret = -ENOENT; - goto error; - -key_is_present: - /* we need to copy the key list for RCU purposes */ - nklist = kmalloc(sizeof(*klist) + - sizeof(struct key *) * klist->maxkeys, - GFP_KERNEL); - if (!nklist) - goto nomem; - nklist->maxkeys = klist->maxkeys; - nklist->nkeys = klist->nkeys - 1; - - if (loop > 0) - memcpy(&nklist->keys[0], - &klist->keys[0], - loop * sizeof(struct key *)); - - if (loop < nklist->nkeys) - memcpy(&nklist->keys[loop], - &klist->keys[loop + 1], - (nklist->nkeys - loop) * sizeof(struct key *)); - - /* adjust the user's quota */ - key_payload_reserve(keyring, - keyring->datalen - KEYQUOTA_LINK_BYTES); - - rcu_assign_pointer(keyring->payload.subscriptions, nklist); - - up_write(&keyring->sem); - - /* schedule for later cleanup */ - klist->delkey = loop; - call_rcu(&klist->rcu, keyring_unlink_rcu_disposal); + if (edit == NULL) + goto error; + assoc_array_apply_edit(edit); ret = 0; error: - return ret; -nomem: - ret = -ENOMEM; up_write(&keyring->sem); - goto error; + return ret; } EXPORT_SYMBOL(key_unlink); -/* - * Dispose of a keyring list after the RCU grace period, releasing the keys it - * links to. - */ -static void keyring_clear_rcu_disposal(struct rcu_head *rcu) -{ - struct keyring_list *klist; - int loop; - - klist = container_of(rcu, struct keyring_list, rcu); - - for (loop = klist->nkeys - 1; loop >= 0; loop--) - key_put(rcu_access_pointer(klist->keys[loop])); - - kfree(klist); -} - /** * keyring_clear - Clear a keyring * @keyring: The keyring to clear. @@ -1130,33 +1256,25 @@ static void keyring_clear_rcu_disposal(struct rcu_head *rcu) */ int keyring_clear(struct key *keyring) { - struct keyring_list *klist; + struct assoc_array_edit *edit; int ret; - ret = -ENOTDIR; - if (keyring->type == &key_type_keyring) { - /* detach the pointer block with the locks held */ - down_write(&keyring->sem); - - klist = rcu_dereference_locked_keyring(keyring); - if (klist) { - /* adjust the quota */ - key_payload_reserve(keyring, - sizeof(struct keyring_list)); - - rcu_assign_pointer(keyring->payload.subscriptions, - NULL); - } - - up_write(&keyring->sem); + if (keyring->type != &key_type_keyring) + return -ENOTDIR; - /* free the keys after the locks have been dropped */ - if (klist) - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); + down_write(&keyring->sem); + edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + if (IS_ERR(edit)) { + ret = PTR_ERR(edit); + } else { + if (edit) + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, 0); ret = 0; } + up_write(&keyring->sem); return ret; } EXPORT_SYMBOL(keyring_clear); @@ -1168,17 +1286,25 @@ EXPORT_SYMBOL(keyring_clear); */ static void keyring_revoke(struct key *keyring) { - struct keyring_list *klist; + struct assoc_array_edit *edit; - klist = rcu_dereference_locked_keyring(keyring); + edit = assoc_array_clear(&keyring->keys, &keyring_assoc_array_ops); + if (!IS_ERR(edit)) { + if (edit) + assoc_array_apply_edit(edit); + key_payload_reserve(keyring, 0); + } +} - /* adjust the quota */ - key_payload_reserve(keyring, 0); +static bool gc_iterator(void *object, void *iterator_data) +{ + struct key *key = keyring_ptr_to_key(object); + time_t *limit = iterator_data; - if (klist) { - rcu_assign_pointer(keyring->payload.subscriptions, NULL); - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); - } + if (key_is_dead(key, *limit)) + return false; + key_get(key); + return true; } /* @@ -1191,88 +1317,12 @@ static void keyring_revoke(struct key *keyring) */ void keyring_gc(struct key *keyring, time_t limit) { - struct keyring_list *klist, *new; - struct key *key; - int loop, keep, max; - kenter("{%x,%s}", key_serial(keyring), keyring->description); down_write(&keyring->sem); - - klist = rcu_dereference_locked_keyring(keyring); - if (!klist) - goto no_klist; - - /* work out how many subscriptions we're keeping */ - keep = 0; - for (loop = klist->nkeys - 1; loop >= 0; loop--) - if (!key_is_dead(rcu_deref_link_locked(klist, loop, keyring), - limit)) - keep++; - - if (keep == klist->nkeys) - goto just_return; - - /* allocate a new keyring payload */ - max = roundup(keep, 4); - new = kmalloc(sizeof(struct keyring_list) + max * sizeof(struct key *), - GFP_KERNEL); - if (!new) - goto nomem; - new->maxkeys = max; - new->nkeys = 0; - new->delkey = 0; - - /* install the live keys - * - must take care as expired keys may be updated back to life - */ - keep = 0; - for (loop = klist->nkeys - 1; loop >= 0; loop--) { - key = rcu_deref_link_locked(klist, loop, keyring); - if (!key_is_dead(key, limit)) { - if (keep >= max) - goto discard_new; - RCU_INIT_POINTER(new->keys[keep++], key_get(key)); - } - } - new->nkeys = keep; - - /* adjust the quota */ - key_payload_reserve(keyring, - sizeof(struct keyring_list) + - KEYQUOTA_LINK_BYTES * keep); - - if (keep == 0) { - rcu_assign_pointer(keyring->payload.subscriptions, NULL); - kfree(new); - } else { - rcu_assign_pointer(keyring->payload.subscriptions, new); - } - - up_write(&keyring->sem); - - call_rcu(&klist->rcu, keyring_clear_rcu_disposal); - kleave(" [yes]"); - return; - -discard_new: - new->nkeys = keep; - keyring_clear_rcu_disposal(&new->rcu); + assoc_array_gc(&keyring->keys, &keyring_assoc_array_ops, + gc_iterator, &limit); up_write(&keyring->sem); - kleave(" [discard]"); - return; -just_return: - up_write(&keyring->sem); - kleave(" [no dead]"); - return; - -no_klist: - up_write(&keyring->sem); - kleave(" [no_klist]"); - return; - -nomem: - up_write(&keyring->sem); - kleave(" [oom]"); + kleave(""); } diff --git a/security/keys/request_key.c b/security/keys/request_key.c index ab75df4745af..df94827103d0 100644 --- a/security/keys/request_key.c +++ b/security/keys/request_key.c @@ -351,7 +351,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, struct key_user *user, struct key **_key) { - unsigned long prealloc; + struct assoc_array_edit *edit; struct key *key; key_perm_t perm; key_ref_t key_ref; @@ -380,7 +380,7 @@ static int construct_alloc_key(struct keyring_search_context *ctx, set_bit(KEY_FLAG_USER_CONSTRUCT, &key->flags); if (dest_keyring) { - ret = __key_link_begin(dest_keyring, &ctx->index_key, &prealloc); + ret = __key_link_begin(dest_keyring, &ctx->index_key, &edit); if (ret < 0) goto link_prealloc_failed; } @@ -395,11 +395,11 @@ static int construct_alloc_key(struct keyring_search_context *ctx, goto key_already_present; if (dest_keyring) - __key_link(dest_keyring, key, &prealloc); + __key_link(key, &edit); mutex_unlock(&key_construction_mutex); if (dest_keyring) - __key_link_end(dest_keyring, &ctx->index_key, prealloc); + __key_link_end(dest_keyring, &ctx->index_key, edit); mutex_unlock(&user->cons_lock); *_key = key; kleave(" = 0 [%d]", key_serial(key)); @@ -414,8 +414,8 @@ key_already_present: if (dest_keyring) { ret = __key_link_check_live_key(dest_keyring, key); if (ret == 0) - __key_link(dest_keyring, key, &prealloc); - __key_link_end(dest_keyring, &ctx->index_key, prealloc); + __key_link(key, &edit); + __key_link_end(dest_keyring, &ctx->index_key, edit); if (ret < 0) goto link_check_failed; } -- cgit v1.2.3 From 4ff158229770d245ce0a961a594adfc0e86d1cc5 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Tue, 17 Sep 2013 15:14:52 +0400 Subject: MPILIB: add module description and license This patch fixes lack of license, otherwise mpi.ko taints kernel. Signed-off-by: Konstantin Khlebnikov Signed-off-by: David Howells --- lib/mpi/mpiutil.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c index 657979f71bef..bf076d281d40 100644 --- a/lib/mpi/mpiutil.c +++ b/lib/mpi/mpiutil.c @@ -121,3 +121,6 @@ void mpi_free(MPI a) kfree(a); } EXPORT_SYMBOL_GPL(mpi_free); + +MODULE_DESCRIPTION("Multiprecision maths library"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3