summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorMatthew Wilcox (Oracle) <willy@infradead.org>2019-05-14 16:05:45 -0400
committerMatthew Wilcox (Oracle) <willy@infradead.org>2019-06-02 23:00:24 -0400
commit5c089fd0c73411f2170ab795c9ffc16718c7d007 (patch)
tree896b8028b37e5fe8705f113a7a628c3e1da85de4
parent7b785645e8f13e17cbce492708cf6e7039d32e46 (diff)
downloadlinux-5c089fd0c73411f2170ab795c9ffc16718c7d007.tar.bz2
idr: Fix idr_get_next race with idr_remove
If the entry is deleted from the IDR between the call to radix_tree_iter_find() and rcu_dereference_raw(), idr_get_next() will return NULL, which will end the iteration prematurely. We should instead continue to the next entry in the IDR. This only happens if the iteration is protected by the RCU lock. Most IDR users use a spinlock or semaphore to exclude simultaneous modifications. It was noticed once the PID allocator was converted to use the IDR, as it uses the RCU lock, but there may be other users elsewhere in the kernel. We can't use the normal pattern of calling radix_tree_deref_retry() (which catches both a retry entry in a leaf node and a node entry in the root) as the IDR supports storing entries which are unaligned, which will trigger an infinite loop if they are encountered. Instead, we have to explicitly check whether the entry is a retry entry. Fixes: 0a835c4f090a ("Reimplement IDR and IDA using the radix tree") Reported-by: Brendan Gregg <bgregg@netflix.com> Tested-by: Brendan Gregg <bgregg@netflix.com> Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
-rw-r--r--lib/idr.c14
-rw-r--r--tools/testing/radix-tree/idr-test.c46
2 files changed, 58 insertions, 2 deletions
diff --git a/lib/idr.c b/lib/idr.c
index c34e256d2f01..66a374892482 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -228,11 +228,21 @@ void *idr_get_next(struct idr *idr, int *nextid)
{
struct radix_tree_iter iter;
void __rcu **slot;
+ void *entry = NULL;
unsigned long base = idr->idr_base;
unsigned long id = *nextid;
id = (id < base) ? 0 : id - base;
- slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
+ radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, id) {
+ entry = rcu_dereference_raw(*slot);
+ if (!entry)
+ continue;
+ if (!xa_is_internal(entry))
+ break;
+ if (slot != &idr->idr_rt.xa_head && !xa_is_retry(entry))
+ break;
+ slot = radix_tree_iter_retry(&iter);
+ }
if (!slot)
return NULL;
id = iter.index + base;
@@ -241,7 +251,7 @@ void *idr_get_next(struct idr *idr, int *nextid)
return NULL;
*nextid = id;
- return rcu_dereference_raw(*slot);
+ return entry;
}
EXPORT_SYMBOL(idr_get_next);
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 1b63bdb7688f..fe33be4c2475 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -287,6 +287,51 @@ static void idr_align_test(struct idr *idr)
}
}
+DEFINE_IDR(find_idr);
+
+static void *idr_throbber(void *arg)
+{
+ time_t start = time(NULL);
+ int id = *(int *)arg;
+
+ rcu_register_thread();
+ do {
+ idr_alloc(&find_idr, xa_mk_value(id), id, id + 1, GFP_KERNEL);
+ idr_remove(&find_idr, id);
+ } while (time(NULL) < start + 10);
+ rcu_unregister_thread();
+
+ return NULL;
+}
+
+void idr_find_test_1(int anchor_id, int throbber_id)
+{
+ pthread_t throbber;
+ time_t start = time(NULL);
+
+ pthread_create(&throbber, NULL, idr_throbber, &throbber_id);
+
+ BUG_ON(idr_alloc(&find_idr, xa_mk_value(anchor_id), anchor_id,
+ anchor_id + 1, GFP_KERNEL) != anchor_id);
+
+ do {
+ int id = 0;
+ void *entry = idr_get_next(&find_idr, &id);
+ BUG_ON(entry != xa_mk_value(id));
+ } while (time(NULL) < start + 11);
+
+ pthread_join(throbber, NULL);
+
+ idr_remove(&find_idr, anchor_id);
+ BUG_ON(!idr_is_empty(&find_idr));
+}
+
+void idr_find_test(void)
+{
+ idr_find_test_1(100000, 0);
+ idr_find_test_1(0, 100000);
+}
+
void idr_checks(void)
{
unsigned long i;
@@ -368,6 +413,7 @@ void idr_checks(void)
idr_u32_test(1);
idr_u32_test(0);
idr_align_test(&idr);
+ idr_find_test();
}
#define module_init(x)