summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author: David S. Miller <davem@davemloft.net> 2016-06-15 23:42:58 -0700
committer: David S. Miller <davem@davemloft.net> 2016-06-15 23:42:58 -0700
commit: f0362eab22f20e7060870321c837e1e5f68c45fb (patch)
tree: b4f7d122f21e841f0057c624e064f8ca30622e48
parent: b478af0cd7957faca83779fe6832abae163f7159 (diff)
parent: 3b1efb196eee45b2f0c4994e0c43edb5e367f620 (diff)
download: linux-f0362eab22f20e7060870321c837e1e5f68c45fb.tar.bz2
Merge branch 'bpf-fd-array-release'
Daniel Borkmann says:

====================
bpf: improve fd array release

This set improves BPF perf fd array map release with respect to purging entries; the first two patches extend the API as needed. Please see individual patches for more details.
====================

Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r-- include/linux/bpf.h 24
-rw-r--r-- kernel/bpf/arraymap.c 116
-rw-r--r-- kernel/bpf/syscall.c 13
-rw-r--r-- kernel/trace/bpf_trace.c 18
4 files changed, 122 insertions, 49 deletions
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1bcae82c6cb1..9adfef694a25 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -13,13 +13,15 @@
#include <linux/percpu.h>
#include <linux/err.h>
+struct perf_event;
struct bpf_map;
/* map is generic key/value storage optionally accesible by eBPF programs */
struct bpf_map_ops {
/* funcs callable from userspace (via syscall) */
struct bpf_map *(*map_alloc)(union bpf_attr *attr);
- void (*map_free)(struct bpf_map *);
+ void (*map_release)(struct bpf_map *map, struct file *map_file);
+ void (*map_free)(struct bpf_map *map);
int (*map_get_next_key)(struct bpf_map *map, void *key, void *next_key);
/* funcs callable from userspace and from eBPF programs */
@@ -28,8 +30,9 @@ struct bpf_map_ops {
int (*map_delete_elem)(struct bpf_map *map, void *key);
/* funcs called by prog_array and perf_event_array map */
- void *(*map_fd_get_ptr) (struct bpf_map *map, int fd);
- void (*map_fd_put_ptr) (void *ptr);
+ void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file,
+ int fd);
+ void (*map_fd_put_ptr)(void *ptr);
};
struct bpf_map {
@@ -164,11 +167,19 @@ struct bpf_array {
void __percpu *pptrs[0] __aligned(8);
};
};
+
#define MAX_TAIL_CALL_CNT 32
+struct bpf_event_entry {
+ struct perf_event *event;
+ struct file *perf_file;
+ struct file *map_file;
+ struct rcu_head rcu;
+};
+
u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5);
u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
-void bpf_fd_array_map_clear(struct bpf_map *map);
+
bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp);
const struct bpf_func_proto *bpf_get_trace_printk_proto(void);
@@ -206,8 +217,13 @@ int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
u64 flags);
int bpf_percpu_array_update(struct bpf_map *map, void *key, void *value,
u64 flags);
+
int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value);
+int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
+ void *key, void *value, u64 map_flags);
+void bpf_fd_array_map_clear(struct bpf_map *map);
+
/* memcpy that is used with 8-byte aligned pointers, power-of-8 size and
* forced to use 'long' read/writes to try to atomically copy long counters.
* Best-effort only. No barriers here, since it _will_ race with concurrent
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 76d5a794e426..5af30732697b 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -328,8 +328,8 @@ static void *fd_array_map_lookup_elem(struct bpf_map *map, void *key)
}
/* only called from syscall */
-static int fd_array_map_update_elem(struct bpf_map *map, void *key,
- void *value, u64 map_flags)
+int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file,
+ void *key, void *value, u64 map_flags)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
void *new_ptr, *old_ptr;
@@ -342,7 +342,7 @@ static int fd_array_map_update_elem(struct bpf_map *map, void *key,
return -E2BIG;
ufd = *(u32 *)value;
- new_ptr = map->ops->map_fd_get_ptr(map, ufd);
+ new_ptr = map->ops->map_fd_get_ptr(map, map_file, ufd);
if (IS_ERR(new_ptr))
return PTR_ERR(new_ptr);
@@ -371,10 +371,12 @@ static int fd_array_map_delete_elem(struct bpf_map *map, void *key)
}
}
-static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
+static void *prog_fd_array_get_ptr(struct bpf_map *map,
+ struct file *map_file, int fd)
{
struct bpf_array *array = container_of(map, struct bpf_array, map);
struct bpf_prog *prog = bpf_prog_get(fd);
+
if (IS_ERR(prog))
return prog;
@@ -382,6 +384,7 @@ static void *prog_fd_array_get_ptr(struct bpf_map *map, int fd)
bpf_prog_put(prog);
return ERR_PTR(-EINVAL);
}
+
return prog;
}
@@ -407,7 +410,6 @@ static const struct bpf_map_ops prog_array_ops = {
.map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
- .map_update_elem = fd_array_map_update_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = prog_fd_array_get_ptr,
.map_fd_put_ptr = prog_fd_array_put_ptr,
@@ -425,59 +427,105 @@ static int __init register_prog_array_map(void)
}
late_initcall(register_prog_array_map);
-static void perf_event_array_map_free(struct bpf_map *map)
+static struct bpf_event_entry *bpf_event_entry_gen(struct file *perf_file,
+ struct file *map_file)
{
- bpf_fd_array_map_clear(map);
- fd_array_map_free(map);
+ struct bpf_event_entry *ee;
+
+ ee = kzalloc(sizeof(*ee), GFP_KERNEL);
+ if (ee) {
+ ee->event = perf_file->private_data;
+ ee->perf_file = perf_file;
+ ee->map_file = map_file;
+ }
+
+ return ee;
}
-static void *perf_event_fd_array_get_ptr(struct bpf_map *map, int fd)
+static void __bpf_event_entry_free(struct rcu_head *rcu)
{
- struct perf_event *event;
- const struct perf_event_attr *attr;
- struct file *file;
+ struct bpf_event_entry *ee;
- file = perf_event_get(fd);
- if (IS_ERR(file))
- return file;
+ ee = container_of(rcu, struct bpf_event_entry, rcu);
+ fput(ee->perf_file);
+ kfree(ee);
+}
- event = file->private_data;
+static void bpf_event_entry_free_rcu(struct bpf_event_entry *ee)
+{
+ call_rcu(&ee->rcu, __bpf_event_entry_free);
+}
- attr = perf_event_attrs(event);
- if (IS_ERR(attr))
- goto err;
+static void *perf_event_fd_array_get_ptr(struct bpf_map *map,
+ struct file *map_file, int fd)
+{
+ const struct perf_event_attr *attr;
+ struct bpf_event_entry *ee;
+ struct perf_event *event;
+ struct file *perf_file;
- if (attr->inherit)
- goto err;
+ perf_file = perf_event_get(fd);
+ if (IS_ERR(perf_file))
+ return perf_file;
- if (attr->type == PERF_TYPE_RAW)
- return file;
+ event = perf_file->private_data;
+ ee = ERR_PTR(-EINVAL);
- if (attr->type == PERF_TYPE_HARDWARE)
- return file;
+ attr = perf_event_attrs(event);
+ if (IS_ERR(attr) || attr->inherit)
+ goto err_out;
+
+ switch (attr->type) {
+ case PERF_TYPE_SOFTWARE:
+ if (attr->config != PERF_COUNT_SW_BPF_OUTPUT)
+ goto err_out;
+ /* fall-through */
+ case PERF_TYPE_RAW:
+ case PERF_TYPE_HARDWARE:
+ ee = bpf_event_entry_gen(perf_file, map_file);
+ if (ee)
+ return ee;
+ ee = ERR_PTR(-ENOMEM);
+ /* fall-through */
+ default:
+ break;
+ }
- if (attr->type == PERF_TYPE_SOFTWARE &&
- attr->config == PERF_COUNT_SW_BPF_OUTPUT)
- return file;
-err:
- fput(file);
- return ERR_PTR(-EINVAL);
+err_out:
+ fput(perf_file);
+ return ee;
}
static void perf_event_fd_array_put_ptr(void *ptr)
{
- fput((struct file *)ptr);
+ bpf_event_entry_free_rcu(ptr);
+}
+
+static void perf_event_fd_array_release(struct bpf_map *map,
+ struct file *map_file)
+{
+ struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_event_entry *ee;
+ int i;
+
+ rcu_read_lock();
+ for (i = 0; i < array->map.max_entries; i++) {
+ ee = READ_ONCE(array->ptrs[i]);
+ if (ee && ee->map_file == map_file)
+ fd_array_map_delete_elem(map, &i);
+ }
+ rcu_read_unlock();
}
static const struct bpf_map_ops perf_event_array_ops = {
.map_alloc = fd_array_map_alloc,
- .map_free = perf_event_array_map_free,
+ .map_free = fd_array_map_free,
.map_get_next_key = array_map_get_next_key,
.map_lookup_elem = fd_array_map_lookup_elem,
- .map_update_elem = fd_array_map_update_elem,
.map_delete_elem = fd_array_map_delete_elem,
.map_fd_get_ptr = perf_event_fd_array_get_ptr,
.map_fd_put_ptr = perf_event_fd_array_put_ptr,
+ .map_release = perf_event_fd_array_release,
};
static struct bpf_map_type_list perf_event_array_type __read_mostly = {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 46ecce4b79ed..c23a4e9311b3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -124,7 +124,12 @@ void bpf_map_put_with_uref(struct bpf_map *map)
static int bpf_map_release(struct inode *inode, struct file *filp)
{
- bpf_map_put_with_uref(filp->private_data);
+ struct bpf_map *map = filp->private_data;
+
+ if (map->ops->map_release)
+ map->ops->map_release(map, filp);
+
+ bpf_map_put_with_uref(map);
return 0;
}
@@ -387,6 +392,12 @@ static int map_update_elem(union bpf_attr *attr)
err = bpf_percpu_hash_update(map, key, value, attr->flags);
} else if (map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY) {
err = bpf_percpu_array_update(map, key, value, attr->flags);
+ } else if (map->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY ||
+ map->map_type == BPF_MAP_TYPE_PROG_ARRAY) {
+ rcu_read_lock();
+ err = bpf_fd_array_map_update_elem(map, f.file, key, value,
+ attr->flags);
+ rcu_read_unlock();
} else {
rcu_read_lock();
err = map->ops->map_update_elem(map, key, value, attr->flags);
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 720b7bb01d43..037ea6ea3cb2 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -192,18 +192,17 @@ static u64 bpf_perf_event_read(u64 r1, u64 index, u64 r3, u64 r4, u64 r5)
{
struct bpf_map *map = (struct bpf_map *) (unsigned long) r1;
struct bpf_array *array = container_of(map, struct bpf_array, map);
+ struct bpf_event_entry *ee;
struct perf_event *event;
- struct file *file;
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
- file = READ_ONCE(array->ptrs[index]);
- if (unlikely(!file))
+ ee = READ_ONCE(array->ptrs[index]);
+ if (unlikely(!ee))
return -ENOENT;
- event = file->private_data;
-
+ event = ee->event;
/* make sure event is local and doesn't have pmu::count */
if (event->oncpu != smp_processor_id() ||
event->pmu->count)
@@ -233,8 +232,8 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
u64 index = flags & BPF_F_INDEX_MASK;
void *data = (void *) (long) r4;
struct perf_sample_data sample_data;
+ struct bpf_event_entry *ee;
struct perf_event *event;
- struct file *file;
struct perf_raw_record raw = {
.size = size,
.data = data,
@@ -247,12 +246,11 @@ static u64 bpf_perf_event_output(u64 r1, u64 r2, u64 flags, u64 r4, u64 size)
if (unlikely(index >= array->map.max_entries))
return -E2BIG;
- file = READ_ONCE(array->ptrs[index]);
- if (unlikely(!file))
+ ee = READ_ONCE(array->ptrs[index]);
+ if (unlikely(!ee))
return -ENOENT;
- event = file->private_data;
-
+ event = ee->event;
if (unlikely(event->attr.type != PERF_TYPE_SOFTWARE ||
event->attr.config != PERF_COUNT_SW_BPF_OUTPUT))
return -EINVAL;