summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorDavid S. Miller <davem@davemloft.net>2017-05-03 09:30:25 -0400
committerDavid S. Miller <davem@davemloft.net>2017-05-03 09:30:25 -0400
commit8b8e3ad0c840f64a5de2c292cba33976571b9b73 (patch)
tree31ee4d76035341b8cbab10e224651afb7cc4f53f
parent89c9fea3c8034cdb2fd745f551cde0b507fd6893 (diff)
parent9178b4c17d1b8b21738771d53d999c990e69f538 (diff)
downloadlinux-8b8e3ad0c840f64a5de2c292cba33976571b9b73.tar.bz2
Merge branch 'sample-bpf-loader-fixes'
Jesper Dangaard Brouer says: ==================== Improve bpf ELF-loader under samples/bpf This series improves and fixes bpf ELF loader and programs under samples/bpf. The bpf_load.c created some hard to debug issues when the struct (bpf_map_def) used in the ELF maps section format changed in commit fb30d4b71214 ("bpf: Add tests for map-in-map"). This was hotfixed in commit 409526bea3c3 ("samples/bpf: bpf_load.c detect and abort if ELF maps section size is wrong") by detecting the issue and aborting the program. In most situations the bpf-loader should be able to handle these kind of changes to the struct size. This patch series aim to do proper backward and forward compabilility handling when loading ELF files. This series also adjust the callback that was introduced in commit 9fd63d05f3e8 ("bpf: Allow bpf sample programs (*_user.c) to change bpf_map_def") to use the new bpf_map_data structure, before more users start to use this callback. Hoping these changes can make the merge window, as above mentioned commits have not been merged yet, and it would be good to avoid users hitting these issues. ==================== Acked-by: Daniel Borkmann <daniel@iogearbox.net> Signed-off-by: David S. Miller <davem@davemloft.net>
-rw-r--r--samples/bpf/bpf_load.c229
-rw-r--r--samples/bpf/bpf_load.h18
-rw-r--r--samples/bpf/map_perf_test_user.c14
-rw-r--r--samples/bpf/tracex2_user.c7
-rw-r--r--samples/bpf/tracex3_user.c7
-rw-r--r--samples/bpf/tracex4_user.c8
6 files changed, 201 insertions, 82 deletions
diff --git a/samples/bpf/bpf_load.c b/samples/bpf/bpf_load.c
index 4221dc359453..74456b3eb89a 100644
--- a/samples/bpf/bpf_load.c
+++ b/samples/bpf/bpf_load.c
@@ -39,6 +39,9 @@ int event_fd[MAX_PROGS];
int prog_cnt;
int prog_array_fd = -1;
+struct bpf_map_data map_data[MAX_MAPS];
+int map_data_count = 0;
+
static int populate_prog_array(const char *event, int prog_fd)
{
int ind = atoi(event), err;
@@ -186,42 +189,45 @@ static int load_and_attach(const char *event, struct bpf_insn *prog, int size)
return 0;
}
-static int load_maps(struct bpf_map_def *maps, int nr_maps,
- const char **map_names, fixup_map_cb fixup_map)
+static int load_maps(struct bpf_map_data *maps, int nr_maps,
+ fixup_map_cb fixup_map)
{
int i;
- /*
- * Warning: Using "maps" pointing to ELF data_maps->d_buf as
- * an array of struct bpf_map_def is a wrong assumption about
- * the ELF maps section format.
- */
+
for (i = 0; i < nr_maps; i++) {
- if (fixup_map)
- fixup_map(&maps[i], map_names[i], i);
-
- if (maps[i].type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
- maps[i].type == BPF_MAP_TYPE_HASH_OF_MAPS) {
- int inner_map_fd = map_fd[maps[i].inner_map_idx];
-
- map_fd[i] = bpf_create_map_in_map(maps[i].type,
- maps[i].key_size,
- inner_map_fd,
- maps[i].max_entries,
- maps[i].map_flags);
+ if (fixup_map) {
+ fixup_map(&maps[i], i);
+ /* Allow userspace to assign map FD prior to creation */
+ if (maps[i].fd != -1) {
+ map_fd[i] = maps[i].fd;
+ continue;
+ }
+ }
+
+ if (maps[i].def.type == BPF_MAP_TYPE_ARRAY_OF_MAPS ||
+ maps[i].def.type == BPF_MAP_TYPE_HASH_OF_MAPS) {
+ int inner_map_fd = map_fd[maps[i].def.inner_map_idx];
+
+ map_fd[i] = bpf_create_map_in_map(maps[i].def.type,
+ maps[i].def.key_size,
+ inner_map_fd,
+ maps[i].def.max_entries,
+ maps[i].def.map_flags);
} else {
- map_fd[i] = bpf_create_map(maps[i].type,
- maps[i].key_size,
- maps[i].value_size,
- maps[i].max_entries,
- maps[i].map_flags);
+ map_fd[i] = bpf_create_map(maps[i].def.type,
+ maps[i].def.key_size,
+ maps[i].def.value_size,
+ maps[i].def.max_entries,
+ maps[i].def.map_flags);
}
if (map_fd[i] < 0) {
printf("failed to create a map: %d %s\n",
errno, strerror(errno));
return 1;
}
+ maps[i].fd = map_fd[i];
- if (maps[i].type == BPF_MAP_TYPE_PROG_ARRAY)
+ if (maps[i].def.type == BPF_MAP_TYPE_PROG_ARRAY)
prog_array_fd = map_fd[i];
}
return 0;
@@ -251,7 +257,8 @@ static int get_sec(Elf *elf, int i, GElf_Ehdr *ehdr, char **shname,
}
static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
- GElf_Shdr *shdr, struct bpf_insn *insn)
+ GElf_Shdr *shdr, struct bpf_insn *insn,
+ struct bpf_map_data *maps, int nr_maps)
{
int i, nrels;
@@ -261,6 +268,8 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
GElf_Sym sym;
GElf_Rel rel;
unsigned int insn_idx;
+ bool match = false;
+ int j, map_idx;
gelf_getrel(data, i, &rel);
@@ -274,11 +283,21 @@ static int parse_relo_and_apply(Elf_Data *data, Elf_Data *symbols,
return 1;
}
insn[insn_idx].src_reg = BPF_PSEUDO_MAP_FD;
- /*
- * Warning: Using sizeof(struct bpf_map_def) here is a
- * wrong assumption about ELF maps section format
- */
- insn[insn_idx].imm = map_fd[sym.st_value / sizeof(struct bpf_map_def)];
+
+ /* Match FD relocation against recorded map_data[] offset */
+ for (map_idx = 0; map_idx < nr_maps; map_idx++) {
+ if (maps[map_idx].elf_offset == sym.st_value) {
+ match = true;
+ break;
+ }
+ }
+ if (match) {
+ insn[insn_idx].imm = maps[map_idx].fd;
+ } else {
+ printf("invalid relo for insn[%d] no map_data match\n",
+ insn_idx);
+ return 1;
+ }
}
return 0;
@@ -297,40 +316,112 @@ static int cmp_symbols(const void *l, const void *r)
return 0;
}
-static int get_sorted_map_names(Elf *elf, Elf_Data *symbols, int maps_shndx,
- int strtabidx, char **map_names)
+static int load_elf_maps_section(struct bpf_map_data *maps, int maps_shndx,
+ Elf *elf, Elf_Data *symbols, int strtabidx)
{
- GElf_Sym map_symbols[MAX_MAPS];
- int i, nr_maps = 0;
+ int map_sz_elf, map_sz_copy;
+ bool validate_zero = false;
+ Elf_Data *data_maps;
+ int i, nr_maps;
+ GElf_Sym *sym;
+ Elf_Scn *scn;
+ int copy_sz;
+
+ if (maps_shndx < 0)
+ return -EINVAL;
+ if (!symbols)
+ return -EINVAL;
+
+ /* Get data for maps section via elf index */
+ scn = elf_getscn(elf, maps_shndx);
+ if (scn)
+ data_maps = elf_getdata(scn, NULL);
+ if (!scn || !data_maps) {
+ printf("Failed to get Elf_Data from maps section %d\n",
+ maps_shndx);
+ return -EINVAL;
+ }
- for (i = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
- assert(nr_maps < MAX_MAPS);
- if (!gelf_getsym(symbols, i, &map_symbols[nr_maps]))
+ /* For each map get corrosponding symbol table entry */
+ sym = calloc(MAX_MAPS+1, sizeof(GElf_Sym));
+ for (i = 0, nr_maps = 0; i < symbols->d_size / sizeof(GElf_Sym); i++) {
+ assert(nr_maps < MAX_MAPS+1);
+ if (!gelf_getsym(symbols, i, &sym[nr_maps]))
continue;
- if (map_symbols[nr_maps].st_shndx != maps_shndx)
+ if (sym[nr_maps].st_shndx != maps_shndx)
continue;
+ /* Only increment iif maps section */
nr_maps++;
}
- qsort(map_symbols, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+ /* Align to map_fd[] order, via sort on offset in sym.st_value */
+ qsort(sym, nr_maps, sizeof(GElf_Sym), cmp_symbols);
+
+ /* Keeping compatible with ELF maps section changes
+ * ------------------------------------------------
+ * The program size of struct bpf_map_def is known by loader
+ * code, but struct stored in ELF file can be different.
+ *
+ * Unfortunately sym[i].st_size is zero. To calculate the
+ * struct size stored in the ELF file, assume all struct have
+ * the same size, and simply divide with number of map
+ * symbols.
+ */
+ map_sz_elf = data_maps->d_size / nr_maps;
+ map_sz_copy = sizeof(struct bpf_map_def);
+ if (map_sz_elf < map_sz_copy) {
+ /*
+ * Backward compat, loading older ELF file with
+ * smaller struct, keeping remaining bytes zero.
+ */
+ map_sz_copy = map_sz_elf;
+ } else if (map_sz_elf > map_sz_copy) {
+ /*
+ * Forward compat, loading newer ELF file with larger
+ * struct with unknown features. Assume zero means
+ * feature not used. Thus, validate rest of struct
+ * data is zero.
+ */
+ validate_zero = true;
+ }
+ /* Memcpy relevant part of ELF maps data to loader maps */
for (i = 0; i < nr_maps; i++) {
- char *map_name;
-
- map_name = elf_strptr(elf, strtabidx, map_symbols[i].st_name);
- if (!map_name) {
- printf("cannot get map symbol\n");
- return -1;
- }
-
- map_names[i] = strdup(map_name);
- if (!map_names[i]) {
+ unsigned char *addr, *end;
+ struct bpf_map_def *def;
+ const char *map_name;
+ size_t offset;
+
+ map_name = elf_strptr(elf, strtabidx, sym[i].st_name);
+ maps[i].name = strdup(map_name);
+ if (!maps[i].name) {
printf("strdup(%s): %s(%d)\n", map_name,
strerror(errno), errno);
- return -1;
+ free(sym);
+ return -errno;
+ }
+
+ /* Symbol value is offset into ELF maps section data area */
+ offset = sym[i].st_value;
+ def = (struct bpf_map_def *)(data_maps->d_buf + offset);
+ maps[i].elf_offset = offset;
+ memset(&maps[i].def, 0, sizeof(struct bpf_map_def));
+ memcpy(&maps[i].def, def, map_sz_copy);
+
+ /* Verify no newer features were requested */
+ if (validate_zero) {
+ addr = (unsigned char*) def + map_sz_copy;
+ end = (unsigned char*) def + map_sz_elf;
+ for (; addr < end; addr++) {
+ if (*addr != 0) {
+ free(sym);
+ return -EFBIG;
+ }
+ }
}
}
+ free(sym);
return nr_maps;
}
@@ -341,7 +432,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
GElf_Ehdr ehdr;
GElf_Shdr shdr, shdr_prog;
Elf_Data *data, *data_prog, *data_maps = NULL, *symbols = NULL;
- char *shname, *shname_prog, *map_names[MAX_MAPS] = { NULL };
+ char *shname, *shname_prog;
+ int nr_maps = 0;
/* reset global variables */
kern_version = 0;
@@ -389,8 +481,12 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
}
memcpy(&kern_version, data->d_buf, sizeof(int));
} else if (strcmp(shname, "maps") == 0) {
+ int j;
+
maps_shndx = i;
data_maps = data;
+ for (j = 0; j < MAX_MAPS; j++)
+ map_data[j].fd = -1;
} else if (shdr.sh_type == SHT_SYMTAB) {
strtabidx = shdr.sh_link;
symbols = data;
@@ -405,27 +501,17 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
}
if (data_maps) {
- int nr_maps;
- int prog_elf_map_sz;
-
- nr_maps = get_sorted_map_names(elf, symbols, maps_shndx,
- strtabidx, map_names);
- if (nr_maps < 0)
- goto done;
-
- /* Deduce map struct size stored in ELF maps section */
- prog_elf_map_sz = data_maps->d_size / nr_maps;
- if (prog_elf_map_sz != sizeof(struct bpf_map_def)) {
- printf("Error: ELF maps sec wrong size (%d/%lu),"
- " old kern.o file?\n",
- prog_elf_map_sz, sizeof(struct bpf_map_def));
+ nr_maps = load_elf_maps_section(map_data, maps_shndx,
+ elf, symbols, strtabidx);
+ if (nr_maps < 0) {
+ printf("Error: Failed loading ELF maps (errno:%d):%s\n",
+ nr_maps, strerror(-nr_maps));
ret = 1;
goto done;
}
-
- if (load_maps(data_maps->d_buf, nr_maps,
- (const char **)map_names, fixup_map))
+ if (load_maps(map_data, nr_maps, fixup_map))
goto done;
+ map_data_count = nr_maps;
processed_sec[maps_shndx] = true;
}
@@ -453,7 +539,8 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
processed_sec[shdr.sh_info] = true;
processed_sec[i] = true;
- if (parse_relo_and_apply(data, symbols, &shdr, insns))
+ if (parse_relo_and_apply(data, symbols, &shdr, insns,
+ map_data, nr_maps))
continue;
if (memcmp(shname_prog, "kprobe/", 7) == 0 ||
@@ -488,8 +575,6 @@ static int do_load_bpf_file(const char *path, fixup_map_cb fixup_map)
ret = 0;
done:
- for (i = 0; i < MAX_MAPS; i++)
- free(map_names[i]);
close(fd);
return ret;
}
diff --git a/samples/bpf/bpf_load.h b/samples/bpf/bpf_load.h
index 05822f83173a..ca0563d04744 100644
--- a/samples/bpf/bpf_load.h
+++ b/samples/bpf/bpf_load.h
@@ -15,15 +15,27 @@ struct bpf_map_def {
unsigned int inner_map_idx;
};
-typedef void (*fixup_map_cb)(struct bpf_map_def *map, const char *map_name,
- int idx);
+struct bpf_map_data {
+ int fd;
+ char *name;
+ size_t elf_offset;
+ struct bpf_map_def def;
+};
+
+typedef void (*fixup_map_cb)(struct bpf_map_data *map, int idx);
-extern int map_fd[MAX_MAPS];
extern int prog_fd[MAX_PROGS];
extern int event_fd[MAX_PROGS];
extern char bpf_log_buf[BPF_LOG_BUF_SIZE];
extern int prog_cnt;
+/* There is a one-to-one mapping between map_fd[] and map_data[].
+ * The map_data[] just contains more rich info on the given map.
+ */
+extern int map_fd[MAX_MAPS];
+extern struct bpf_map_data map_data[MAX_MAPS];
+extern int map_data_count;
+
/* parses elf file compiled by llvm .c->.o
* . parses 'maps' section and creates maps via BPF syscall
* . parses 'license' section and passes it to syscall
diff --git a/samples/bpf/map_perf_test_user.c b/samples/bpf/map_perf_test_user.c
index 6ac778153315..1a8894b5ac51 100644
--- a/samples/bpf/map_perf_test_user.c
+++ b/samples/bpf/map_perf_test_user.c
@@ -320,21 +320,21 @@ static void fill_lpm_trie(void)
assert(!r);
}
-static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
+static void fixup_map(struct bpf_map_data *map, int idx)
{
int i;
- if (!strcmp("inner_lru_hash_map", name)) {
+ if (!strcmp("inner_lru_hash_map", map->name)) {
inner_lru_hash_idx = idx;
- inner_lru_hash_size = map->max_entries;
+ inner_lru_hash_size = map->def.max_entries;
}
- if (!strcmp("array_of_lru_hashs", name)) {
+ if (!strcmp("array_of_lru_hashs", map->name)) {
if (inner_lru_hash_idx == -1) {
printf("inner_lru_hash_map must be defined before array_of_lru_hashs\n");
exit(1);
}
- map->inner_map_idx = inner_lru_hash_idx;
+ map->def.inner_map_idx = inner_lru_hash_idx;
array_of_lru_hashs_idx = idx;
}
@@ -345,9 +345,9 @@ static void fixup_map(struct bpf_map_def *map, const char *name, int idx)
/* Only change the max_entries for the enabled test(s) */
for (i = 0; i < NR_TESTS; i++) {
- if (!strcmp(test_map_names[i], name) &&
+ if (!strcmp(test_map_names[i], map->name) &&
(check_test_flags(i))) {
- map->max_entries = num_map_entries;
+ map->def.max_entries = num_map_entries;
}
}
}
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index ded9804c5034..7fee0f1ba9a3 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -4,6 +4,7 @@
#include <signal.h>
#include <linux/bpf.h>
#include <string.h>
+#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -112,6 +113,7 @@ static void int_exit(int sig)
int main(int ac, char **argv)
{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
char filename[256];
long key, next_key, value;
FILE *f;
@@ -119,6 +121,11 @@ int main(int ac, char **argv)
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
signal(SIGINT, int_exit);
/* start 'ping' in the background to have some kfree_skb events */
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index 8f7d199d5945..fe372239d505 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -11,6 +11,7 @@
#include <stdbool.h>
#include <string.h>
#include <linux/bpf.h>
+#include <sys/resource.h>
#include "libbpf.h"
#include "bpf_load.h"
@@ -112,11 +113,17 @@ static void print_hist(int fd)
int main(int ac, char **argv)
{
+ struct rlimit r = {1024*1024, RLIM_INFINITY};
char filename[256];
int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK)");
+ return 1;
+ }
+
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index 03449f773cb1..22c644f1f4c3 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -12,6 +12,8 @@
#include <string.h>
#include <time.h>
#include <linux/bpf.h>
+#include <sys/resource.h>
+
#include "libbpf.h"
#include "bpf_load.h"
@@ -50,11 +52,17 @@ static void print_old_objects(int fd)
int main(int ac, char **argv)
{
+ struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
char filename[256];
int i;
snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+ if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+ perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
+ return 1;
+ }
+
if (load_bpf_file(filename)) {
printf("%s", bpf_log_buf);
return 1;