From 51de082570e5374d4578cb159738485ddb0fddfe Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 30 Nov 2017 09:02:29 -0800 Subject: samples/bpf: Convert magic numbers to names in multi-prog cgroup test case Attach flag 1 == BPF_F_ALLOW_OVERRIDE; attach flag 2 == BPF_F_ALLOW_MULTI. Update the calls to bpf_prog_attach() in test_cgrp2_attach2.c to use the names over the magic numbers. Fixes: 39323e788cb67 ("samples/bpf: add multi-prog cgroup test case") Signed-off-by: David Ahern Signed-off-by: Daniel Borkmann --- samples/bpf/test_cgrp2_attach2.c | 36 ++++++++++++++++++++++++------------ 1 file changed, 24 insertions(+), 12 deletions(-) diff --git a/samples/bpf/test_cgrp2_attach2.c b/samples/bpf/test_cgrp2_attach2.c index 3e8232cc04a8..1af412ec6007 100644 --- a/samples/bpf/test_cgrp2_attach2.c +++ b/samples/bpf/test_cgrp2_attach2.c @@ -78,7 +78,8 @@ static int test_foo_bar(void) if (join_cgroup(FOO)) goto err; - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { + if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { log_err("Attaching prog to /foo"); goto err; } @@ -97,7 +98,8 @@ static int test_foo_bar(void) printf("Attached DROP prog. 
This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { log_err("Attaching prog to /foo/bar"); goto err; } @@ -114,7 +116,8 @@ static int test_foo_bar(void) "This ping in cgroup /foo/bar should fail...\n"); assert(system(PING_CMD) != 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { log_err("Attaching prog to /foo/bar"); goto err; } @@ -128,7 +131,8 @@ static int test_foo_bar(void) "This ping in cgroup /foo/bar should pass...\n"); assert(system(PING_CMD) == 0); - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { log_err("Attaching prog to /foo/bar"); goto err; } @@ -161,13 +165,15 @@ static int test_foo_bar(void) goto err; } - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 1)) { + if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { errno = 0; log_err("Unexpected success attaching overridable prog to /foo/bar"); goto err; } - if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 1)) { + if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { errno = 0; log_err("Unexpected success attaching overridable prog to /foo"); goto err; @@ -273,27 +279,33 @@ static int test_multiprog(void) if (join_cgroup("/cg1/cg2/cg3/cg4/cg5")) goto err; - if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { log_err("Attaching prog to cg1"); goto err; } - if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + 
BPF_F_ALLOW_MULTI)) { log_err("Unexpected success attaching the same prog to cg1"); goto err; } - if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, 2)) { + if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { log_err("Attaching prog2 to cg1"); goto err; } - if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, 1)) { + if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { log_err("Attaching prog to cg2"); goto err; } - if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, 2)) { + if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI)) { log_err("Attaching prog to cg3"); goto err; } - if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, 1)) { + if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE)) { log_err("Attaching prog to cg4"); goto err; } -- cgit v1.2.3 From 12a3cc8424fe1237aaeb982dec4f0914ddd22f3e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:35 -0800 Subject: bpf: fix stack state printing in verifier log fix incorrect stack state prints in print_verifier_state() Fixes: 638f5b90d460 ("bpf: reduce verifier memory consumption") Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d4593571c404..71a9429fdbb5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -279,7 +279,7 @@ static void print_verifier_state(struct bpf_verifier_env *env, for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { if (state->stack[i].slot_type[0] == STACK_SPILL) verbose(env, " fp%d=%s", - -MAX_BPF_STACK + i * BPF_REG_SIZE, + (-i - 1) * BPF_REG_SIZE, reg_type_str[state->stack[i].spilled_ptr.type]); } verbose(env, "\n"); -- cgit v1.2.3 From 
4e92024a48ecbd06fba3ccfb2174abd3d2f54a83 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:36 -0800 Subject: bpf: print liveness info to verifier log let verifier print register and stack liveness information into verifier log Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 71a9429fdbb5..f7229390c279 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -216,6 +216,17 @@ static const char * const reg_type_str[] = { [PTR_TO_PACKET_END] = "pkt_end", }; +static void print_liveness(struct bpf_verifier_env *env, + enum bpf_reg_liveness live) +{ + if (live & (REG_LIVE_READ | REG_LIVE_WRITTEN)) + verbose(env, "_"); + if (live & REG_LIVE_READ) + verbose(env, "r"); + if (live & REG_LIVE_WRITTEN) + verbose(env, "w"); +} + static void print_verifier_state(struct bpf_verifier_env *env, struct bpf_verifier_state *state) { @@ -228,7 +239,9 @@ static void print_verifier_state(struct bpf_verifier_env *env, t = reg->type; if (t == NOT_INIT) continue; - verbose(env, " R%d=%s", i, reg_type_str[t]); + verbose(env, " R%d", i); + print_liveness(env, reg->live); + verbose(env, "=%s", reg_type_str[t]); if ((t == SCALAR_VALUE || t == PTR_TO_STACK) && tnum_is_const(reg->var_off)) { /* reg->off should be 0 for SCALAR_VALUE */ @@ -277,10 +290,13 @@ static void print_verifier_state(struct bpf_verifier_env *env, } } for (i = 0; i < state->allocated_stack / BPF_REG_SIZE; i++) { - if (state->stack[i].slot_type[0] == STACK_SPILL) - verbose(env, " fp%d=%s", - (-i - 1) * BPF_REG_SIZE, + if (state->stack[i].slot_type[0] == STACK_SPILL) { + verbose(env, " fp%d", + (-i - 1) * BPF_REG_SIZE); + print_liveness(env, state->stack[i].spilled_ptr.live); + verbose(env, "=%s", reg_type_str[state->stack[i].spilled_ptr.type]); + } } 
verbose(env, "\n"); } -- cgit v1.2.3 From 19ceb4178d31e543479d75e20c2f9df08f16632f Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:37 -0800 Subject: bpf: don't mark FP reg as uninit when verifier hits an internal bug don't mark register R10==FP as uninit, since it's a read-only register and it's not technically correct to let verifier run further, since it may assume that R10 has valid auxiliary state. This issue was discovered while developing subsequent patches; although the code eventually changed so that aux reg state doesn't have pointers any more, it is still safer to avoid clearing a read-only register. Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index f7229390c279..14ad7c6e806a 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -584,8 +584,8 @@ static void mark_reg_unknown(struct bpf_verifier_env *env, { if (WARN_ON(regno >= MAX_BPF_REG)) { verbose(env, "mark_reg_unknown(regs, %u)\n", regno); - /* Something bad happened, let's kill all regs */ - for (regno = 0; regno < MAX_BPF_REG; regno++) + /* Something bad happened, let's kill all regs except FP */ + for (regno = 0; regno < BPF_REG_FP; regno++) __mark_reg_not_init(regs + regno); return; } @@ -603,8 +603,8 @@ static void mark_reg_not_init(struct bpf_verifier_env *env, { if (WARN_ON(regno >= MAX_BPF_REG)) { verbose(env, "mark_reg_not_init(regs, %u)\n", regno); - /* Something bad happened, let's kill all regs */ - for (regno = 0; regno < MAX_BPF_REG; regno++) + /* Something bad happened, let's kill all regs except FP */ + for (regno = 0; regno < BPF_REG_FP; regno++) __mark_reg_not_init(regs + regno); return; } -- cgit v1.2.3 From 2f18f62ee1648b2c93e6ab4a58d548010b0a67e4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:38 -0800
Subject: bpf: improve verifier liveness marks registers with pointers filled from stack were missing live_written marks which caused liveness propagation to unnecessarily mark more registers as live_read and miss state pruning opportunities later on. before after bpf_lb-DLB_L3.o 2285 2270 bpf_lb-DLB_L4.o 3723 3682 bpf_lb-DUNKNOWN.o 1110 1110 bpf_lxc-DDROP_ALL.o 27954 27876 bpf_lxc-DUNKNOWN.o 38954 38780 bpf_netdev.o 16943 16937 bpf_overlay.o 7929 7929 Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 14ad7c6e806a..46ff4e5b3fb7 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -795,6 +795,11 @@ static int check_stack_read(struct bpf_verifier_env *env, if (value_regno >= 0) { /* restore register state from stack */ state->regs[value_regno] = state->stack[spi].spilled_ptr; + /* mark reg as written since spilled pointer state likely + * has its liveness marks cleared by is_state_visited() + * which resets stack/reg liveness for state transitions + */ + state->regs[value_regno].live |= REG_LIVE_WRITTEN; mark_stack_slot_read(state, spi); } return 0; -- cgit v1.2.3 From 3bf15921c58df982f9b15d64754c483785bf66f3 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:39 -0800 Subject: bpf: improve JEQ/JNE path walking verifier knows how to trim paths that are known not to be taken at run-time when register containing run-time constant is compared with another constant. It was done only for JEQ comparison. Extend it to include JNE as well. More cases can be added in the future.
before after bpf_lb-DLB_L3.o 2270 2051 bpf_lb-DLB_L4.o 3682 3287 bpf_lb-DUNKNOWN.o 1110 1080 bpf_lxc-DDROP_ALL.o 27876 24980 bpf_lxc-DUNKNOWN.o 38780 34308 bpf_netdev.o 16937 15404 bpf_overlay.o 7929 7191 Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 46ff4e5b3fb7..afe9a1a0a5fe 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -2955,8 +2955,9 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env, if (BPF_SRC(insn->code) == BPF_K && (opcode == BPF_JEQ || opcode == BPF_JNE) && dst_reg->type == SCALAR_VALUE && - tnum_equals_const(dst_reg->var_off, insn->imm)) { - if (opcode == BPF_JEQ) { + tnum_is_const(dst_reg->var_off)) { + if ((opcode == BPF_JEQ && dst_reg->var_off.value == insn->imm) || + (opcode == BPF_JNE && dst_reg->var_off.value != insn->imm)) { /* if (imm == imm) goto pc+off; * only follow the goto, ignore fall-through */ -- cgit v1.2.3 From 914cb781ee1a35f4c7a5173a668d6ba2c4734b91 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:40 -0800 Subject: bpf: cleanup register_is_null() don't pass large struct bpf_reg_state by value. Instead pass it by pointer. Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- kernel/bpf/verifier.c | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index afe9a1a0a5fe..7afa92e9b409 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1265,9 +1265,9 @@ static int check_xadd(struct bpf_verifier_env *env, int insn_idx, struct bpf_ins } /* Does this register contain a constant zero? 
*/ -static bool register_is_null(struct bpf_reg_state reg) +static bool register_is_null(struct bpf_reg_state *reg) { - return reg.type == SCALAR_VALUE && tnum_equals_const(reg.var_off, 0); + return reg->type == SCALAR_VALUE && tnum_equals_const(reg->var_off, 0); } /* when register 'regno' is passed into function that will read 'access_size' @@ -1280,31 +1280,31 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, int access_size, bool zero_size_allowed, struct bpf_call_arg_meta *meta) { + struct bpf_reg_state *reg = cur_regs(env) + regno; struct bpf_verifier_state *state = env->cur_state; - struct bpf_reg_state *regs = state->regs; int off, i, slot, spi; - if (regs[regno].type != PTR_TO_STACK) { + if (reg->type != PTR_TO_STACK) { /* Allow zero-byte read from NULL, regardless of pointer type */ if (zero_size_allowed && access_size == 0 && - register_is_null(regs[regno])) + register_is_null(reg)) return 0; verbose(env, "R%d type=%s expected=%s\n", regno, - reg_type_str[regs[regno].type], + reg_type_str[reg->type], reg_type_str[PTR_TO_STACK]); return -EACCES; } /* Only allow fixed-offset stack reads */ - if (!tnum_is_const(regs[regno].var_off)) { + if (!tnum_is_const(reg->var_off)) { char tn_buf[48]; - tnum_strn(tn_buf, sizeof(tn_buf), regs[regno].var_off); + tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off); verbose(env, "invalid variable stack read R%d var_off=%s\n", regno, tn_buf); } - off = regs[regno].off + regs[regno].var_off.value; + off = reg->off + reg->var_off.value; if (off >= 0 || off < -MAX_BPF_STACK || off + access_size > 0 || access_size < 0 || (access_size == 0 && !zero_size_allowed)) { verbose(env, "invalid stack type R%d off=%d access_size=%d\n", @@ -1412,7 +1412,7 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno, * passed in as argument, it's a SCALAR_VALUE type. Final test * happens during stack boundary checking. 
*/ - if (register_is_null(*reg) && + if (register_is_null(reg) && arg_type == ARG_PTR_TO_MEM_OR_NULL) /* final test in check_stack_boundary() */; else if (!type_is_pkt_pointer(type) && -- cgit v1.2.3 From 6a28b446b7d2d5455080d5b772c50b99859d6cf5 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Nov 2017 21:31:41 -0800 Subject: selftests/bpf: adjust test_align expected output since verifier started to print liveness state of the registers adjust expected output of test_align. Now this test checks for both proper alignment handling by verifier and correctness of liveness marks. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/test_align.c | 156 +++++++++++++++---------------- 1 file changed, 78 insertions(+), 78 deletions(-) diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c index 8591c89c0828..fe916d29e166 100644 --- a/tools/testing/selftests/bpf/test_align.c +++ b/tools/testing/selftests/bpf/test_align.c @@ -64,11 +64,11 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv2"}, - {2, "R3=inv4"}, - {3, "R3=inv8"}, - {4, "R3=inv16"}, - {5, "R3=inv32"}, + {1, "R3_w=inv2"}, + {2, "R3_w=inv4"}, + {3, "R3_w=inv8"}, + {4, "R3_w=inv16"}, + {5, "R3_w=inv32"}, }, }, { @@ -92,17 +92,17 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv1"}, - {2, "R3=inv2"}, - {3, "R3=inv4"}, - {4, "R3=inv8"}, - {5, "R3=inv16"}, - {6, "R3=inv1"}, - {7, "R4=inv32"}, - {8, "R4=inv16"}, - {9, "R4=inv8"}, - {10, "R4=inv4"}, - {11, "R4=inv2"}, + {1, "R3_w=inv1"}, + {2, "R3_w=inv2"}, + {3, "R3_w=inv4"}, + {4, "R3_w=inv8"}, + {5, "R3_w=inv16"}, + {6, "R3_w=inv1"}, + {7, "R4_w=inv32"}, + {8, "R4_w=inv16"}, + {9, "R4_w=inv8"}, + {10, "R4_w=inv4"}, + {11, "R4_w=inv2"}, }, }, { @@ -121,12 +121,12 @@ static struct bpf_align_test tests[] = { 
.matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv4"}, - {2, "R3=inv8"}, - {3, "R3=inv10"}, - {4, "R4=inv8"}, - {5, "R4=inv12"}, - {6, "R4=inv14"}, + {1, "R3_w=inv4"}, + {2, "R3_w=inv8"}, + {3, "R3_w=inv10"}, + {4, "R4_w=inv8"}, + {5, "R4_w=inv12"}, + {6, "R4_w=inv14"}, }, }, { @@ -143,10 +143,10 @@ static struct bpf_align_test tests[] = { .matches = { {1, "R1=ctx(id=0,off=0,imm=0)"}, {1, "R10=fp0"}, - {1, "R3=inv7"}, - {2, "R3=inv7"}, - {3, "R3=inv14"}, - {4, "R3=inv56"}, + {1, "R3_w=inv7"}, + {2, "R3_w=inv7"}, + {3, "R3_w=inv14"}, + {4, "R3_w=inv56"}, }, }, @@ -185,18 +185,18 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { {7, "R0=pkt(id=0,off=8,r=8,imm=0)"}, - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R3=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {9, "R3=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {10, "R3=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {11, "R3=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, {18, "R3=pkt_end(id=0,off=0,imm=0)"}, - {18, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {19, "R4=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, - {20, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, - {21, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {22, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {23, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"}, + {20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 
0x7f8))"}, + {22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, }, }, { @@ -217,16 +217,16 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {7, "R3=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {8, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {9, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {11, "R4=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, - {12, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {13, "R4=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, - {14, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {15, "R4=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, - {16, "R4=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, + {7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"}, + {12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"}, + {16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"}, }, }, { @@ -257,14 +257,14 @@ static struct bpf_align_test tests[] = { }, .prog_type = BPF_PROG_TYPE_SCHED_CLS, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, - {5, "R5=pkt(id=0,off=14,r=0,imm=0)"}, - {6, "R4=pkt(id=0,off=14,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, + {5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"}, + {6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"}, {10, "R2=pkt(id=0,off=0,r=18,imm=0)"}, {10, "R5=pkt(id=0,off=14,r=18,imm=0)"}, - {10, "R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, - {14, 
"R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, - {15, "R4=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"}, + {14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, + {15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"}, }, }, { @@ -320,11 +320,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Offset is added to packet pointer R5, resulting in * known fixed offset, and variable offset from R6. */ - {11, "R5=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * it's total offset is NET_IP_ALIGN + reg->off (0) + * reg->aux_off (14) which is 16. Then the variable @@ -336,11 +336,11 @@ static struct bpf_align_test tests[] = { /* Variable offset is added to R5 packet pointer, * resulting in auxiliary alignment of 4. */ - {18, "R5=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant offset is added to R5, resulting in * reg->off of 14. */ - {19, "R5=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off * (14) which is 16. Then the variable offset is 4-byte @@ -352,18 +352,18 @@ static struct bpf_align_test tests[] = { /* Constant offset is added to R5 packet pointer, * resulting in reg->off value of 14. */ - {26, "R5=pkt(id=0,off=14,r=8"}, + {26, "R5_w=pkt(id=0,off=14,r=8"}, /* Variable offset is added to R5, resulting in a * variable offset of (4n). 
*/ - {27, "R5=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Constant is added to R5 again, setting reg->off to 18. */ - {28, "R5=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* And once more we add a variable; resulting var_off * is still (4n), fixed offset is not changed. * Also, we create a new reg->id. */ - {29, "R5=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, + {29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (18) * which is 20. Then the variable offset is (4n), so @@ -410,11 +410,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {8, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {8, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {9, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* Packet pointer has (4n+2) offset */ - {11, "R5=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, {13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -426,11 +426,11 @@ static struct bpf_align_test tests[] = { /* Newly read value in R6 was shifted left by 2, so has * known alignment of 4. */ - {18, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Added (4n) to packet pointer's (4n+2) var_off, giving * another (4n+2). 
*/ - {19, "R5=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, + {19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, {21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) @@ -473,11 +473,11 @@ static struct bpf_align_test tests[] = { .prog_type = BPF_PROG_TYPE_SCHED_CLS, .result = REJECT, .matches = { - {4, "R5=pkt(id=0,off=0,r=0,imm=0)"}, + {4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"}, /* ptr & 0x40 == either 0 or 0x40 */ - {5, "R5=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, + {5, "R5_w=inv(id=0,umax_value=64,var_off=(0x0; 0x40))"}, /* ptr << 2 == unknown, (4n) */ - {7, "R5=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, + {7, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"}, /* (4n) + 14 == (4n+2). We blow our bounds, because * the add could overflow. */ @@ -485,7 +485,7 @@ static struct bpf_align_test tests[] = { /* Checked s>=0 */ {10, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, /* packet pointer + nonnegative (4n+2) */ - {12, "R6=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, + {12, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, {14, "R4=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"}, /* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine. * We checked the bounds, but it might have been able @@ -530,11 +530,11 @@ static struct bpf_align_test tests[] = { * alignment of 4. 
*/ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {9, "R6=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Adding 14 makes R6 be (4n+2) */ - {10, "R6=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, + {10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"}, /* New unknown value in R7 is (4n) */ - {11, "R7=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, + {11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"}, /* Subtracting it from R6 blows our unsigned bounds */ {12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"}, /* Checked s>= 0 */ @@ -583,15 +583,15 @@ static struct bpf_align_test tests[] = { * alignment of 4. */ {7, "R2=pkt(id=0,off=0,r=8,imm=0)"}, - {10, "R6=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, + {10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"}, /* Adding 14 makes R6 be (4n+2) */ - {11, "R6=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, + {11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"}, /* Subtracting from packet pointer overflows ubounds */ - {13, "R5=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, + {13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"}, /* New unknown value in R7 is (4n), >= 76 */ - {15, "R7=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, + {15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"}, /* Adding it to packet pointer gives nice bounds again */ - {16, "R5=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, + {16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"}, /* At the time the word size load is performed from R5, * its total fixed offset is NET_IP_ALIGN + reg->off (0) * which is 2. 
Then the variable offset is (4n+2), so -- cgit v1.2.3 From a646c9b2dabae19138926b888c979e43fb85362b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 Nov 2017 21:32:48 -0800 Subject: nfp: fix old kdoc issues Since commit 3a025e1d1c2e ("Add optional check for bad kernel-doc comments") when built with W=1 build will complain about kdoc errors. Fix the kdoc issues we have. kdoc is still confused by defines in nfp_net_ctrl.h but those are not really errors. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_net.h | 2 ++ drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c | 9 +++------ 2 files changed, 5 insertions(+), 6 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net.h b/drivers/net/ethernet/netronome/nfp/nfp_net.h index 7f9857c276b1..3801c52098d5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_net.h @@ -548,6 +548,8 @@ struct nfp_net_dp { * @max_r_vecs: Number of allocated interrupt vectors for RX/TX * @max_tx_rings: Maximum number of TX rings supported by the Firmware * @max_rx_rings: Maximum number of RX rings supported by the Firmware + * @stride_rx: Queue controller RX queue spacing + * @stride_tx: Queue controller TX queue spacing * @r_vecs: Pre-allocated array of ring vectors * @irq_entries: Pre-allocated array of MSI-X entries * @lsc_handler: Handler for Link State Change interrupt diff --git a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c index 04dd5758ecf5..3fcb522d2e85 100644 --- a/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c +++ b/drivers/net/ethernet/netronome/nfp/nfpcore/nfp_cppcore.c @@ -372,8 +372,7 @@ nfp_cpp_area_alloc(struct nfp_cpp *cpp, u32 dest, * that it can be accessed directly. * * NOTE: @address and @size must be 32-bit aligned values. - * - * NOTE: The area must also be 'released' when the structure is freed. 
+ * The area must also be 'released' when the structure is freed. * * Return: NFP CPP Area handle, or NULL */ @@ -536,8 +535,7 @@ void nfp_cpp_area_release_free(struct nfp_cpp_area *area) * Read data from indicated CPP region. * * NOTE: @offset and @length must be 32-bit aligned values. - * - * NOTE: Area must have been locked down with an 'acquire'. + * Area must have been locked down with an 'acquire'. * * Return: length of io, or -ERRNO */ @@ -558,8 +556,7 @@ int nfp_cpp_area_read(struct nfp_cpp_area *area, * Write data to indicated CPP region. * * NOTE: @offset and @length must be 32-bit aligned values. - * - * NOTE: Area must have been locked down with an 'acquire'. + * Area must have been locked down with an 'acquire'. * * Return: length of io, or -ERRNO */ -- cgit v1.2.3 From 854dc87d1a7be1f11b2d079a80a822742d6e560e Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:49 -0800 Subject: nfp: bpf: support backward jump This patch adds support for backward jump on NFP. - restrictions on backward jump in various functions have been removed. - nfp_fixup_branches now supports backward jump. There is one thing to note, currently an input eBPF JMP insn may generate several NFP insns, for example, NFP imm move insn A \ NFP compare insn B --> 3 NFP insn jited from eBPF JMP insn M NFP branch insn C / --- NFP insn X --> 1 NFP insn jited from eBPF insn N --- ... therefore, we are doing sanity check to make sure the last jited insn from an eBPF JMP is a NFP branch instruction. Once backward jump is allowed, it is possible an eBPF JMP insn is at the end of the program. This is however causing trouble for the sanity check. 
Because the sanity check requires the end index of the NFP insns jited from one eBPF insn while only the start index is recorded before this patch that we can only get the end index by: start_index_of_the_next_eBPF_insn - 1 or for the above example: start_index_of_eBPF_insn_N (which is the index of NFP insn X) - 1 nfp_fixup_branches was using nfp_for_each_insn_walk2 to expose *next* insn to each iteration during the traversal so the last index could be calculated from which. Now, it needs some extra code to handle the last insn. Meanwhile, the use of walk2 is actually unnecessary, we could simply use generic single instruction walk to do this, the next insn could be easily calculated using list_next_entry. So, this patch migrates the jump fixup traversal method to *list_for_each_entry*, this simplifies the code logic a little bit. The other thing to note is a new state variable "last_bpf_off" is introduced to track the index of the last jited NFP insn. This is necessary because NFP is generating special purposes epilogue sequences, so the index of the last jited NFP insn is *not* always nfp_prog->prog_len - 1. Suggested-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 66 +++++++++++++++------------ drivers/net/ethernet/netronome/nfp/bpf/main.h | 4 +- 2 files changed, 40 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 995e95410b11..20daf6b95601 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016 Netronome Systems, Inc. + * Copyright (C) 2016-2017 Netronome Systems, Inc. 
* * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -975,9 +975,6 @@ wrp_test_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, { const struct bpf_insn *insn = &meta->insn; - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - wrp_test_reg_one(nfp_prog, insn->dst_reg * 2, alu_op, insn->src_reg * 2, br_mask, insn->off); wrp_test_reg_one(nfp_prog, insn->dst_reg * 2 + 1, alu_op, @@ -995,9 +992,6 @@ wrp_cmp_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, u8 reg = insn->dst_reg * 2; swreg tmp_reg; - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); if (!swap) emit_alu(nfp_prog, reg_none(), reg_a(reg), ALU_OP_SUB, tmp_reg); @@ -1027,9 +1021,6 @@ wrp_cmp_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, areg = insn->dst_reg * 2; breg = insn->src_reg * 2; - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - if (swap) { areg ^= breg; breg ^= areg; @@ -1630,8 +1621,6 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { - if (meta->insn.off < 0) /* TODO */ - return -EOPNOTSUPP; emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); return 0; @@ -1646,9 +1635,6 @@ static int jeq_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) or1 = reg_a(insn->dst_reg * 2); or2 = reg_b(insn->dst_reg * 2 + 1); - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - if (imm & ~0U) { tmp_reg = ur_load_imm_any(nfp_prog, imm & ~0U, imm_b(nfp_prog)); emit_alu(nfp_prog, imm_a(nfp_prog), @@ -1695,9 +1681,6 @@ static int jset_imm(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) u64 imm = insn->imm; /* sign extend */ swreg tmp_reg; - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - if (!imm) { meta->skip = true; return 0; @@ -1726,9 +1709,6 @@ static int jne_imm(struct nfp_prog *nfp_prog, 
struct nfp_insn_meta *meta) u64 imm = insn->imm; /* sign extend */ swreg tmp_reg; - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - if (!imm) { emit_alu(nfp_prog, reg_none(), reg_a(insn->dst_reg * 2), ALU_OP_OR, reg_b(insn->dst_reg * 2 + 1)); @@ -1753,9 +1733,6 @@ static int jeq_reg(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { const struct bpf_insn *insn = &meta->insn; - if (insn->off < 0) /* TODO */ - return -EOPNOTSUPP; - emit_alu(nfp_prog, imm_a(nfp_prog), reg_a(insn->dst_reg * 2), ALU_OP_XOR, reg_b(insn->src_reg * 2)); emit_alu(nfp_prog, imm_b(nfp_prog), reg_a(insn->dst_reg * 2 + 1), @@ -1888,16 +1865,25 @@ static void br_set_offset(u64 *instr, u16 offset) static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { struct nfp_insn_meta *meta, *next; - u32 off, br_idx; - u32 idx; + u32 idx, br_idx; + int off; - nfp_for_each_insn_walk2(nfp_prog, meta, next) { + list_for_each_entry(meta, &nfp_prog->insns, l) { if (meta->skip) continue; if (BPF_CLASS(meta->insn.code) != BPF_JMP) continue; - br_idx = nfp_prog_offset_to_index(nfp_prog, next->off) - 1; + if (list_is_last(&meta->l, &nfp_prog->insns)) { + next = NULL; + idx = nfp_prog->last_bpf_off; + } else { + next = list_next_entry(meta, l); + idx = next->off - 1; + } + + br_idx = nfp_prog_offset_to_index(nfp_prog, idx); + if (!nfp_is_br(nfp_prog->prog[br_idx])) { pr_err("Fixup found block not ending in branch %d %02x %016llx!!\n", br_idx, meta->insn.code, nfp_prog->prog[br_idx]); @@ -1914,10 +1900,30 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) return -ELOOP; } - while (off && nfp_meta_has_next(nfp_prog, next)) { + if (!next) { + /* When "next" is NULL, "meta" is the last node in the + * list. Given it is an JMP, it then must be a backward + * jump. + * + * For eBPF, the jump offset is against pc + 1, so we + * need to compensate the offset by 1 as we are pointing + * "next" to the current node "meta". 
+ */ + if (WARN_ON_ONCE(off > -2)) + return -ELOOP; + + next = meta; + off += 1; + } + + while (off > 0 && nfp_meta_has_next(nfp_prog, next)) { next = nfp_meta_next(next); off--; } + while (off < 0 && nfp_meta_has_prev(nfp_prog, next)) { + next = nfp_meta_prev(next); + off++; + } if (off) { pr_err("Fixup found too large jump!! %d\n", off); return -ELOOP; @@ -2105,6 +2111,8 @@ static int nfp_translate(struct nfp_prog *nfp_prog) nfp_prog->n_translated++; } + nfp_prog->last_bpf_off = nfp_prog_current_offset(nfp_prog) - 1; + nfp_outro(nfp_prog); if (nfp_prog->error) return nfp_prog->error; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 082a15f6dfb5..0f4d218fc77a 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016 Netronome Systems, Inc. + * Copyright (C) 2016-2017 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -142,6 +142,7 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) * @verifier_meta: temporary storage for verifier's insn meta * @type: BPF program type * @start_off: address of the first instruction in the memory + * @last_bpf_off: address of the last instruction translated from BPF * @tgt_out: jump target for normal exit * @tgt_abort: jump target for abort (e.g. 
access outside of packet buffer) * @tgt_done: jump target to get the next packet @@ -160,6 +161,7 @@ struct nfp_prog { enum bpf_prog_type type; unsigned int start_off; + unsigned int last_bpf_off; unsigned int tgt_out; unsigned int tgt_abort; unsigned int tgt_done; -- cgit v1.2.3 From 5b674140addc3c863efa227946ad7328f016a7a3 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:50 -0800 Subject: nfp: bpf: record jump destination to simplify jump fixup eBPF insns are internally organized as dual-list inside NFP offload JIT. Random access to an insn needs to be done by either forward or backward traversal along the list. One place we need to do such traversal is at nfp_fixup_branches where one traversal is needed for each jump insn to find the destination. Such traversals could be avoided if jump destinations are collected through a single traversal in a pre-scan pass, and such information could also be useful in other places where jump destination info is needed. This patch adds such jump destination collection in nfp_prog_prepare. 
Suggested-by: Jakub Kicinski Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 57 ++++------------------- drivers/net/ethernet/netronome/nfp/bpf/main.h | 13 +++++- drivers/net/ethernet/netronome/nfp/bpf/offload.c | 22 +++++++-- drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 4 +- 4 files changed, 41 insertions(+), 55 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 20daf6b95601..f76659ecb654 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -65,12 +65,6 @@ next = nfp_meta_next(pos), \ next2 = nfp_meta_next(next)) -static bool -nfp_meta_has_next(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) -{ - return meta->l.next != &nfp_prog->insns; -} - static bool nfp_meta_has_prev(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { @@ -1864,9 +1858,8 @@ static void br_set_offset(u64 *instr, u16 offset) /* --- Assembler logic --- */ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) { - struct nfp_insn_meta *meta, *next; + struct nfp_insn_meta *meta, *jmp_dst; u32 idx, br_idx; - int off; list_for_each_entry(meta, &nfp_prog->insns, l) { if (meta->skip) @@ -1874,13 +1867,10 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) if (BPF_CLASS(meta->insn.code) != BPF_JMP) continue; - if (list_is_last(&meta->l, &nfp_prog->insns)) { - next = NULL; + if (list_is_last(&meta->l, &nfp_prog->insns)) idx = nfp_prog->last_bpf_off; - } else { - next = list_next_entry(meta, l); - idx = next->off - 1; - } + else + idx = list_next_entry(meta, l)->off - 1; br_idx = nfp_prog_offset_to_index(nfp_prog, idx); @@ -1893,43 +1883,14 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) if (FIELD_GET(OP_BR_SPECIAL, nfp_prog->prog[br_idx])) continue; - /* Find the target offset in assembler realm */ - off = meta->insn.off; - if (!off) { - 
pr_err("Fixup found zero offset!!\n"); + if (!meta->jmp_dst) { + pr_err("Non-exit jump doesn't have destination info recorded!!\n"); return -ELOOP; } - if (!next) { - /* When "next" is NULL, "meta" is the last node in the - * list. Given it is an JMP, it then must be a backward - * jump. - * - * For eBPF, the jump offset is against pc + 1, so we - * need to compensate the offset by 1 as we are pointing - * "next" to the current node "meta". - */ - if (WARN_ON_ONCE(off > -2)) - return -ELOOP; - - next = meta; - off += 1; - } - - while (off > 0 && nfp_meta_has_next(nfp_prog, next)) { - next = nfp_meta_next(next); - off--; - } - while (off < 0 && nfp_meta_has_prev(nfp_prog, next)) { - next = nfp_meta_prev(next); - off++; - } - if (off) { - pr_err("Fixup found too large jump!! %d\n", off); - return -ELOOP; - } + jmp_dst = meta->jmp_dst; - if (next->skip) { + if (jmp_dst->skip) { pr_err("Branch landing on removed instruction!!\n"); return -ELOOP; } @@ -1938,7 +1899,7 @@ static int nfp_fixup_branches(struct nfp_prog *nfp_prog) idx <= br_idx; idx++) { if (!nfp_is_br(nfp_prog->prog[idx])) continue; - br_set_offset(&nfp_prog->prog[idx], next->off); + br_set_offset(&nfp_prog->prog[idx], jmp_dst->off); } } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 0f4d218fc77a..e488656f406c 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -94,6 +94,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); * @insn: BPF instruction * @ptr: pointer type for memory operations * @ptr_not_const: pointer is not always constant + * @jmp_dst: destination info for jump instructions * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number * @skip: skip this instruction (optimized out) @@ -102,8 +103,13 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); */ struct nfp_insn_meta { struct 
bpf_insn insn; - struct bpf_reg_state ptr; - bool ptr_not_const; + union { + struct { + struct bpf_reg_state ptr; + bool ptr_not_const; + }; + struct nfp_insn_meta *jmp_dst; + }; unsigned int off; unsigned short n; bool skip; @@ -191,4 +197,7 @@ int nfp_bpf_translate(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); int nfp_bpf_destroy(struct nfp_app *app, struct nfp_net *nn, struct bpf_prog *prog); +struct nfp_insn_meta * +nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, + unsigned int insn_idx, unsigned int n_insns); #endif diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index bc879aeb62d4..240db663d83f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016 Netronome Systems, Inc. + * Copyright (C) 2016-2017 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -55,11 +55,10 @@ static int nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, unsigned int cnt) { + struct nfp_insn_meta *meta; unsigned int i; for (i = 0; i < cnt; i++) { - struct nfp_insn_meta *meta; - meta = kzalloc(sizeof(*meta), GFP_KERNEL); if (!meta) return -ENOMEM; @@ -70,6 +69,23 @@ nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, list_add_tail(&meta->l, &nfp_prog->insns); } + /* Another pass to record jump information. 
*/ + list_for_each_entry(meta, &nfp_prog->insns, l) { + u64 code = meta->insn.code; + + if (BPF_CLASS(code) == BPF_JMP && BPF_OP(code) != BPF_EXIT && + BPF_OP(code) != BPF_CALL) { + struct nfp_insn_meta *dst_meta; + unsigned short dst_indx; + + dst_indx = meta->n + 1 + meta->insn.off; + dst_meta = nfp_bpf_goto_meta(nfp_prog, meta, dst_indx, + cnt); + + meta->jmp_dst = dst_meta; + } + } + return 0; } diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index 8d43491ddd6b..cca67730b91f 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016 Netronome Systems, Inc. + * Copyright (C) 2016-2017 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -40,7 +40,7 @@ #include "main.h" -static struct nfp_insn_meta * +struct nfp_insn_meta * nfp_bpf_goto_meta(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int insn_idx, unsigned int n_insns) { -- cgit v1.2.3 From a09d5c52c42129adbac2d1e39bd0e49a92729e3e Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:51 -0800 Subject: nfp: bpf: flag jump destination to guide insn combine optimizations NFP eBPF offload JIT engine is doing some instruction combine based optimizations which, however, may not be safe if the combined sequences cross basic block borders. Currently, there are post checks during fixing jump destinations. If the jump destination is found to be an eBPF insn that has been combined into another one, then the JIT engine will raise an error and abort. This is not optimal. The JIT engine ought to disable the optimization on such cross-bb-border sequences instead of aborting. 
As there is no control flow information in the eBPF infrastructure, we can't do basic block based optimizations. Instead, this patch extends the existing jump destination record pass to also flag the jump destination; then, in instruction combine passes, we can skip the optimizations if insns in the sequence are jump targets. Suggested-by: Jakub Kicinski Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.h | 4 ++++ drivers/net/ethernet/netronome/nfp/bpf/offload.c | 1 + 2 files changed, 5 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index e488656f406c..99da1d34dd0e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -89,6 +89,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); #define nfp_meta_next(meta) list_next_entry(meta, l) #define nfp_meta_prev(meta) list_prev_entry(meta, l) +#define FLAG_INSN_IS_JUMP_DST BIT(0) + /** * struct nfp_insn_meta - BPF instruction wrapper * @insn: BPF instruction @@ -97,6 +99,7 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); * @jmp_dst: destination info for jump instructions * @off: index of first generated machine instruction (in nfp_prog.prog) * @n: eBPF instruction number + * @flags: eBPF instruction extra optimization flags * @skip: skip this instruction (optimized out) * @double_cb: callback for second part of the instruction * @l: link on nfp_prog->insns list @@ -112,6 +115,7 @@ struct nfp_insn_meta { }; unsigned int off; unsigned short n; + unsigned short flags; bool skip; instr_cb_t double_cb; diff --git a/drivers/net/ethernet/netronome/nfp/bpf/offload.c b/drivers/net/ethernet/netronome/nfp/bpf/offload.c index 240db663d83f..377976ce92dd 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/offload.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/offload.c @@ -83,6 +83,7 @@ 
nfp_prog_prepare(struct nfp_prog *nfp_prog, const struct bpf_insn *prog, cnt); meta->jmp_dst = dst_meta; + dst_meta->flags |= FLAG_INSN_IS_JUMP_DST; } } -- cgit v1.2.3 From 1266f5d6559e30e9afdebfff60d60ab86a8da77b Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:52 -0800 Subject: nfp: bpf: don't do ld/mask combination if mask is jump destination If the mask insn in the ld/mask pair is jump destination, then don't do combination. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 3 +++ 1 file changed, 3 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index f76659ecb654..f2317b764222 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2142,6 +2142,9 @@ static void nfp_bpf_opt_ld_mask(struct nfp_prog *nfp_prog) if (next.src_reg || next.dst_reg) continue; + if (meta2->flags & FLAG_INSN_IS_JUMP_DST) + continue; + meta2->skip = true; } } -- cgit v1.2.3 From 29fe46efba5c3e1ac0f857a03a29f6bf0d0c5592 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:53 -0800 Subject: nfp: bpf: don't do ld/shifts combination if shifts are jump destination If any of the shift insns in the ld/shift sequence is jump destination, don't do combination. 
Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index f2317b764222..54915a3b8a7e 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2181,6 +2181,10 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) if (next1.imm != 0x20 || next2.imm != 0x20) continue; + if (meta2->flags & FLAG_INSN_IS_JUMP_DST || + meta3->flags & FLAG_INSN_IS_JUMP_DST) + continue; + meta2->skip = true; meta3->skip = true; } -- cgit v1.2.3 From 08859f159eaf7159a7a680c6151073f4451b222b Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:54 -0800 Subject: nfp: bpf: relax source operands check The NFP normally requires the source operands to use different addressing modes, but we should exempt the very special NN_REG_NONE type from this check. There are instructions that ignore both A/B operands, for example: local_csr_rd For these instructions, we might pass the same operand type, NN_REG_NONE, for both A/B operands. NOTE: in the current NFP ISA, it is only possible for instructions with unrestricted operands to take none operands, but in case there is a new and similar instruction in restricted form, it would follow similar rules, so swreg_to_restricted is updated as well. 
Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_asm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index 830f6de25f47..da277386077c 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -120,7 +120,8 @@ int swreg_to_unrestricted(swreg dst, swreg lreg, swreg rreg, reg->dst = nfp_swreg_to_unreg(dst, true); /* Decode source operands */ - if (swreg_type(lreg) == swreg_type(rreg)) + if (swreg_type(lreg) == swreg_type(rreg) && + swreg_type(lreg) != NN_REG_NONE) return -EFAULT; if (swreg_type(lreg) == NN_REG_GPR_B || @@ -200,7 +201,8 @@ int swreg_to_restricted(swreg dst, swreg lreg, swreg rreg, reg->dst = nfp_swreg_to_rereg(dst, true, false, NULL); /* Decode source operands */ - if (swreg_type(lreg) == swreg_type(rreg)) + if (swreg_type(lreg) == swreg_type(rreg) && + swreg_type(lreg) != NN_REG_NONE) return -EFAULT; if (swreg_type(lreg) == NN_REG_GPR_B || -- cgit v1.2.3 From 3239e7bb28a8a4a96c36beeaa6439666e1ed4f8b Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:55 -0800 Subject: nfp: bpf: correct the encoding for No-Dest immed When immed is used with No-Dest, the emitter should use reg.dst instead of reg.areg for the destination, using the latter will actually encode register zero. 
Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 54915a3b8a7e..024b44089623 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -224,9 +224,11 @@ emit_immed(struct nfp_prog *nfp_prog, swreg dst, u16 imm, return; } - __emit_immed(nfp_prog, reg.areg, reg.breg, imm >> 8, width, - invert, shift, reg.wr_both, - reg.dst_lmextn, reg.src_lmextn); + /* Use reg.dst when destination is No-Dest. */ + __emit_immed(nfp_prog, + swreg_type(dst) == NN_REG_NONE ? reg.dst : reg.areg, + reg.breg, imm >> 8, width, invert, shift, + reg.wr_both, reg.dst_lmextn, reg.src_lmextn); } static void -- cgit v1.2.3 From 5468a8b929e6276e139405d525c963a56890b5e0 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 30 Nov 2017 21:32:56 -0800 Subject: nfp: bpf: encode indirect commands Add support for emitting commands with field overwrites. 
Signed-off-by: Jakub Kicinski Signed-off-by: Jiong Wang Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 17 +++++++++++++---- drivers/net/ethernet/netronome/nfp/nfp_asm.h | 3 ++- 2 files changed, 15 insertions(+), 5 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 024b44089623..da4e106d3b16 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -96,7 +96,7 @@ nfp_prog_offset_to_index(struct nfp_prog *nfp_prog, unsigned int offset) /* --- Emitters --- */ static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, - u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync) + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir) { enum cmd_ctx_swap ctx; u64 insn; @@ -114,14 +114,15 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, FIELD_PREP(OP_CMD_CNT, size) | FIELD_PREP(OP_CMD_SIG, sync) | FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | + FIELD_PREP(OP_CMD_INDIR, indir) | FIELD_PREP(OP_CMD_MODE, mode); nfp_prog_push(nfp_prog, insn); } static void -emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, - u8 mode, u8 xfer, swreg lreg, swreg rreg, u8 size, bool sync) +emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, bool sync, bool indir) { struct nfp_insn_re_regs reg; int err; @@ -142,7 +143,15 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, return; } - __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync); + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync, + indir); +} + +static void +emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, bool sync) +{ + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false); } static void diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h 
b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 74d0c11ab2f9..6ff842a15e5d 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2016 Netronome Systems, Inc. + * Copyright (C) 2016-2017 Netronome Systems, Inc. * * This software is dual licensed under the GNU General License Version 2, * June 1991 as shown in the file COPYING in the top-level directory of this @@ -209,6 +209,7 @@ enum alu_dst_ab { #define OP_CMD_CNT 0x0000e000000ULL #define OP_CMD_SIG 0x000f0000000ULL #define OP_CMD_TGT_CMD 0x07f00000000ULL +#define OP_CMD_INDIR 0x20000000000ULL #define OP_CMD_MODE 0x1c0000000000ULL struct cmd_tgt_act { -- cgit v1.2.3 From 5e4d6d20939f8e77b7734c0cea6886dff60c99de Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:57 -0800 Subject: nfp: bpf: factor out is_mbpf_load & is_mbpf_store We often need to check whether a BPF insn is loading/storing data from/to memory. Therefore, it makes sense to factor out the related code into common helper functions. 
Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/main.h | 10 ++++++++++ drivers/net/ethernet/netronome/nfp/bpf/verifier.c | 4 ++-- 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 99da1d34dd0e..20ef0adb2931 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -144,6 +144,16 @@ static inline u8 mbpf_mode(const struct nfp_insn_meta *meta) return BPF_MODE(meta->insn.code); } +static inline bool is_mbpf_load(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM); +} + +static inline bool is_mbpf_store(const struct nfp_insn_meta *meta) +{ + return (meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM); +} + /** * struct nfp_prog - nfp BPF program * @prog: machine code diff --git a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c index cca67730b91f..d2bf29c90226 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/verifier.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/verifier.c @@ -180,10 +180,10 @@ nfp_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn_idx) if (meta->insn.code == (BPF_JMP | BPF_EXIT)) return nfp_bpf_check_exit(nfp_prog, env); - if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_LDX | BPF_MEM)) + if (is_mbpf_load(meta)) return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.src_reg); - if ((meta->insn.code & ~BPF_SIZE_MASK) == (BPF_STX | BPF_MEM)) + if (is_mbpf_store(meta)) return nfp_bpf_check_ptr(nfp_prog, meta, env, meta->insn.dst_reg); -- cgit v1.2.3 From 9879a3814beb3b1350755475e67a8d92ba1f7e4b Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:58 -0800 Subject: nfp: bpf: implement memory bulk copy for length within 32-bytes For NFP, we want to re-group a 
sequence of load/store pairs lowered from memcpy/memmove into a single memory bulk operation which then could be accelerated using the NFP CPP bus. This patch extends the existing load/store auxiliary information by adding two new fields: struct bpf_insn *paired_st; s16 ldst_gather_len; Both fields are supposed to be carried by the load instruction at the head of the sequence. "paired_st" is the corresponding store instruction at the head and "ldst_gather_len" is the gathered length. If "ldst_gather_len" is negative, then the sequence is doing memory load/store in descending order, otherwise it is in ascending order. We need this information to detect overlapped memory access. This patch then optimizes memory bulk copy when the copy length is within 32-bytes. The strategy of read/write used is: * Read. Use read32 (direct_ref), always. * Write. - length <= 8-bytes write8 (direct_ref). - length <= 32-bytes and is 4-byte aligned write32 (direct_ref). - length <= 32-bytes but is not 4-byte aligned write8 (indirect_ref). NOTE: the optimization should not change program semantics. The destination register of the last load instruction should contain the same value before and after this optimization. 
Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 113 ++++++++++++++++++++++++++ drivers/net/ethernet/netronome/nfp/bpf/main.h | 4 + drivers/net/ethernet/netronome/nfp/nfp_asm.c | 1 + drivers/net/ethernet/netronome/nfp/nfp_asm.h | 4 + 4 files changed, 122 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index da4e106d3b16..138568c0eee6 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -154,6 +154,13 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false); } +static void +emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, + swreg lreg, swreg rreg, u8 size, bool sync) +{ + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true); +} + static void __emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip, enum br_ctx_signal_state css, u16 addr, u8 defer) @@ -515,6 +522,109 @@ static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src) wrp_mov(nfp_prog, reg_both(dst), reg_b(src)); } +/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the + * result to @dst from low end. + */ +static void +wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, + u8 offset) +{ + enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; + u8 mask = (1 << field_len) - 1; + + emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); +} + +/* NFP has Command Push Pull bus which supports bluk memory operations. 
*/ +static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + bool descending_seq = meta->ldst_gather_len < 0; + s16 len = abs(meta->ldst_gather_len); + swreg src_base, off; + unsigned int i; + u8 xfer_num; + + if (WARN_ON_ONCE(len > 32)) + return -EOPNOTSUPP; + + off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + src_base = reg_a(meta->insn.src_reg * 2); + xfer_num = round_up(len, 4) / 4; + + /* Memory read from source addr into transfer-in registers. */ + emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, off, + xfer_num - 1, true); + + /* Move from transfer-in to transfer-out. */ + for (i = 0; i < xfer_num; i++) + wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i)); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog)); + + if (len <= 8) { + /* Use single direct_ref write8. */ + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, len - 1, + true); + } else if (IS_ALIGNED(len, 4)) { + /* Use single direct_ref write32. */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, + true); + } else { + /* Use single indirect_ref write8. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + len - 1, true); + } + + /* TODO: The following extra load is to make sure data flow be identical + * before and after we do memory copy optimization. + * + * The load destination register is not guaranteed to be dead, so we + * need to make sure it is loaded with the value the same as before + * this transformation. + * + * These extra loads could be removed once we have accurate register + * usage information. 
+ */ + if (descending_seq) + xfer_num = 0; + else if (BPF_SIZE(meta->insn.code) != BPF_DW) + xfer_num = xfer_num - 1; + else + xfer_num = xfer_num - 2; + + switch (BPF_SIZE(meta->insn.code)) { + case BPF_B: + wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num), 1, + IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1); + break; + case BPF_H: + wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num), 2, (len & 3) ^ 2); + break; + case BPF_W: + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(0)); + break; + case BPF_DW: + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2), + reg_xfer(xfer_num)); + wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), + reg_xfer(xfer_num + 1)); + break; + } + + if (BPF_SIZE(meta->insn.code) != BPF_DW) + wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0); + + return 0; +} + static int data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) { @@ -1490,6 +1600,9 @@ static int mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) { + if (meta->ldst_gather_len) + return nfp_cpp_memcpy(nfp_prog, meta); + if (meta->ptr.type == PTR_TO_CTX) { if (nfp_prog->type == BPF_PROG_TYPE_XDP) return mem_ldx_xdp(nfp_prog, meta, size); diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h index 20ef0adb2931..5884291ddba5 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.h +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h @@ -95,6 +95,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *); * struct nfp_insn_meta - BPF instruction wrapper * @insn: BPF instruction * @ptr: pointer type for memory operations + * @ldst_gather_len: memcpy length gathered from load/store sequence + * @paired_st: the paired store insn at the head of the sequence * @ptr_not_const: pointer is not always constant * @jmp_dst: destination info for jump instructions * @off: index of first generated 
machine instruction (in nfp_prog.prog) @@ -109,6 +111,8 @@ struct nfp_insn_meta { union { struct { struct bpf_reg_state ptr; + struct bpf_insn *paired_st; + s16 ldst_gather_len; bool ptr_not_const; }; struct nfp_insn_meta *jmp_dst; diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c index da277386077c..d3610987fb07 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c @@ -41,6 +41,7 @@ const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = { [CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 }, + [CMD_TGT_WRITE32_SWAP] = { 0x02, 0x5f }, [CMD_TGT_READ8] = { 0x01, 0x43 }, [CMD_TGT_READ32] = { 0x00, 0x5c }, [CMD_TGT_READ32_LE] = { 0x01, 0x5c }, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h index 6ff842a15e5d..98803f9f40b6 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h +++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h @@ -220,6 +220,7 @@ struct cmd_tgt_act { enum cmd_tgt_map { CMD_TGT_READ8, CMD_TGT_WRITE8_SWAP, + CMD_TGT_WRITE32_SWAP, CMD_TGT_READ32, CMD_TGT_READ32_LE, CMD_TGT_READ32_SWAP, @@ -241,6 +242,9 @@ enum cmd_ctx_swap { CMD_CTX_NO_SWAP = 3, }; +#define CMD_OVE_LEN BIT(7) +#define CMD_OV_LEN GENMASK(12, 8) + #define OP_LCSR_BASE 0x0fc00000000ULL #define OP_LCSR_A_SRC 0x000000003ffULL #define OP_LCSR_B_SRC 0x000000ffc00ULL -- cgit v1.2.3 From 8c90053858fce1ca60fab7be03bb61d314ea5c1c Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:32:59 -0800 Subject: nfp: bpf: implement memory bulk copy for length bigger than 32-bytes When the gathered copy length is bigger than 32-bytes and within 128-bytes (the maximum length a single CPP Pull/Push request can finish), the read/write strategy is changed to: * Read. - use direct reference mode when length is within 32-bytes. - use indirect mode when length is bigger than 32-bytes. * Write. - length <= 8-bytes use write8 (direct_ref). 
- length <= 32-byte and 4-bytes aligned use write32 (direct_ref). - length <= 32-bytes but not 4-bytes aligned use write8 (indirect_ref). - length > 32-bytes and 4-bytes aligned use write32 (indirect_ref). - length > 32-bytes and not 4-bytes aligned and <= 40-bytes use write32 (direct_ref) to finish the first 32-bytes. use write8 (direct_ref) to finish all remaining hanging part. - length > 32-bytes and not 4-bytes aligned use write32 (indirect_ref) to finish those 4-byte aligned parts. use write8 (direct_ref) to finish all remaining hanging part. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 52 ++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 7 deletions(-) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 138568c0eee6..1b98ef239605 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -544,16 +544,18 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) unsigned int i; u8 xfer_num; - if (WARN_ON_ONCE(len > 32)) - return -EOPNOTSUPP; - off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); src_base = reg_a(meta->insn.src_reg * 2); xfer_num = round_up(len, 4) / 4; + /* Setup PREV_ALU fields to override memory read length. */ + if (len > 32) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + /* Memory read from source addr into transfer-in registers. */ - emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, off, - xfer_num - 1, true); + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, + off, xfer_num - 1, true, len > 32); /* Move from transfer-in to transfer-out. 
*/ for (i = 0; i < xfer_num; i++) @@ -566,18 +568,54 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, len - 1, true); - } else if (IS_ALIGNED(len, 4)) { + } else if (len <= 32 && IS_ALIGNED(len, 4)) { /* Use single direct_ref write32. */ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, true); - } else { + } else if (len <= 32) { /* Use single indirect_ref write8. */ wrp_immed(nfp_prog, reg_none(), CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, len - 1, true); + } else if (IS_ALIGNED(len, 4)) { + /* Use single indirect_ref write32. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 1, true); + } else if (len <= 40) { + /* Use one direct_ref write32 to write the first 32-bytes, then + * another direct_ref write8 to write the remaining bytes. + */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, 7, + true); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, + imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, + reg_a(meta->paired_st->dst_reg * 2), off, len - 33, + true); + } else { + /* Use one indirect_ref write32 to write 4-bytes aligned length, + * then another direct_ref write8 to write the remaining bytes. 
+ */ + u8 new_off; + + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 2, true); + new_off = meta->paired_st->off + (xfer_num - 1) * 4; + off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, + xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, + (len & 0x3) - 1, true); } /* TODO: The following extra load is to make sure data flow be identical -- cgit v1.2.3 From 6bc7103c89bff2b53a159e03b74c8216c79bfef8 Mon Sep 17 00:00:00 2001 From: Jiong Wang Date: Thu, 30 Nov 2017 21:33:00 -0800 Subject: nfp: bpf: detect load/store sequences lowered from memory copy This patch adds the optimization frontend by adding a new eBPF IR scan pass "nfp_bpf_opt_ldst_gather". The pass will traverse the IR to recognize the load/store pair sequences that come from lowering of memory copy builtins. The gathered memory copy information will be kept in the meta info structure of the first load instruction in the sequence and will be consumed by the optimization backend added in the previous patches. NOTE: a sequence with cross memory access doesn't qualify for this optimization, i.e. if one load in the sequence will load from a place that has been written by a previous store. This is because when we turn the sequence into a single CPP operation, we are reading all contents at once into NFP transfer registers, then writing them out as a whole. This is not identical to what the original load/store sequence is doing. Detecting cross memory access for two random pointers would be difficult; fortunately, under XDP/eBPF's restricted runtime environment, copies normally happen among map, packet data and stack, which do not overlap with each other. And for cases supported by NFP, cross memory access will only happen on PTR_TO_PACKET. 
Fortunately, for this case there is ID information with which we can do an accurate memory alias check. Signed-off-by: Jiong Wang Reviewed-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/bpf/jit.c | 237 +++++++++++++++++++++++++++ 1 file changed, 237 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 1b98ef239605..3419ad495962 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -2352,12 +2352,249 @@ static void nfp_bpf_opt_ld_shift(struct nfp_prog *nfp_prog) } } +/* load/store pair that forms a memory copy should look like the following: + * + * ld_width R, [addr_src + offset_src] + * st_width [addr_dest + offset_dest], R + * + * The destination register of load and source register of store should + * be the same, load and store should also perform at the same width. + * If either of addr_src or addr_dest is stack pointer, we don't do the + * CPP optimization as stack is modelled by registers on NFP. + */ +static bool +curr_pair_is_memcpy(struct nfp_insn_meta *ld_meta, + struct nfp_insn_meta *st_meta) +{ + struct bpf_insn *ld = &ld_meta->insn; + struct bpf_insn *st = &st_meta->insn; + + if (!is_mbpf_load(ld_meta) || !is_mbpf_store(st_meta)) + return false; + + if (ld_meta->ptr.type != PTR_TO_PACKET) + return false; + + if (st_meta->ptr.type != PTR_TO_PACKET) + return false; + + if (BPF_SIZE(ld->code) != BPF_SIZE(st->code)) + return false; + + if (ld->dst_reg != st->src_reg) + return false; + + /* There is jump to the store insn in this pair. */ + if (st_meta->flags & FLAG_INSN_IS_JUMP_DST) + return false; + + return true; +} + +/* Currently, we only support chaining load/store pairs if: + * + * - Their address base registers are the same. + * - Their address offsets are in the same order. + * - They operate at the same memory width. + * - There is no jump into the middle of them. 
+ */ +static bool +curr_pair_chain_with_previous(struct nfp_insn_meta *ld_meta, + struct nfp_insn_meta *st_meta, + struct bpf_insn *prev_ld, + struct bpf_insn *prev_st) +{ + u8 prev_size, curr_size, prev_ld_base, prev_st_base, prev_ld_dst; + struct bpf_insn *ld = &ld_meta->insn; + struct bpf_insn *st = &st_meta->insn; + s16 prev_ld_off, prev_st_off; + + /* This pair is the start pair. */ + if (!prev_ld) + return true; + + prev_size = BPF_LDST_BYTES(prev_ld); + curr_size = BPF_LDST_BYTES(ld); + prev_ld_base = prev_ld->src_reg; + prev_st_base = prev_st->dst_reg; + prev_ld_dst = prev_ld->dst_reg; + prev_ld_off = prev_ld->off; + prev_st_off = prev_st->off; + + if (ld->dst_reg != prev_ld_dst) + return false; + + if (ld->src_reg != prev_ld_base || st->dst_reg != prev_st_base) + return false; + + if (curr_size != prev_size) + return false; + + /* There is jump to the head of this pair. */ + if (ld_meta->flags & FLAG_INSN_IS_JUMP_DST) + return false; + + /* Both in ascending order. */ + if (prev_ld_off + prev_size == ld->off && + prev_st_off + prev_size == st->off) + return true; + + /* Both in descending order. */ + if (ld->off + curr_size == prev_ld_off && + st->off + curr_size == prev_st_off) + return true; + + return false; +} + +/* Return TRUE if cross memory access happens. Cross memory access means + * store area is overlapping with load area that a later load might load + * the value from previous store, for this case we can't treat the sequence + * as an memory copy. + */ +static bool +cross_mem_access(struct bpf_insn *ld, struct nfp_insn_meta *head_ld_meta, + struct nfp_insn_meta *head_st_meta) +{ + s16 head_ld_off, head_st_off, ld_off; + + /* Different pointer types does not overlap. */ + if (head_ld_meta->ptr.type != head_st_meta->ptr.type) + return false; + + /* load and store are both PTR_TO_PACKET, check ID info. */ + if (head_ld_meta->ptr.id != head_st_meta->ptr.id) + return true; + + /* Canonicalize the offsets. 
Turn all of them against the original + * base register. + */ + head_ld_off = head_ld_meta->insn.off + head_ld_meta->ptr.off; + head_st_off = head_st_meta->insn.off + head_st_meta->ptr.off; + ld_off = ld->off + head_ld_meta->ptr.off; + + /* Ascending order cross. */ + if (ld_off > head_ld_off && + head_ld_off < head_st_off && ld_off >= head_st_off) + return true; + + /* Descending order cross. */ + if (ld_off < head_ld_off && + head_ld_off > head_st_off && ld_off <= head_st_off) + return true; + + return false; +} + +/* This pass tries to identify the following instruction sequences. + * + * load R, [regA + offA] + * store [regB + offB], R + * load R, [regA + offA + const_imm_A] + * store [regB + offB + const_imm_A], R + * load R, [regA + offA + 2 * const_imm_A] + * store [regB + offB + 2 * const_imm_A], R + * ... + * + * The above sequence is typically generated by the compiler when lowering + * memcpy. NFP prefers using CPP instructions to accelerate it. + */ +static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *head_ld_meta = NULL; + struct nfp_insn_meta *head_st_meta = NULL; + struct nfp_insn_meta *meta1, *meta2; + struct bpf_insn *prev_ld = NULL; + struct bpf_insn *prev_st = NULL; + u8 count = 0; + + nfp_for_each_insn_walk2(nfp_prog, meta1, meta2) { + struct bpf_insn *ld = &meta1->insn; + struct bpf_insn *st = &meta2->insn; + + /* Reset record status if any of the following is true: + * - The current insn pair is not load/store. + * - The load/store pair doesn't chain with previous one. + * - The chained load/store pair crossed with previous pair. + * - The chained load/store pair has a total size of memory + * copy beyond 128 bytes which is the maximum length a + * single NFP CPP command can transfer. 
+ */ + if (!curr_pair_is_memcpy(meta1, meta2) || + !curr_pair_chain_with_previous(meta1, meta2, prev_ld, + prev_st) || + (head_ld_meta && (cross_mem_access(ld, head_ld_meta, + head_st_meta) || + head_ld_meta->ldst_gather_len >= 128))) { + if (!count) + continue; + + if (count > 1) { + s16 prev_ld_off = prev_ld->off; + s16 prev_st_off = prev_st->off; + s16 head_ld_off = head_ld_meta->insn.off; + + if (prev_ld_off < head_ld_off) { + head_ld_meta->insn.off = prev_ld_off; + head_st_meta->insn.off = prev_st_off; + head_ld_meta->ldst_gather_len = + -head_ld_meta->ldst_gather_len; + } + + head_ld_meta->paired_st = &head_st_meta->insn; + head_st_meta->skip = true; + } else { + head_ld_meta->ldst_gather_len = 0; + } + + /* If the chain is ended by an load/store pair then this + * could serve as the new head of the the next chain. + */ + if (curr_pair_is_memcpy(meta1, meta2)) { + head_ld_meta = meta1; + head_st_meta = meta2; + head_ld_meta->ldst_gather_len = + BPF_LDST_BYTES(ld); + meta1 = nfp_meta_next(meta1); + meta2 = nfp_meta_next(meta2); + prev_ld = ld; + prev_st = st; + count = 1; + } else { + head_ld_meta = NULL; + head_st_meta = NULL; + prev_ld = NULL; + prev_st = NULL; + count = 0; + } + + continue; + } + + if (!head_ld_meta) { + head_ld_meta = meta1; + head_st_meta = meta2; + } else { + meta1->skip = true; + meta2->skip = true; + } + + head_ld_meta->ldst_gather_len += BPF_LDST_BYTES(ld); + meta1 = nfp_meta_next(meta1); + meta2 = nfp_meta_next(meta2); + prev_ld = ld; + prev_st = st; + count++; + } +} + static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) { nfp_bpf_opt_reg_init(nfp_prog); nfp_bpf_opt_ld_mask(nfp_prog); nfp_bpf_opt_ld_shift(nfp_prog); + nfp_bpf_opt_ldst_gather(nfp_prog); return 0; } -- cgit v1.2.3 From 118b4aa25d90d0930611b71dd28a749c67309ccb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:08:55 -0800 Subject: net: xdp: avoid output parameters when querying XDP prog The output parameters will get unwieldy if we want to add 
more information about the program. Simply pass the entire struct netdev_bpf in. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- include/linux/netdevice.h | 3 ++- net/core/dev.c | 24 ++++++++++++++---------- net/core/rtnetlink.c | 6 +++++- 3 files changed, 21 insertions(+), 12 deletions(-) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ef789e1d679e..667bdd3ad33e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3330,7 +3330,8 @@ struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t xdp_op, u32 *prog_id); +void __dev_xdp_query(struct net_device *dev, bpf_op_t xdp_op, + struct netdev_bpf *xdp); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); diff --git a/net/core/dev.c b/net/core/dev.c index 07ed21d64f92..3f271c9cb5e0 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7073,17 +7073,21 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); -u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op, u32 *prog_id) +void __dev_xdp_query(struct net_device *dev, bpf_op_t bpf_op, + struct netdev_bpf *xdp) { - struct netdev_bpf xdp; - - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; + memset(xdp, 0, sizeof(*xdp)); + xdp->command = XDP_QUERY_PROG; /* Query must always succeed. 
*/ - WARN_ON(bpf_op(dev, &xdp) < 0); - if (prog_id) - *prog_id = xdp.prog_id; + WARN_ON(bpf_op(dev, xdp) < 0); +} + +static u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t bpf_op) +{ + struct netdev_bpf xdp; + + __dev_xdp_query(dev, bpf_op, &xdp); return xdp.prog_attached; } @@ -7134,10 +7138,10 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, bpf_chk = generic_xdp_install; if (fd >= 0) { - if (bpf_chk && __dev_xdp_attached(dev, bpf_chk, NULL)) + if (bpf_chk && __dev_xdp_attached(dev, bpf_chk)) return -EEXIST; if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && - __dev_xdp_attached(dev, bpf_op, NULL)) + __dev_xdp_attached(dev, bpf_op)) return -EBUSY; prog = bpf_prog_get_type_dev(fd, BPF_PROG_TYPE_XDP, diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dabba2a91fc8..9c4cb584bfb0 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -1261,6 +1261,7 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) { const struct net_device_ops *ops = dev->netdev_ops; const struct bpf_prog *generic_xdp_prog; + struct netdev_bpf xdp; ASSERT_RTNL(); @@ -1273,7 +1274,10 @@ static u8 rtnl_xdp_attached_mode(struct net_device *dev, u32 *prog_id) if (!ops->ndo_bpf) return XDP_ATTACHED_NONE; - return __dev_xdp_attached(dev, ops->ndo_bpf, prog_id); + __dev_xdp_query(dev, ops->ndo_bpf, &xdp); + *prog_id = xdp.prog_id; + + return xdp.prog_attached; } static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) -- cgit v1.2.3 From 92f0292b35a09bb5f12a4184ac86668599bc233b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:08:56 -0800 Subject: net: xdp: report flags program was installed with on query Some drivers enforce that flags on program replacement and removal must match the flags passed on install. This leaves the possibility open to enable simultaneous loading of XDP programs both to HW and DRV. Allow such drivers to report the flags back to the stack. 
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 1 + include/linux/netdevice.h | 2 ++ 2 files changed, 3 insertions(+) diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index 1a603fdd9e80..ea6bbf1efefc 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3392,6 +3392,7 @@ static int nfp_net_xdp(struct net_device *netdev, struct netdev_bpf *xdp) if (nn->dp.bpf_offload_xdp) xdp->prog_attached = XDP_ATTACHED_HW; xdp->prog_id = nn->xdp_prog ? nn->xdp_prog->aux->id : 0; + xdp->flags = nn->xdp_prog ? nn->xdp_flags : 0; return 0; case BPF_OFFLOAD_VERIFIER_PREP: return nfp_app_bpf_verifier_prep(nn->app, nn, xdp); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 667bdd3ad33e..cc4ce7456e38 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -820,6 +820,8 @@ struct netdev_bpf { struct { u8 prog_attached; u32 prog_id; + /* flags with which program was installed */ + u32 prog_flags; }; /* BPF_OFFLOAD_VERIFIER_PREP */ struct { -- cgit v1.2.3 From bd0b2e7fe611953470ec7c533b455fb2abd382cd Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:08:57 -0800 Subject: net: xdp: make the stack take care of the tear down Since day one of XDP drivers had to remember to free the program on the remove path. This leads to code duplication and is error prone. Make the stack query the installed programs on unregister and if something is installed, remove the program. Freeing of program attached to XDP generic is moved from free_netdev() as well. Because the remove will now be called before notifiers are invoked, BPF offload state of the program will not get destroyed before uninstall. 
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/broadcom/bnxt/bnxt.c | 2 -- drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 3 --- drivers/net/ethernet/netronome/nfp/bpf/main.c | 7 ------ .../net/ethernet/netronome/nfp/nfp_net_common.c | 3 --- drivers/net/ethernet/qlogic/qede/qede_main.c | 4 --- drivers/net/tun.c | 4 --- net/core/dev.c | 29 ++++++++++++++++------ 7 files changed, 22 insertions(+), 30 deletions(-) diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c index c5c38d4b7d1c..8c1dd60eab6f 100644 --- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c +++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c @@ -7800,8 +7800,6 @@ static void bnxt_remove_one(struct pci_dev *pdev) bnxt_dcb_free(bp); kfree(bp->edev); bp->edev = NULL; - if (bp->xdp_prog) - bpf_prog_put(bp->xdp_prog); bnxt_cleanup_pci(bp); free_netdev(dev); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c index d2b057a3e512..0f5c012de52e 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c @@ -4308,9 +4308,6 @@ static void mlx5e_nic_cleanup(struct mlx5e_priv *priv) { mlx5e_ipsec_cleanup(priv); mlx5e_vxlan_cleanup(priv); - - if (priv->channels.params.xdp_prog) - bpf_prog_put(priv->channels.params.xdp_prog); } static int mlx5e_init_nic_rx(struct mlx5e_priv *priv) diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.c b/drivers/net/ethernet/netronome/nfp/bpf/main.c index e379b78e86ef..54bfd7846f6d 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/main.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/main.c @@ -82,12 +82,6 @@ static const char *nfp_bpf_extra_cap(struct nfp_app *app, struct nfp_net *nn) return nfp_net_ebpf_capable(nn) ? 
"BPF" : ""; } -static void nfp_bpf_vnic_free(struct nfp_app *app, struct nfp_net *nn) -{ - if (nn->dp.bpf_offload_xdp) - nfp_bpf_xdp_offload(app, nn, NULL); -} - static int nfp_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) { @@ -168,7 +162,6 @@ const struct nfp_app_type app_bpf = { .extra_cap = nfp_bpf_extra_cap, .vnic_alloc = nfp_app_nic_vnic_alloc, - .vnic_free = nfp_bpf_vnic_free, .setup_tc = nfp_bpf_setup_tc, .tc_busy = nfp_bpf_tc_busy, diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c index ea6bbf1efefc..ad3e9f6a61e5 100644 --- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c +++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c @@ -3562,9 +3562,6 @@ struct nfp_net *nfp_net_alloc(struct pci_dev *pdev, bool needs_netdev, */ void nfp_net_free(struct nfp_net *nn) { - if (nn->xdp_prog) - bpf_prog_put(nn->xdp_prog); - if (nn->dp.netdev) free_netdev(nn->dp.netdev); else diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c index 8f9b3eb82137..57332b3e5e64 100644 --- a/drivers/net/ethernet/qlogic/qede/qede_main.c +++ b/drivers/net/ethernet/qlogic/qede/qede_main.c @@ -1068,10 +1068,6 @@ static void __qede_remove(struct pci_dev *pdev, enum qede_remove_mode mode) pci_set_drvdata(pdev, NULL); - /* Release edev's reference to XDP's bpf if such exist */ - if (edev->xdp_prog) - bpf_prog_put(edev->xdp_prog); - /* Use global ops since we've freed edev */ qed_ops->common->slowpath_stop(cdev); if (system_state == SYSTEM_POWER_OFF) diff --git a/drivers/net/tun.c b/drivers/net/tun.c index 95749006d687..6746e498dc61 100644 --- a/drivers/net/tun.c +++ b/drivers/net/tun.c @@ -673,7 +673,6 @@ static void tun_detach(struct tun_file *tfile, bool clean) static void tun_detach_all(struct net_device *dev) { struct tun_struct *tun = netdev_priv(dev); - struct bpf_prog *xdp_prog = rtnl_dereference(tun->xdp_prog); struct tun_file 
*tfile, *tmp; int i, n = tun->numqueues; @@ -708,9 +707,6 @@ static void tun_detach_all(struct net_device *dev) } BUG_ON(tun->numdisabled != 0); - if (xdp_prog) - bpf_prog_put(xdp_prog); - if (tun->flags & IFF_PERSIST) module_put(THIS_MODULE); } diff --git a/net/core/dev.c b/net/core/dev.c index 3f271c9cb5e0..6bea8931bb62 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -7110,6 +7110,27 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, return bpf_op(dev, &xdp); } +static void dev_xdp_uninstall(struct net_device *dev) +{ + struct netdev_bpf xdp; + bpf_op_t ndo_bpf; + + /* Remove generic XDP */ + WARN_ON(dev_xdp_install(dev, generic_xdp_install, NULL, 0, NULL)); + + /* Remove from the driver */ + ndo_bpf = dev->netdev_ops->ndo_bpf; + if (!ndo_bpf) + return; + + __dev_xdp_query(dev, ndo_bpf, &xdp); + if (xdp.prog_attached == XDP_ATTACHED_NONE) + return; + + /* Program removal should always succeed */ + WARN_ON(dev_xdp_install(dev, ndo_bpf, NULL, xdp.prog_flags, NULL)); +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device @@ -7240,6 +7261,7 @@ static void rollback_registered_many(struct list_head *head) /* Shutdown queueing discipline. */ dev_shutdown(dev); + dev_xdp_uninstall(dev); /* Notify protocols, that we are about to destroy * this device. They should clean all the things. 
@@ -8199,7 +8221,6 @@ EXPORT_SYMBOL(alloc_netdev_mqs); void free_netdev(struct net_device *dev) { struct napi_struct *p, *n; - struct bpf_prog *prog; might_sleep(); netif_free_tx_queues(dev); @@ -8218,12 +8239,6 @@ void free_netdev(struct net_device *dev) free_percpu(dev->pcpu_refcnt); dev->pcpu_refcnt = NULL; - prog = rcu_dereference_protected(dev->xdp_prog, 1); - if (prog) { - bpf_prog_put(prog); - static_key_slow_dec(&generic_xdp_needed); - } - /* Compatibility with error handling in drivers */ if (dev->reg_state == NETREG_UNINITIALIZED) { netdev_freemem(dev); -- cgit v1.2.3 From 83c9e13aa39aed5cf9a2f8dd69770b7c35ba1281 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:08:58 -0800 Subject: netdevsim: add software driver for testing offloads To be able to run selftests without any hardware required we need a software model. The model can also serve as an example implementation for those implementing actual HW offloads. The dummy driver has previously been extended to test SR-IOV, but the general consensus seems to be against adding further features to it. Add a new driver for purposes of software modelling only. eBPF and SR-IOV will be added here shortly; others are invited to further extend the driver with their offload models. 
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- MAINTAINERS | 5 ++ drivers/net/Kconfig | 11 ++++ drivers/net/Makefile | 1 + drivers/net/netdevsim/Makefile | 6 ++ drivers/net/netdevsim/netdev.c | 118 ++++++++++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdevsim.h | 26 +++++++++ 6 files changed, 167 insertions(+) create mode 100644 drivers/net/netdevsim/Makefile create mode 100644 drivers/net/netdevsim/netdev.c create mode 100644 drivers/net/netdevsim/netdevsim.h diff --git a/MAINTAINERS b/MAINTAINERS index 77d819b458a9..010e46a38373 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -9599,6 +9599,11 @@ NETWORKING [WIRELESS] L: linux-wireless@vger.kernel.org Q: http://patchwork.kernel.org/project/linux-wireless/list/ +NETDEVSIM +M: Jakub Kicinski +S: Maintained +F: drivers/net/netdevsim/* + NETXEN (1/10) GbE SUPPORT M: Manish Chopra M: Rahul Verma diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig index 0936da592e12..944ec3c9282c 100644 --- a/drivers/net/Kconfig +++ b/drivers/net/Kconfig @@ -497,4 +497,15 @@ config THUNDERBOLT_NET source "drivers/net/hyperv/Kconfig" +config NETDEVSIM + tristate "Simulated networking device" + depends on DEBUG_FS + help + This driver is a developer testing tool and software model that can + be used to test various control path networking APIs, especially + HW-offload related. + + To compile this driver as a module, choose M here: the module + will be called netdevsim. 
+ endif # NETDEVICES diff --git a/drivers/net/Makefile b/drivers/net/Makefile index 766f62d02a0b..04c3b747812c 100644 --- a/drivers/net/Makefile +++ b/drivers/net/Makefile @@ -78,3 +78,4 @@ obj-$(CONFIG_FUJITSU_ES) += fjes/ thunderbolt-net-y += thunderbolt.o obj-$(CONFIG_THUNDERBOLT_NET) += thunderbolt-net.o +obj-$(CONFIG_NETDEVSIM) += netdevsim/ diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile new file mode 100644 index 000000000000..07867bfe873b --- /dev/null +++ b/drivers/net/netdevsim/Makefile @@ -0,0 +1,6 @@ +# SPDX-License-Identifier: GPL-2.0 + +obj-$(CONFIG_NETDEVSIM) += netdevsim.o + +netdevsim-objs := \ + netdev.o \ diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c new file mode 100644 index 000000000000..7599c72c477a --- /dev/null +++ b/drivers/net/netdevsim/netdev.c @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ */ + +#include +#include +#include +#include +#include +#include +#include + +#include "netdevsim.h" + +static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + + u64_stats_update_begin(&ns->syncp); + ns->tx_packets++; + ns->tx_bytes += skb->len; + u64_stats_update_end(&ns->syncp); + + dev_kfree_skb(skb); + + return NETDEV_TX_OK; +} + +static void nsim_set_rx_mode(struct net_device *dev) +{ +} + +static void +nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) +{ + struct netdevsim *ns = netdev_priv(dev); + unsigned int start; + + do { + start = u64_stats_fetch_begin(&ns->syncp); + stats->tx_bytes = ns->tx_bytes; + stats->tx_packets = ns->tx_packets; + } while (u64_stats_fetch_retry(&ns->syncp, start)); +} + +static const struct net_device_ops nsim_netdev_ops = { + .ndo_start_xmit = nsim_start_xmit, + .ndo_set_rx_mode = nsim_set_rx_mode, + .ndo_set_mac_address = eth_mac_addr, + .ndo_validate_addr = eth_validate_addr, + .ndo_get_stats64 = nsim_get_stats64, +}; + +static void nsim_setup(struct net_device *dev) +{ + ether_setup(dev); + eth_hw_addr_random(dev); + + dev->netdev_ops = &nsim_netdev_ops; + dev->needs_free_netdev = true; + + dev->tx_queue_len = 0; + dev->flags |= IFF_NOARP; + dev->flags &= ~IFF_MULTICAST; + dev->priv_flags |= IFF_LIVE_ADDR_CHANGE | + IFF_NO_QUEUE; + dev->features |= NETIF_F_HIGHDMA | + NETIF_F_SG | + NETIF_F_FRAGLIST | + NETIF_F_HW_CSUM | + NETIF_F_TSO; + dev->max_mtu = ETH_MAX_MTU; +} + +static int nsim_validate(struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack) +{ + if (tb[IFLA_ADDRESS]) { + if (nla_len(tb[IFLA_ADDRESS]) != ETH_ALEN) + return -EINVAL; + if (!is_valid_ether_addr(nla_data(tb[IFLA_ADDRESS]))) + return -EADDRNOTAVAIL; + } + return 0; +} + +static struct rtnl_link_ops nsim_link_ops __read_mostly = { + .kind = DRV_NAME, + .priv_size = sizeof(struct netdevsim), + .setup = nsim_setup, + .validate = 
nsim_validate, +}; + +static int __init nsim_module_init(void) +{ + return rtnl_link_register(&nsim_link_ops); +} + +static void __exit nsim_module_exit(void) +{ + rtnl_link_unregister(&nsim_link_ops); +} + +module_init(nsim_module_init); +module_exit(nsim_module_exit); +MODULE_LICENSE("GPL"); +MODULE_ALIAS_RTNL_LINK(DRV_NAME); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h new file mode 100644 index 000000000000..4558c6f11598 --- /dev/null +++ b/drivers/net/netdevsim/netdevsim.h @@ -0,0 +1,26 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + */ + +#include +#include +#include + +#define DRV_NAME "netdevsim" + +struct netdevsim { + u64 tx_packets; + u64 tx_bytes; + struct u64_stats_sync syncp; +}; -- cgit v1.2.3 From 31d3ad832948c75139b0e5b653912f7898a1d5d5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:08:59 -0800 Subject: netdevsim: add bpf offload support Add support for loading programs for netdevsim devices and expose the related information via DebugFS. Both offload of XDP and cls_bpf programs is supported. 
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/netdevsim/Makefile | 1 + drivers/net/netdevsim/bpf.c | 373 ++++++++++++++++++++++++++++++++++++++ drivers/net/netdevsim/netdev.c | 116 +++++++++++- drivers/net/netdevsim/netdevsim.h | 40 ++++ 4 files changed, 529 insertions(+), 1 deletion(-) create mode 100644 drivers/net/netdevsim/bpf.c diff --git a/drivers/net/netdevsim/Makefile b/drivers/net/netdevsim/Makefile index 07867bfe873b..074ddebbc41d 100644 --- a/drivers/net/netdevsim/Makefile +++ b/drivers/net/netdevsim/Makefile @@ -4,3 +4,4 @@ obj-$(CONFIG_NETDEVSIM) += netdevsim.o netdevsim-objs := \ netdev.o \ + bpf.o \ diff --git a/drivers/net/netdevsim/bpf.c b/drivers/net/netdevsim/bpf.c new file mode 100644 index 000000000000..8e4398a50903 --- /dev/null +++ b/drivers/net/netdevsim/bpf.c @@ -0,0 +1,373 @@ +/* + * Copyright (C) 2017 Netronome Systems, Inc. + * + * This software is licensed under the GNU General License Version 2, + * June 1991 as shown in the file COPYING in the top-level directory of this + * source tree. + * + * THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS + * FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE + * OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME + * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
+ */ + +#include +#include +#include +#include +#include +#include + +#include "netdevsim.h" + +struct nsim_bpf_bound_prog { + struct netdevsim *ns; + struct bpf_prog *prog; + struct dentry *ddir; + const char *state; + bool is_loaded; + struct list_head l; +}; + +static int nsim_debugfs_bpf_string_read(struct seq_file *file, void *data) +{ + const char **str = file->private; + + if (*str) + seq_printf(file, "%s\n", *str); + + return 0; +} + +static int nsim_debugfs_bpf_string_open(struct inode *inode, struct file *f) +{ + return single_open(f, nsim_debugfs_bpf_string_read, inode->i_private); +} + +static const struct file_operations nsim_bpf_string_fops = { + .owner = THIS_MODULE, + .open = nsim_debugfs_bpf_string_open, + .release = single_release, + .read = seq_read, + .llseek = seq_lseek +}; + +static int +nsim_bpf_verify_insn(struct bpf_verifier_env *env, int insn_idx, int prev_insn) +{ + struct nsim_bpf_bound_prog *state; + + state = env->prog->aux->offload->dev_priv; + if (state->ns->bpf_bind_verifier_delay && !insn_idx) + msleep(state->ns->bpf_bind_verifier_delay); + + return 0; +} + +static const struct bpf_ext_analyzer_ops nsim_bpf_analyzer_ops = { + .insn_hook = nsim_bpf_verify_insn, +}; + +static bool nsim_xdp_offload_active(struct netdevsim *ns) +{ + return ns->xdp_prog_mode == XDP_ATTACHED_HW; +} + +static void nsim_prog_set_loaded(struct bpf_prog *prog, bool loaded) +{ + struct nsim_bpf_bound_prog *state; + + if (!prog || !prog->aux->offload) + return; + + state = prog->aux->offload->dev_priv; + state->is_loaded = loaded; +} + +static int +nsim_bpf_offload(struct netdevsim *ns, struct bpf_prog *prog, bool oldprog) +{ + nsim_prog_set_loaded(ns->bpf_offloaded, false); + + WARN(!!ns->bpf_offloaded != oldprog, + "bad offload state, expected offload %sto be active", + oldprog ? "" : "not "); + ns->bpf_offloaded = prog; + ns->bpf_offloaded_id = prog ? 
prog->aux->id : 0; + nsim_prog_set_loaded(prog, true); + + return 0; +} + +int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv) +{ + struct tc_cls_bpf_offload *cls_bpf = type_data; + struct bpf_prog *prog = cls_bpf->prog; + struct netdevsim *ns = cb_priv; + bool skip_sw; + + if (type != TC_SETUP_CLSBPF || + !tc_can_offload(ns->netdev) || + cls_bpf->common.protocol != htons(ETH_P_ALL) || + cls_bpf->common.chain_index) + return -EOPNOTSUPP; + + skip_sw = cls_bpf->gen_flags & TCA_CLS_FLAGS_SKIP_SW; + + if (nsim_xdp_offload_active(ns)) + return -EBUSY; + + if (!ns->bpf_tc_accept) + return -EOPNOTSUPP; + /* Note: progs without skip_sw will probably not be dev bound */ + if (prog && !prog->aux->offload && !ns->bpf_tc_non_bound_accept) + return -EOPNOTSUPP; + + switch (cls_bpf->command) { + case TC_CLSBPF_REPLACE: + return nsim_bpf_offload(ns, prog, true); + case TC_CLSBPF_ADD: + return nsim_bpf_offload(ns, prog, false); + case TC_CLSBPF_DESTROY: + return nsim_bpf_offload(ns, NULL, true); + default: + return -EOPNOTSUPP; + } +} + +int nsim_bpf_disable_tc(struct netdevsim *ns) +{ + if (ns->bpf_offloaded && !nsim_xdp_offload_active(ns)) + return -EBUSY; + return 0; +} + +static int nsim_xdp_offload_prog(struct netdevsim *ns, struct netdev_bpf *bpf) +{ + if (!nsim_xdp_offload_active(ns) && !bpf->prog) + return 0; + if (!nsim_xdp_offload_active(ns) && bpf->prog && ns->bpf_offloaded) { + NSIM_EA(bpf->extack, "TC program is already loaded"); + return -EBUSY; + } + + return nsim_bpf_offload(ns, bpf->prog, nsim_xdp_offload_active(ns)); +} + +static int nsim_xdp_set_prog(struct netdevsim *ns, struct netdev_bpf *bpf) +{ + int err; + + if (ns->xdp_prog && (bpf->flags ^ ns->xdp_flags) & XDP_FLAGS_MODES) { + NSIM_EA(bpf->extack, "program loaded with different flags"); + return -EBUSY; + } + + if (bpf->command == XDP_SETUP_PROG && !ns->bpf_xdpdrv_accept) { + NSIM_EA(bpf->extack, "driver XDP disabled in DebugFS"); + return -EOPNOTSUPP; + } + if 
(bpf->command == XDP_SETUP_PROG_HW && !ns->bpf_xdpoffload_accept) { + NSIM_EA(bpf->extack, "XDP offload disabled in DebugFS"); + return -EOPNOTSUPP; + } + + if (bpf->command == XDP_SETUP_PROG_HW) { + err = nsim_xdp_offload_prog(ns, bpf); + if (err) + return err; + } + + if (ns->xdp_prog) + bpf_prog_put(ns->xdp_prog); + + ns->xdp_prog = bpf->prog; + ns->xdp_flags = bpf->flags; + + if (!bpf->prog) + ns->xdp_prog_mode = XDP_ATTACHED_NONE; + else if (bpf->command == XDP_SETUP_PROG) + ns->xdp_prog_mode = XDP_ATTACHED_DRV; + else + ns->xdp_prog_mode = XDP_ATTACHED_HW; + + return 0; +} + +int nsim_bpf_create_prog(struct netdevsim *ns, struct bpf_prog *prog) +{ + struct nsim_bpf_bound_prog *state; + char name[16]; + int err; + + state = kzalloc(sizeof(*state), GFP_KERNEL); + if (!state) + return -ENOMEM; + + state->ns = ns; + state->prog = prog; + state->state = "verify"; + + /* Program id is not populated yet when we create the state. */ + sprintf(name, "%u", ns->prog_id_gen++); + state->ddir = debugfs_create_dir(name, ns->ddir_bpf_bound_progs); + if (IS_ERR(state->ddir)) { + err = PTR_ERR(state->ddir); + kfree(state); + return err; + } + + debugfs_create_u32("id", 0400, state->ddir, &prog->aux->id); + debugfs_create_file("state", 0400, state->ddir, + &state->state, &nsim_bpf_string_fops); + debugfs_create_bool("loaded", 0400, state->ddir, &state->is_loaded); + + list_add_tail(&state->l, &ns->bpf_bound_progs); + + prog->aux->offload->dev_priv = state; + + return 0; +} + +void nsim_bpf_destroy_prog(struct bpf_prog *prog) +{ + struct nsim_bpf_bound_prog *state; + + state = prog->aux->offload->dev_priv; + WARN(state->is_loaded, + "offload state destroyed while program still bound"); + debugfs_remove_recursive(state->ddir); + list_del(&state->l); + kfree(state); +} + +static int nsim_setup_prog_checks(struct netdevsim *ns, struct netdev_bpf *bpf) +{ + if (bpf->prog && bpf->prog->aux->offload) { + NSIM_EA(bpf->extack, "attempt to load offloaded prog to drv"); + return -EINVAL; 
+ } + if (ns->netdev->mtu > NSIM_XDP_MAX_MTU) { + NSIM_EA(bpf->extack, "MTU too large w/ XDP enabled"); + return -EINVAL; + } + if (nsim_xdp_offload_active(ns)) { + NSIM_EA(bpf->extack, "xdp offload active, can't load drv prog"); + return -EBUSY; + } + return 0; +} + +static int +nsim_setup_prog_hw_checks(struct netdevsim *ns, struct netdev_bpf *bpf) +{ + struct nsim_bpf_bound_prog *state; + + if (!bpf->prog) + return 0; + + if (!bpf->prog->aux->offload) { + NSIM_EA(bpf->extack, "xdpoffload of non-bound program"); + return -EINVAL; + } + if (bpf->prog->aux->offload->netdev != ns->netdev) { + NSIM_EA(bpf->extack, "program bound to different dev"); + return -EINVAL; + } + + state = bpf->prog->aux->offload->dev_priv; + if (WARN_ON(strcmp(state->state, "xlated"))) { + NSIM_EA(bpf->extack, "offloading program in bad state"); + return -EINVAL; + } + return 0; +} + +int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf) +{ + struct netdevsim *ns = netdev_priv(dev); + struct nsim_bpf_bound_prog *state; + int err; + + ASSERT_RTNL(); + + switch (bpf->command) { + case BPF_OFFLOAD_VERIFIER_PREP: + if (!ns->bpf_bind_accept) + return -EOPNOTSUPP; + + err = nsim_bpf_create_prog(ns, bpf->verifier.prog); + if (err) + return err; + + bpf->verifier.ops = &nsim_bpf_analyzer_ops; + return 0; + case BPF_OFFLOAD_TRANSLATE: + state = bpf->offload.prog->aux->offload->dev_priv; + + state->state = "xlated"; + return 0; + case BPF_OFFLOAD_DESTROY: + nsim_bpf_destroy_prog(bpf->offload.prog); + return 0; + case XDP_QUERY_PROG: + bpf->prog_attached = ns->xdp_prog_mode; + bpf->prog_id = ns->xdp_prog ? ns->xdp_prog->aux->id : 0; + bpf->prog_flags = ns->xdp_prog ? 
ns->xdp_flags : 0; + return 0; + case XDP_SETUP_PROG: + err = nsim_setup_prog_checks(ns, bpf); + if (err) + return err; + + return nsim_xdp_set_prog(ns, bpf); + case XDP_SETUP_PROG_HW: + err = nsim_setup_prog_hw_checks(ns, bpf); + if (err) + return err; + + return nsim_xdp_set_prog(ns, bpf); + default: + return -EINVAL; + } +} + +int nsim_bpf_init(struct netdevsim *ns) +{ + INIT_LIST_HEAD(&ns->bpf_bound_progs); + + debugfs_create_u32("bpf_offloaded_id", 0400, ns->ddir, + &ns->bpf_offloaded_id); + + ns->bpf_bind_accept = true; + debugfs_create_bool("bpf_bind_accept", 0600, ns->ddir, + &ns->bpf_bind_accept); + debugfs_create_u32("bpf_bind_verifier_delay", 0600, ns->ddir, + &ns->bpf_bind_verifier_delay); + ns->ddir_bpf_bound_progs = + debugfs_create_dir("bpf_bound_progs", ns->ddir); + + ns->bpf_tc_accept = true; + debugfs_create_bool("bpf_tc_accept", 0600, ns->ddir, + &ns->bpf_tc_accept); + debugfs_create_bool("bpf_tc_non_bound_accept", 0600, ns->ddir, + &ns->bpf_tc_non_bound_accept); + ns->bpf_xdpdrv_accept = true; + debugfs_create_bool("bpf_xdpdrv_accept", 0600, ns->ddir, + &ns->bpf_xdpdrv_accept); + ns->bpf_xdpoffload_accept = true; + debugfs_create_bool("bpf_xdpoffload_accept", 0600, ns->ddir, + &ns->bpf_xdpoffload_accept); + + return 0; +} + +void nsim_bpf_uninit(struct netdevsim *ns) +{ + WARN_ON(!list_empty(&ns->bpf_bound_progs)); + WARN_ON(ns->xdp_prog); + WARN_ON(ns->bpf_offloaded); +} diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 7599c72c477a..828c1ce49a8b 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -13,16 +13,45 @@ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. 
*/ +#include #include #include #include #include #include #include +#include #include #include "netdevsim.h" +static int nsim_init(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + int err; + + ns->netdev = dev; + ns->ddir = debugfs_create_dir(netdev_name(dev), nsim_ddir); + + err = nsim_bpf_init(ns); + if (err) + goto err_debugfs_destroy; + + return 0; + +err_debugfs_destroy: + debugfs_remove_recursive(ns->ddir); + return err; +} + +static void nsim_uninit(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + + debugfs_remove_recursive(ns->ddir); + nsim_bpf_uninit(ns); +} + static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); @@ -41,6 +70,19 @@ static void nsim_set_rx_mode(struct net_device *dev) { } +static int nsim_change_mtu(struct net_device *dev, int new_mtu) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (ns->xdp_prog_mode == XDP_ATTACHED_DRV && + new_mtu > NSIM_XDP_MAX_MTU) + return -EBUSY; + + dev->mtu = new_mtu; + + return 0; +} + static void nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) { @@ -54,12 +96,66 @@ nsim_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats) } while (u64_stats_fetch_retry(&ns->syncp, start)); } +static int +nsim_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv) +{ + return nsim_bpf_setup_tc_block_cb(type, type_data, cb_priv); +} + +static int +nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (f->binder_type != TCF_BLOCK_BINDER_TYPE_CLSACT_INGRESS) + return -EOPNOTSUPP; + + switch (f->command) { + case TC_BLOCK_BIND: + return tcf_block_cb_register(f->block, nsim_setup_tc_block_cb, + ns, ns); + case TC_BLOCK_UNBIND: + tcf_block_cb_unregister(f->block, nsim_setup_tc_block_cb, ns); + return 0; + default: + return -EOPNOTSUPP; + } +} + +static int +nsim_setup_tc(struct net_device 
*dev, enum tc_setup_type type, void *type_data) +{ + switch (type) { + case TC_SETUP_BLOCK: + return nsim_setup_tc_block(dev, type_data); + default: + return -EOPNOTSUPP; + } +} + +static int +nsim_set_features(struct net_device *dev, netdev_features_t features) +{ + struct netdevsim *ns = netdev_priv(dev); + + if ((dev->features & NETIF_F_HW_TC) > (features & NETIF_F_HW_TC)) + return nsim_bpf_disable_tc(ns); + + return 0; +} + static const struct net_device_ops nsim_netdev_ops = { + .ndo_init = nsim_init, + .ndo_uninit = nsim_uninit, .ndo_start_xmit = nsim_start_xmit, .ndo_set_rx_mode = nsim_set_rx_mode, .ndo_set_mac_address = eth_mac_addr, .ndo_validate_addr = eth_validate_addr, + .ndo_change_mtu = nsim_change_mtu, .ndo_get_stats64 = nsim_get_stats64, + .ndo_setup_tc = nsim_setup_tc, + .ndo_set_features = nsim_set_features, + .ndo_bpf = nsim_bpf, }; static void nsim_setup(struct net_device *dev) @@ -80,6 +176,7 @@ static void nsim_setup(struct net_device *dev) NETIF_F_FRAGLIST | NETIF_F_HW_CSUM | NETIF_F_TSO; + dev->hw_features |= NETIF_F_HW_TC; dev->max_mtu = ETH_MAX_MTU; } @@ -102,14 +199,31 @@ static struct rtnl_link_ops nsim_link_ops __read_mostly = { .validate = nsim_validate, }; +struct dentry *nsim_ddir; + static int __init nsim_module_init(void) { - return rtnl_link_register(&nsim_link_ops); + int err; + + nsim_ddir = debugfs_create_dir(DRV_NAME, NULL); + if (IS_ERR(nsim_ddir)) + return PTR_ERR(nsim_ddir); + + err = rtnl_link_register(&nsim_link_ops); + if (err) + goto err_debugfs_destroy; + + return 0; + +err_debugfs_destroy: + debugfs_remove_recursive(nsim_ddir); + return err; } static void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); + debugfs_remove_recursive(nsim_ddir); } module_init(nsim_module_init); diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 4558c6f11598..8779e6a8f885 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -14,13 +14,53 @@ */ 
#include +#include #include #include #define DRV_NAME "netdevsim" +#define NSIM_XDP_MAX_MTU 4000 + +#define NSIM_EA(extack, msg) NL_SET_ERR_MSG_MOD((extack), msg) + +struct bpf_prog; +struct dentry; + struct netdevsim { + struct net_device *netdev; + u64 tx_packets; u64 tx_bytes; struct u64_stats_sync syncp; + + struct dentry *ddir; + + struct bpf_prog *bpf_offloaded; + u32 bpf_offloaded_id; + + u32 xdp_flags; + int xdp_prog_mode; + struct bpf_prog *xdp_prog; + + u32 prog_id_gen; + + bool bpf_bind_accept; + u32 bpf_bind_verifier_delay; + struct dentry *ddir_bpf_bound_progs; + struct list_head bpf_bound_progs; + + bool bpf_tc_accept; + bool bpf_tc_non_bound_accept; + bool bpf_xdpdrv_accept; + bool bpf_xdpoffload_accept; }; + +extern struct dentry *nsim_ddir; + +int nsim_bpf_init(struct netdevsim *ns); +void nsim_bpf_uninit(struct netdevsim *ns); +int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf); +int nsim_bpf_disable_tc(struct netdevsim *ns); +int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, + void *type_data, void *cb_priv); -- cgit v1.2.3 From 417ec26477a5c19abc72dd0298f48ebe5d2db43a Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:09:00 -0800 Subject: selftests/bpf: add offload test based on netdevsim Add a test of BPF offload control path interfaces based on just-added netdevsim driver. Perform various checks of both the stack and the expected driver behaviour. 
Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- tools/testing/selftests/bpf/Makefile | 5 +- tools/testing/selftests/bpf/sample_ret0.c | 7 + tools/testing/selftests/bpf/test_offload.py | 681 ++++++++++++++++++++++++++++ 3 files changed, 691 insertions(+), 2 deletions(-) create mode 100644 tools/testing/selftests/bpf/sample_ret0.c create mode 100755 tools/testing/selftests/bpf/test_offload.py diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 333a48655ee0..2c9d8c63c6fa 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -17,9 +17,10 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test TEST_GEN_FILES = test_pkt_access.o test_xdp.o test_l4lb.o test_tcp_estats.o test_obj_id.o \ test_pkt_md_access.o test_xdp_redirect.o test_xdp_meta.o sockmap_parse_prog.o \ - sockmap_verdict_prog.o dev_cgroup.o + sockmap_verdict_prog.o dev_cgroup.o sample_ret0.o -TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh +TEST_PROGS := test_kmod.sh test_xdp_redirect.sh test_xdp_meta.sh \ + test_offload.py include ../lib.mk diff --git a/tools/testing/selftests/bpf/sample_ret0.c b/tools/testing/selftests/bpf/sample_ret0.c new file mode 100644 index 000000000000..fec99750d6ea --- /dev/null +++ b/tools/testing/selftests/bpf/sample_ret0.c @@ -0,0 +1,7 @@ +/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */ + +/* Sample program which should always load for testing control paths. */ +int func() +{ + return 0; +} diff --git a/tools/testing/selftests/bpf/test_offload.py b/tools/testing/selftests/bpf/test_offload.py new file mode 100755 index 000000000000..3914f7a4585a --- /dev/null +++ b/tools/testing/selftests/bpf/test_offload.py @@ -0,0 +1,681 @@ +#!/usr/bin/python3 + +# Copyright (C) 2017 Netronome Systems, Inc. 
+# +# This software is licensed under the GNU General License Version 2, +# June 1991 as shown in the file COPYING in the top-level directory of this +# source tree. +# +# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS +# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE +# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME +# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +from datetime import datetime +import argparse +import json +import os +import pprint +import subprocess +import time + +logfile = None +log_level = 1 +bpf_test_dir = os.path.dirname(os.path.realpath(__file__)) +pp = pprint.PrettyPrinter() +devs = [] # devices we created for clean up +files = [] # files to be removed + +def log_get_sec(level=0): + return "*" * (log_level + level) + +def log_level_inc(add=1): + global log_level + log_level += add + +def log_level_dec(sub=1): + global log_level + log_level -= sub + +def log_level_set(level): + global log_level + log_level = level + +def log(header, data, level=None): + """ + Output to an optional log. 
+ """ + if logfile is None: + return + if level is not None: + log_level_set(level) + + if not isinstance(data, str): + data = pp.pformat(data) + + if len(header): + logfile.write("\n" + log_get_sec() + " ") + logfile.write(header) + if len(header) and len(data.strip()): + logfile.write("\n") + logfile.write(data) + +def skip(cond, msg): + if not cond: + return + print("SKIP: " + msg) + log("SKIP: " + msg, "", level=1) + os.sys.exit(0) + +def fail(cond, msg): + if not cond: + return + print("FAIL: " + msg) + log("FAIL: " + msg, "", level=1) + os.sys.exit(1) + +def start_test(msg): + log(msg, "", level=1) + log_level_inc() + print(msg) + +def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True): + """ + Run a command in subprocess and return tuple of (retval, stdout); + optionally return stderr as well as third value. + """ + proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE, + stderr=subprocess.PIPE) + if background: + msg = "%s START: %s" % (log_get_sec(1), + datetime.now().strftime("%H:%M:%S.%f")) + log("BKG " + proc.args, msg) + return proc + + return cmd_result(proc, include_stderr=include_stderr, fail=fail) + +def cmd_result(proc, include_stderr=False, fail=False): + stdout, stderr = proc.communicate() + stdout = stdout.decode("utf-8") + stderr = stderr.decode("utf-8") + proc.stdout.close() + proc.stderr.close() + + stderr = "\n" + stderr + if stderr[-1] == "\n": + stderr = stderr[:-1] + + sec = log_get_sec(1) + log("CMD " + proc.args, + "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" % + (proc.returncode, sec, stdout, sec, stderr, + sec, datetime.now().strftime("%H:%M:%S.%f"))) + + if proc.returncode != 0 and fail: + if len(stderr) > 0 and stderr[-1] == "\n": + stderr = stderr[:-1] + raise Exception("Command failed: %s\n%s" % (proc.args, stderr)) + + if include_stderr: + return proc.returncode, stdout, stderr + else: + return proc.returncode, stdout + +def rm(f): + cmd("rm -f %s" % (f)) + if f in files: + 
files.remove(f) + +def tool(name, args, flags, JSON=True, fail=True): + params = "" + if JSON: + params += "%s " % (flags["json"]) + + ret, out = cmd(name + " " + params + args, fail=fail) + if JSON and len(out.strip()) != 0: + return ret, json.loads(out) + else: + return ret, out + +def bpftool(args, JSON=True, fail=True): + return tool("bpftool", args, {"json":"-p"}, JSON=JSON, fail=fail) + +def bpftool_prog_list(expected=None): + _, progs = bpftool("prog show", JSON=True, fail=True) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs loaded, expected %d" % + (len(progs), expected)) + return progs + +def bpftool_prog_list_wait(expected=0, n_retry=20): + for i in range(n_retry): + nprogs = len(bpftool_prog_list()) + if nprogs == expected: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs)) + +def ip(args, force=False, JSON=True, fail=True): + if force: + args = "-force " + args + return tool("ip", args, {"json":"-j"}, JSON=JSON, fail=fail) + +def tc(args, JSON=True, fail=True): + return tool("tc", args, {"json":"-p"}, JSON=JSON, fail=fail) + +def ethtool(dev, opt, args, fail=True): + return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail) + +def bpf_obj(name, sec=".text", path=bpf_test_dir,): + return "obj %s sec %s" % (os.path.join(path, name), sec) + +def bpf_pinned(name): + return "pinned %s" % (name) + +def bpf_bytecode(bytecode): + return "bytecode \"%s\"" % (bytecode) + +class DebugfsDir: + """ + Class for accessing DebugFS directories as a dictionary. 
+ """ + + def __init__(self, path): + self.path = path + self._dict = self._debugfs_dir_read(path) + + def __len__(self): + return len(self._dict.keys()) + + def __getitem__(self, key): + if type(key) is int: + key = list(self._dict.keys())[key] + return self._dict[key] + + def __setitem__(self, key, value): + log("DebugFS set %s = %s" % (key, value), "") + log_level_inc() + + cmd("echo '%s' > %s/%s" % (value, self.path, key)) + log_level_dec() + + _, out = cmd('cat %s/%s' % (self.path, key)) + self._dict[key] = out.strip() + + def _debugfs_dir_read(self, path): + dfs = {} + + log("DebugFS state for %s" % (path), "") + log_level_inc(add=2) + + _, out = cmd('ls ' + path) + for f in out.split(): + p = os.path.join(path, f) + if os.path.isfile(p): + _, out = cmd('cat %s/%s' % (path, f)) + dfs[f] = out.strip() + elif os.path.isdir(p): + dfs[f] = DebugfsDir(p) + else: + raise Exception("%s is neither file nor directory" % (p)) + + log_level_dec() + log("DebugFS state", dfs) + log_level_dec() + + return dfs + +class NetdevSim: + """ + Class for netdevsim netdevice and its attributes. 
+ """ + + def __init__(self): + self.dev = self._netdevsim_create() + devs.append(self) + + self.dfs_dir = '/sys/kernel/debug/netdevsim/%s' % (self.dev['ifname']) + self.dfs_refresh() + + def __getitem__(self, key): + return self.dev[key] + + def _netdevsim_create(self): + _, old = ip("link show") + ip("link add sim%d type netdevsim") + _, new = ip("link show") + + for dev in new: + f = filter(lambda x: x["ifname"] == dev["ifname"], old) + if len(list(f)) == 0: + return dev + + raise Exception("failed to create netdevsim device") + + def remove(self): + devs.remove(self) + ip("link del dev %s" % (self.dev["ifname"])) + + def dfs_refresh(self): + self.dfs = DebugfsDir(self.dfs_dir) + return self.dfs + + def dfs_num_bound_progs(self): + path = os.path.join(self.dfs_dir, "bpf_bound_progs") + _, progs = cmd('ls %s' % (path)) + return len(progs.split()) + + def dfs_get_bound_progs(self, expected): + progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs")) + if expected is not None: + if len(progs) != expected: + fail(True, "%d BPF programs bound, expected %d" % + (len(progs), expected)) + return progs + + def wait_for_flush(self, bound=0, total=0, n_retry=20): + for i in range(n_retry): + nbound = self.dfs_num_bound_progs() + nprogs = len(bpftool_prog_list()) + if nbound == bound and nprogs == total: + return + time.sleep(0.05) + raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs)) + + def set_mtu(self, mtu, fail=True): + return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu), + fail=fail) + + def set_xdp(self, bpf, mode, force=False, fail=True): + return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf), + force=force, fail=fail) + + def unset_xdp(self, mode, force=False, fail=True): + return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode), + force=force, fail=fail) + + def ip_link_show(self, xdp): + _, link = ip("link show dev %s" % 
(self['ifname'])) + if len(link) > 1: + raise Exception("Multiple objects on ip link show") + if len(link) < 1: + return {} + fail(xdp != "xdp" in link, + "XDP program not reporting in iplink (reported %s, expected %s)" % + ("xdp" in link, xdp)) + return link[0] + + def tc_add_ingress(self): + tc("qdisc add dev %s ingress" % (self['ifname'])) + + def tc_del_ingress(self): + tc("qdisc del dev %s ingress" % (self['ifname'])) + + def tc_flush_filters(self, bound=0, total=0): + self.tc_del_ingress() + self.tc_add_ingress() + self.wait_for_flush(bound=bound, total=total) + + def tc_show_ingress(self, expected=None): + # No JSON support, oh well... + flags = ["skip_sw", "skip_hw", "in_hw"] + named = ["protocol", "pref", "chain", "handle", "id", "tag"] + + args = "-s filter show dev %s ingress" % (self['ifname']) + _, out = tc(args, JSON=False) + + filters = [] + lines = out.split('\n') + for line in lines: + words = line.split() + if "handle" not in words: + continue + fltr = {} + for flag in flags: + fltr[flag] = flag in words + for name in named: + try: + idx = words.index(name) + fltr[name] = words[idx + 1] + except ValueError: + pass + filters.append(fltr) + + if expected is not None: + fail(len(filters) != expected, + "%d ingress filters loaded, expected %d" % + (len(filters), expected)) + return filters + + def cls_bpf_add_filter(self, bpf, da=False, skip_sw=False, skip_hw=False, + fail=True): + params = "" + if da: + params += " da" + if skip_sw: + params += " skip_sw" + if skip_hw: + params += " skip_hw" + return tc("filter add dev %s ingress bpf %s %s" % + (self['ifname'], bpf, params), fail=fail) + + def set_ethtool_tc_offloads(self, enable, fail=True): + args = "hw-tc-offload %s" % ("on" if enable else "off") + return ethtool(self, "-K", args, fail=fail) + +################################################################################ +def clean_up(): + for dev in devs: + dev.remove() + for f in files: + cmd("rm -f %s" % (f)) + +def pin_prog(file_name, 
idx=0): + progs = bpftool_prog_list(expected=(idx + 1)) + prog = progs[idx] + bpftool("prog pin id %d %s" % (prog["id"], file_name)) + files.append(file_name) + + return file_name, bpf_pinned(file_name) + +# Parse command line +parser = argparse.ArgumentParser() +parser.add_argument("--log", help="output verbose log to given file") +args = parser.parse_args() +if args.log: + logfile = open(args.log, 'w+') + logfile.write("# -*-Org-*-") + +log("Prepare...", "", level=1) +log_level_inc() + +# Check permissions +skip(os.getuid() != 0, "test must be run as root") + +# Check tools +ret, progs = bpftool("prog", fail=False) +skip(ret != 0, "bpftool not installed") +# Check no BPF programs are loaded +skip(len(progs) != 0, "BPF programs already loaded on the system") + +# Check netdevsim +ret, out = cmd("modprobe netdevsim", fail=False) +skip(ret != 0, "netdevsim module could not be loaded") + +# Check debugfs +_, out = cmd("mount") +if out.find("/sys/kernel/debug type debugfs") == -1: + cmd("mount -t debugfs none /sys/kernel/debug") + +# Check samples are compiled +samples = ["sample_ret0.o"] +for s in samples: + ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False) + skip(ret != 0, "sample %s/%s not found, please compile it" % + (bpf_test_dir, s)) + +try: + obj = bpf_obj("sample_ret0.o") + bytecode = bpf_bytecode("1,6 0 0 4294967295,") + + start_test("Test destruction of generic XDP...") + sim = NetdevSim() + sim.set_xdp(obj, "generic") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.tc_add_ingress() + + start_test("Test TC non-offloaded...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False) + fail(ret != 0, "Software TC filter did not load") + + start_test("Test TC non-offloaded isn't getting bound...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + + sim.tc_flush_filters() + + start_test("Test TC offloads are off by 
default...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + fail(ret == 0, "TC filter loaded without enabling TC offloads") + sim.wait_for_flush() + + sim.set_ethtool_tc_offloads(True) + sim.dfs["bpf_tc_non_bound_accept"] = "Y" + + start_test("Test TC offload by default...") + ret, _ = sim.cls_bpf_add_filter(obj, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Filter not offloaded by default") + + sim.tc_flush_filters() + + start_test("Test TC cBPF bytcode tries offload by default...") + ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False) + fail(ret != 0, "Software TC filter did not load") + sim.dfs_get_bound_progs(expected=0) + ingress = sim.tc_show_ingress(expected=1) + fltr = ingress[0] + fail(not fltr["in_hw"], "Bytecode not offloaded by default") + + sim.tc_flush_filters() + sim.dfs["bpf_tc_non_bound_accept"] = "N" + + start_test("Test TC cBPF unbound bytecode doesn't offload...") + ret, _ = sim.cls_bpf_add_filter(bytecode, skip_sw=True, fail=False) + fail(ret == 0, "TC bytecode loaded for offload") + sim.wait_for_flush() + + start_test("Test TC offloads work...") + ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + fail(ret != 0, "TC filter did not load with TC offloads enabled") + + start_test("Test TC offload basics...") + dfs = sim.dfs_get_bound_progs(expected=1) + progs = bpftool_prog_list(expected=1) + ingress = sim.tc_show_ingress(expected=1) + + dprog = dfs[0] + prog = progs[0] + fltr = ingress[0] + fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter") + fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter") + fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back") + + start_test("Test TC offload is device-bound...") + fail(str(prog["id"]) != fltr["id"], "Program IDs don't match") + fail(prog["tag"] != fltr["tag"], "Program tags 
don't match") + fail(fltr["id"] != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test disabling TC offloads is rejected while filters installed...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...") + + start_test("Test qdisc removal frees things...") + sim.tc_flush_filters() + sim.tc_show_ingress(expected=0) + + start_test("Test disabling TC offloads is OK without filters...") + ret, _ = sim.set_ethtool_tc_offloads(False, fail=False) + fail(ret != 0, + "Driver refused to disable TC offloads without filters installed...") + + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of TC filters...") + sim.cls_bpf_add_filter(obj, skip_sw=True) + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test destroying device gets rid of XDP...") + sim.set_xdp(obj, "offload") + sim.remove() + bpftool_prog_list_wait(expected=0) + + sim = NetdevSim() + sim.set_ethtool_tc_offloads(True) + + start_test("Test XDP prog reporting...") + sim.set_xdp(obj, "drv") + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + "Loaded program has wrong ID") + + start_test("Test XDP prog replace without force...") + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Replaced XDP program without -force") + sim.wait_for_flush(total=1) + + start_test("Test XDP prog replace with force...") + ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False) + fail(ret != 0, "Could not replace XDP program with -force") + bpftool_prog_list_wait(expected=1) + ipl = sim.ip_link_show(xdp=True) + progs = bpftool_prog_list(expected=1) + fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"], + 
"Loaded program has wrong ID") + + start_test("Test XDP prog replace with bad flags...") + ret, _ = sim.set_xdp(obj, "offload", force=True, fail=False) + fail(ret == 0, "Replaced XDP program with a program in different mode") + ret, _ = sim.set_xdp(obj, "", force=True, fail=False) + fail(ret == 0, "Replaced XDP program with a program in different mode") + + start_test("Test XDP prog remove with bad flags...") + ret, _ = sim.unset_xdp("offload", force=True, fail=False) + fail(ret == 0, "Removed program with a bad mode mode") + ret, _ = sim.unset_xdp("", force=True, fail=False) + fail(ret == 0, "Removed program with a bad mode mode") + + start_test("Test MTU restrictions...") + ret, _ = sim.set_mtu(9000, fail=False) + fail(ret == 0, + "Driver should refuse to increase MTU to 9000 with XDP loaded...") + sim.unset_xdp("drv") + bpftool_prog_list_wait(expected=0) + sim.set_mtu(9000) + ret, _ = sim.set_xdp(obj, "drv", fail=False) + fail(ret == 0, "Driver should refuse to load program with MTU of 9000...") + sim.set_mtu(1500) + + sim.wait_for_flush() + start_test("Test XDP offload...") + sim.set_xdp(obj, "offload") + ipl = sim.ip_link_show(xdp=True) + link_xdp = ipl["xdp"]["prog"] + progs = bpftool_prog_list(expected=1) + prog = progs[0] + fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID") + + start_test("Test XDP offload is device bound...") + dfs = sim.dfs_get_bound_progs(expected=1) + dprog = dfs[0] + + fail(prog["id"] != link_xdp["id"], "Program IDs don't match") + fail(prog["tag"] != link_xdp["tag"], "Program tags don't match") + fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match") + fail(dprog["state"] != "xlated", "Offloaded program state not translated") + fail(dprog["loaded"] != "Y", "Offloaded program is not loaded") + + start_test("Test removing XDP program many times...") + sim.unset_xdp("offload") + sim.unset_xdp("offload") + sim.unset_xdp("drv") + sim.unset_xdp("drv") + sim.unset_xdp("") + sim.unset_xdp("") + 
bpftool_prog_list_wait(expected=0) + + start_test("Test attempt to use a program for a wrong device...") + sim2 = NetdevSim() + sim2.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + + ret, _ = sim.set_xdp(pinned, "offload", fail=False) + fail(ret == 0, "Pinned program loaded for a different device accepted") + sim2.remove() + ret, _ = sim.set_xdp(pinned, "offload", fail=False) + fail(ret == 0, "Pinned program loaded for a removed device accepted") + rm(pin_file) + bpftool_prog_list_wait(expected=0) + + start_test("Test mixing of TC and XDP...") + sim.tc_add_ingress() + sim.set_xdp(obj, "offload") + ret, _ = sim.cls_bpf_add_filter(obj, skip_sw=True, fail=False) + fail(ret == 0, "Loading TC when XDP active should fail") + sim.unset_xdp("offload") + sim.wait_for_flush() + + sim.cls_bpf_add_filter(obj, skip_sw=True) + ret, _ = sim.set_xdp(obj, "offload", fail=False) + fail(ret == 0, "Loading XDP when TC active should fail") + + start_test("Test binding TC from pinned...") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp") + sim.tc_flush_filters(bound=1, total=1) + sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True) + sim.tc_flush_filters(bound=1, total=1) + + start_test("Test binding XDP from pinned...") + sim.set_xdp(obj, "offload") + pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1) + + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + sim.set_xdp(pinned, "offload", force=True) + sim.unset_xdp("offload") + + start_test("Test offload of wrong type fails...") + ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False) + fail(ret == 0, "Managed to attach XDP program to TC") + + start_test("Test asking for TC offload of two filters...") + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + sim.cls_bpf_add_filter(obj, da=True, skip_sw=True) + # The above will trigger a splat until TC cls_bpf drivers are fixed + + sim.tc_flush_filters(bound=2, total=2) + + start_test("Test if netdev removal waits for 
translation...") + delay_msec = 500 + sim.dfs["bpf_bind_verifier_delay"] = delay_msec + start = time.time() + cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \ + (sim['ifname'], obj) + tc_proc = cmd(cmd_line, background=True, fail=False) + # Wait for the verifier to start + while sim.dfs_num_bound_progs() <= 2: + pass + sim.remove() + end = time.time() + ret, _ = cmd_result(tc_proc, fail=False) + time_diff = end - start + log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff)) + + fail(ret == 0, "Managed to load TC filter on a unregistering device") + delay_sec = delay_msec * 0.001 + fail(time_diff < delay_sec, "Removal process took %s, expected %s" % + (time_diff, delay_sec)) + + print("%s: OK" % (os.path.basename(__file__))) + +finally: + log("Clean up...", "", level=1) + log_level_inc() + clean_up() -- cgit v1.2.3 From 79579220566cd33fe3b15ce8249c57e10251b258 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:09:01 -0800 Subject: netdevsim: add SR-IOV functionality dummy driver was extended with VF-related netdev APIs for testing SR-IOV-related software. netdevsim did not exist back then. Implement SR-IOV functionality in netdevsim. Notable difference is that since netdevsim has no module parameters, we will actually create a device with sriov_numvfs attribute for each netdev. The zero MAC address is accepted as some HW use it to mean any address is allowed. Link state is also now validated. 
Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: Daniel Borkmann --- drivers/net/netdevsim/netdev.c | 274 +++++++++++++++++++++++++++++++++++++- drivers/net/netdevsim/netdevsim.h | 12 ++ 2 files changed, 284 insertions(+), 2 deletions(-) diff --git a/drivers/net/netdevsim/netdev.c b/drivers/net/netdevsim/netdev.c index 828c1ce49a8b..eb8c679fca9f 100644 --- a/drivers/net/netdevsim/netdev.c +++ b/drivers/net/netdevsim/netdev.c @@ -25,6 +25,125 @@ #include "netdevsim.h" +struct nsim_vf_config { + int link_state; + u16 min_tx_rate; + u16 max_tx_rate; + u16 vlan; + __be16 vlan_proto; + u16 qos; + u8 vf_mac[ETH_ALEN]; + bool spoofchk_enabled; + bool trusted; + bool rss_query_enabled; +}; + +static u32 nsim_dev_id; + +static int nsim_num_vf(struct device *dev) +{ + struct netdevsim *ns = to_nsim(dev); + + return ns->num_vfs; +} + +static struct bus_type nsim_bus = { + .name = DRV_NAME, + .dev_name = DRV_NAME, + .num_vf = nsim_num_vf, +}; + +static int nsim_vfs_enable(struct netdevsim *ns, unsigned int num_vfs) +{ + ns->vfconfigs = kcalloc(num_vfs, sizeof(struct nsim_vf_config), + GFP_KERNEL); + if (!ns->vfconfigs) + return -ENOMEM; + ns->num_vfs = num_vfs; + + return 0; +} + +static void nsim_vfs_disable(struct netdevsim *ns) +{ + kfree(ns->vfconfigs); + ns->vfconfigs = NULL; + ns->num_vfs = 0; +} + +static ssize_t +nsim_numvfs_store(struct device *dev, struct device_attribute *attr, + const char *buf, size_t count) +{ + struct netdevsim *ns = to_nsim(dev); + unsigned int num_vfs; + int ret; + + ret = kstrtouint(buf, 0, &num_vfs); + if (ret) + return ret; + + rtnl_lock(); + if (ns->num_vfs == num_vfs) + goto exit_good; + if (ns->num_vfs && num_vfs) { + ret = -EBUSY; + goto exit_unlock; + } + + if (num_vfs) { + ret = nsim_vfs_enable(ns, num_vfs); + if (ret) + goto exit_unlock; + } else { + nsim_vfs_disable(ns); + } +exit_good: + ret = count; +exit_unlock: + rtnl_unlock(); + + return ret; +} + +static ssize_t +nsim_numvfs_show(struct device *dev, 
struct device_attribute *attr, char *buf) +{ + struct netdevsim *ns = to_nsim(dev); + + return sprintf(buf, "%u\n", ns->num_vfs); +} + +static struct device_attribute nsim_numvfs_attr = + __ATTR(sriov_numvfs, 0664, nsim_numvfs_show, nsim_numvfs_store); + +static struct attribute *nsim_dev_attrs[] = { + &nsim_numvfs_attr.attr, + NULL, +}; + +static const struct attribute_group nsim_dev_attr_group = { + .attrs = nsim_dev_attrs, +}; + +static const struct attribute_group *nsim_dev_attr_groups[] = { + &nsim_dev_attr_group, + NULL, +}; + +static void nsim_dev_release(struct device *dev) +{ + struct netdevsim *ns = to_nsim(dev); + + nsim_vfs_disable(ns); + free_netdev(ns->netdev); +} + +struct device_type nsim_dev_type = { + .groups = nsim_dev_attr_groups, + .release = nsim_dev_release, +}; + static int nsim_init(struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); @@ -37,8 +156,19 @@ static int nsim_init(struct net_device *dev) if (err) goto err_debugfs_destroy; + ns->dev.id = nsim_dev_id++; + ns->dev.bus = &nsim_bus; + ns->dev.type = &nsim_dev_type; + err = device_register(&ns->dev); + if (err) + goto err_bpf_uninit; + + SET_NETDEV_DEV(dev, &ns->dev); + return 0; +err_bpf_uninit: + nsim_bpf_uninit(ns); err_debugfs_destroy: debugfs_remove_recursive(ns->ddir); return err; @@ -52,6 +182,14 @@ static void nsim_uninit(struct net_device *dev) nsim_bpf_uninit(ns); } +static void nsim_free(struct net_device *dev) +{ + struct netdevsim *ns = netdev_priv(dev); + + device_unregister(&ns->dev); + /* netdev and vf state will be freed out of device_release() */ +} + static netdev_tx_t nsim_start_xmit(struct sk_buff *skb, struct net_device *dev) { struct netdevsim *ns = netdev_priv(dev); @@ -122,6 +260,123 @@ nsim_setup_tc_block(struct net_device *dev, struct tc_block_offload *f) } } +static int nsim_set_vf_mac(struct net_device *dev, int vf, u8 *mac) +{ + struct netdevsim *ns = netdev_priv(dev); + + /* Only refuse multicast addresses, zero address can mean unset/any. 
*/ + if (vf >= ns->num_vfs || is_multicast_ether_addr(mac)) + return -EINVAL; + memcpy(ns->vfconfigs[vf].vf_mac, mac, ETH_ALEN); + + return 0; +} + +static int nsim_set_vf_vlan(struct net_device *dev, int vf, + u16 vlan, u8 qos, __be16 vlan_proto) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs || vlan > 4095 || qos > 7) + return -EINVAL; + + ns->vfconfigs[vf].vlan = vlan; + ns->vfconfigs[vf].qos = qos; + ns->vfconfigs[vf].vlan_proto = vlan_proto; + + return 0; +} + +static int nsim_set_vf_rate(struct net_device *dev, int vf, int min, int max) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs) + return -EINVAL; + + ns->vfconfigs[vf].min_tx_rate = min; + ns->vfconfigs[vf].max_tx_rate = max; + + return 0; +} + +static int nsim_set_vf_spoofchk(struct net_device *dev, int vf, bool val) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs) + return -EINVAL; + ns->vfconfigs[vf].spoofchk_enabled = val; + + return 0; +} + +static int nsim_set_vf_rss_query_en(struct net_device *dev, int vf, bool val) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs) + return -EINVAL; + ns->vfconfigs[vf].rss_query_enabled = val; + + return 0; +} + +static int nsim_set_vf_trust(struct net_device *dev, int vf, bool val) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs) + return -EINVAL; + ns->vfconfigs[vf].trusted = val; + + return 0; +} + +static int +nsim_get_vf_config(struct net_device *dev, int vf, struct ifla_vf_info *ivi) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs) + return -EINVAL; + + ivi->vf = vf; + ivi->linkstate = ns->vfconfigs[vf].link_state; + ivi->min_tx_rate = ns->vfconfigs[vf].min_tx_rate; + ivi->max_tx_rate = ns->vfconfigs[vf].max_tx_rate; + ivi->vlan = ns->vfconfigs[vf].vlan; + ivi->vlan_proto = ns->vfconfigs[vf].vlan_proto; + ivi->qos = ns->vfconfigs[vf].qos; + memcpy(&ivi->mac, ns->vfconfigs[vf].vf_mac, ETH_ALEN); + ivi->spoofchk = 
ns->vfconfigs[vf].spoofchk_enabled; + ivi->trusted = ns->vfconfigs[vf].trusted; + ivi->rss_query_en = ns->vfconfigs[vf].rss_query_enabled; + + return 0; +} + +static int nsim_set_vf_link_state(struct net_device *dev, int vf, int state) +{ + struct netdevsim *ns = netdev_priv(dev); + + if (vf >= ns->num_vfs) + return -EINVAL; + + switch (state) { + case IFLA_VF_LINK_STATE_AUTO: + case IFLA_VF_LINK_STATE_ENABLE: + case IFLA_VF_LINK_STATE_DISABLE: + break; + default: + return -EINVAL; + } + + ns->vfconfigs[vf].link_state = state; + + return 0; +} + static int nsim_setup_tc(struct net_device *dev, enum tc_setup_type type, void *type_data) { @@ -153,6 +408,14 @@ static const struct net_device_ops nsim_netdev_ops = { .ndo_validate_addr = eth_validate_addr, .ndo_change_mtu = nsim_change_mtu, .ndo_get_stats64 = nsim_get_stats64, + .ndo_set_vf_mac = nsim_set_vf_mac, + .ndo_set_vf_vlan = nsim_set_vf_vlan, + .ndo_set_vf_rate = nsim_set_vf_rate, + .ndo_set_vf_spoofchk = nsim_set_vf_spoofchk, + .ndo_set_vf_trust = nsim_set_vf_trust, + .ndo_get_vf_config = nsim_get_vf_config, + .ndo_set_vf_link_state = nsim_set_vf_link_state, + .ndo_set_vf_rss_query_en = nsim_set_vf_rss_query_en, .ndo_setup_tc = nsim_setup_tc, .ndo_set_features = nsim_set_features, .ndo_bpf = nsim_bpf, @@ -164,7 +427,7 @@ static void nsim_setup(struct net_device *dev) eth_hw_addr_random(dev); dev->netdev_ops = &nsim_netdev_ops; - dev->needs_free_netdev = true; + dev->priv_destructor = nsim_free; dev->tx_queue_len = 0; dev->flags |= IFF_NOARP; @@ -209,12 +472,18 @@ static int __init nsim_module_init(void) if (IS_ERR(nsim_ddir)) return PTR_ERR(nsim_ddir); - err = rtnl_link_register(&nsim_link_ops); + err = bus_register(&nsim_bus); if (err) goto err_debugfs_destroy; + err = rtnl_link_register(&nsim_link_ops); + if (err) + goto err_unreg_bus; + return 0; +err_unreg_bus: + bus_unregister(&nsim_bus); err_debugfs_destroy: debugfs_remove_recursive(nsim_ddir); return err; @@ -223,6 +492,7 @@ err_debugfs_destroy: static 
void __exit nsim_module_exit(void) { rtnl_link_unregister(&nsim_link_ops); + bus_unregister(&nsim_bus); debugfs_remove_recursive(nsim_ddir); } diff --git a/drivers/net/netdevsim/netdevsim.h b/drivers/net/netdevsim/netdevsim.h index 8779e6a8f885..32270de9395a 100644 --- a/drivers/net/netdevsim/netdevsim.h +++ b/drivers/net/netdevsim/netdevsim.h @@ -13,6 +13,7 @@ * THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. */ +#include <linux/device.h> #include <linux/kernel.h> #include <linux/list.h> #include <linux/netdevice.h> @@ -26,6 +27,7 @@ struct bpf_prog; struct dentry; +struct nsim_vf_config; struct netdevsim { struct net_device *netdev; @@ -34,8 +36,13 @@ struct netdevsim { u64 tx_bytes; struct u64_stats_sync syncp; + struct device dev; + struct dentry *ddir; + unsigned int num_vfs; + struct nsim_vf_config *vfconfigs; + struct bpf_prog *bpf_offloaded; u32 bpf_offloaded_id; @@ -64,3 +71,8 @@ int nsim_bpf(struct net_device *dev, struct netdev_bpf *bpf); int nsim_bpf_disable_tc(struct netdevsim *ns); int nsim_bpf_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv); + +static inline struct netdevsim *to_nsim(struct device *ptr) +{ + return container_of(ptr, struct netdevsim, dev); +} -- cgit v1.2.3 From c33616109a63aa64235108fb7f3b60cb2117ef3b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 1 Dec 2017 15:09:02 -0800 Subject: net: dummy: remove fake SR-IOV functionality netdevsim driver seems like a better place for fake SR-IOV functionality. Remove the code previously added to dummy.
Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Phil Sutter Signed-off-by: Daniel Borkmann --- drivers/net/dummy.c | 215 +--------------------------------------------------- 1 file changed, 1 insertion(+), 214 deletions(-) diff --git a/drivers/net/dummy.c b/drivers/net/dummy.c index 58483af80bdb..30b1c8512049 100644 --- a/drivers/net/dummy.c +++ b/drivers/net/dummy.c @@ -42,48 +42,7 @@ #define DRV_NAME "dummy" #define DRV_VERSION "1.0" -#undef pr_fmt -#define pr_fmt(fmt) DRV_NAME ": " fmt - static int numdummies = 1; -static int num_vfs; - -struct vf_data_storage { - u8 vf_mac[ETH_ALEN]; - u16 pf_vlan; /* When set, guest VLAN config not allowed. */ - u16 pf_qos; - __be16 vlan_proto; - u16 min_tx_rate; - u16 max_tx_rate; - u8 spoofchk_enabled; - bool rss_query_enabled; - u8 trusted; - int link_state; -}; - -struct dummy_priv { - struct vf_data_storage *vfinfo; -}; - -static int dummy_num_vf(struct device *dev) -{ - return num_vfs; -} - -static struct bus_type dummy_bus = { - .name = "dummy", - .num_vf = dummy_num_vf, -}; - -static void release_dummy_parent(struct device *dev) -{ -} - -static struct device dummy_parent = { - .init_name = "dummy", - .bus = &dummy_bus, - .release = release_dummy_parent, -}; /* fake multicast ability */ static void set_multicast_list(struct net_device *dev) @@ -133,25 +92,10 @@ static netdev_tx_t dummy_xmit(struct sk_buff *skb, struct net_device *dev) static int dummy_dev_init(struct net_device *dev) { - struct dummy_priv *priv = netdev_priv(dev); - dev->dstats = netdev_alloc_pcpu_stats(struct pcpu_dstats); if (!dev->dstats) return -ENOMEM; - priv->vfinfo = NULL; - - if (!num_vfs) - return 0; - - dev->dev.parent = &dummy_parent; - priv->vfinfo = kcalloc(num_vfs, sizeof(struct vf_data_storage), - GFP_KERNEL); - if (!priv->vfinfo) { - free_percpu(dev->dstats); - return -ENOMEM; - } - return 0; } @@ -169,117 +113,6 @@ static int dummy_change_carrier(struct net_device *dev, bool new_carrier) return 0; } -static int 
dummy_set_vf_mac(struct net_device *dev, int vf, u8 *mac) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (!is_valid_ether_addr(mac) || (vf >= num_vfs)) - return -EINVAL; - - memcpy(priv->vfinfo[vf].vf_mac, mac, ETH_ALEN); - - return 0; -} - -static int dummy_set_vf_vlan(struct net_device *dev, int vf, - u16 vlan, u8 qos, __be16 vlan_proto) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if ((vf >= num_vfs) || (vlan > 4095) || (qos > 7)) - return -EINVAL; - - priv->vfinfo[vf].pf_vlan = vlan; - priv->vfinfo[vf].pf_qos = qos; - priv->vfinfo[vf].vlan_proto = vlan_proto; - - return 0; -} - -static int dummy_set_vf_rate(struct net_device *dev, int vf, int min, int max) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (vf >= num_vfs) - return -EINVAL; - - priv->vfinfo[vf].min_tx_rate = min; - priv->vfinfo[vf].max_tx_rate = max; - - return 0; -} - -static int dummy_set_vf_spoofchk(struct net_device *dev, int vf, bool val) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (vf >= num_vfs) - return -EINVAL; - - priv->vfinfo[vf].spoofchk_enabled = val; - - return 0; -} - -static int dummy_set_vf_rss_query_en(struct net_device *dev, int vf, bool val) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (vf >= num_vfs) - return -EINVAL; - - priv->vfinfo[vf].rss_query_enabled = val; - - return 0; -} - -static int dummy_set_vf_trust(struct net_device *dev, int vf, bool val) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (vf >= num_vfs) - return -EINVAL; - - priv->vfinfo[vf].trusted = val; - - return 0; -} - -static int dummy_get_vf_config(struct net_device *dev, - int vf, struct ifla_vf_info *ivi) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (vf >= num_vfs) - return -EINVAL; - - ivi->vf = vf; - memcpy(&ivi->mac, priv->vfinfo[vf].vf_mac, ETH_ALEN); - ivi->vlan = priv->vfinfo[vf].pf_vlan; - ivi->qos = priv->vfinfo[vf].pf_qos; - ivi->spoofchk = priv->vfinfo[vf].spoofchk_enabled; - ivi->linkstate = priv->vfinfo[vf].link_state; - 
ivi->min_tx_rate = priv->vfinfo[vf].min_tx_rate; - ivi->max_tx_rate = priv->vfinfo[vf].max_tx_rate; - ivi->rss_query_en = priv->vfinfo[vf].rss_query_enabled; - ivi->trusted = priv->vfinfo[vf].trusted; - ivi->vlan_proto = priv->vfinfo[vf].vlan_proto; - - return 0; -} - -static int dummy_set_vf_link_state(struct net_device *dev, int vf, int state) -{ - struct dummy_priv *priv = netdev_priv(dev); - - if (vf >= num_vfs) - return -EINVAL; - - priv->vfinfo[vf].link_state = state; - - return 0; -} - static const struct net_device_ops dummy_netdev_ops = { .ndo_init = dummy_dev_init, .ndo_uninit = dummy_dev_uninit, @@ -289,14 +122,6 @@ static const struct net_device_ops dummy_netdev_ops = { .ndo_set_mac_address = eth_mac_addr, .ndo_get_stats64 = dummy_get_stats64, .ndo_change_carrier = dummy_change_carrier, - .ndo_set_vf_mac = dummy_set_vf_mac, - .ndo_set_vf_vlan = dummy_set_vf_vlan, - .ndo_set_vf_rate = dummy_set_vf_rate, - .ndo_set_vf_spoofchk = dummy_set_vf_spoofchk, - .ndo_set_vf_trust = dummy_set_vf_trust, - .ndo_get_vf_config = dummy_get_vf_config, - .ndo_set_vf_link_state = dummy_set_vf_link_state, - .ndo_set_vf_rss_query_en = dummy_set_vf_rss_query_en, }; static void dummy_get_drvinfo(struct net_device *dev, @@ -323,13 +148,6 @@ static const struct ethtool_ops dummy_ethtool_ops = { .get_ts_info = dummy_get_ts_info, }; -static void dummy_free_netdev(struct net_device *dev) -{ - struct dummy_priv *priv = netdev_priv(dev); - - kfree(priv->vfinfo); -} - static void dummy_setup(struct net_device *dev) { ether_setup(dev); @@ -338,7 +156,6 @@ static void dummy_setup(struct net_device *dev) dev->netdev_ops = &dummy_netdev_ops; dev->ethtool_ops = &dummy_ethtool_ops; dev->needs_free_netdev = true; - dev->priv_destructor = dummy_free_netdev; /* Fill in device structure with ethernet-generic values. 
*/ dev->flags |= IFF_NOARP; @@ -370,7 +187,6 @@ static int dummy_validate(struct nlattr *tb[], struct nlattr *data[], static struct rtnl_link_ops dummy_link_ops __read_mostly = { .kind = DRV_NAME, - .priv_size = sizeof(struct dummy_priv), .setup = dummy_setup, .validate = dummy_validate, }; @@ -379,16 +195,12 @@ static struct rtnl_link_ops dummy_link_ops __read_mostly = { module_param(numdummies, int, 0); MODULE_PARM_DESC(numdummies, "Number of dummy pseudo devices"); -module_param(num_vfs, int, 0); -MODULE_PARM_DESC(num_vfs, "Number of dummy VFs per dummy device"); - static int __init dummy_init_one(void) { struct net_device *dev_dummy; int err; - dev_dummy = alloc_netdev(sizeof(struct dummy_priv), - "dummy%d", NET_NAME_ENUM, dummy_setup); + dev_dummy = alloc_netdev(0, "dummy%d", NET_NAME_ENUM, dummy_setup); if (!dev_dummy) return -ENOMEM; @@ -407,21 +219,6 @@ static int __init dummy_init_module(void) { int i, err = 0; - if (num_vfs) { - err = bus_register(&dummy_bus); - if (err < 0) { - pr_err("registering dummy bus failed\n"); - return err; - } - - err = device_register(&dummy_parent); - if (err < 0) { - pr_err("registering dummy parent device failed\n"); - bus_unregister(&dummy_bus); - return err; - } - } - rtnl_lock(); err = __rtnl_link_register(&dummy_link_ops); if (err < 0) @@ -437,22 +234,12 @@ static int __init dummy_init_module(void) out: rtnl_unlock(); - if (err && num_vfs) { - device_unregister(&dummy_parent); - bus_unregister(&dummy_bus); - } - return err; } static void __exit dummy_cleanup_module(void) { rtnl_link_unregister(&dummy_link_ops); - - if (num_vfs) { - device_unregister(&dummy_parent); - bus_unregister(&dummy_bus); - } } module_init(dummy_init_module); -- cgit v1.2.3