From 4ea69b2fd623dee2bbc77d3b6b7d8c0924e2026a Mon Sep 17 00:00:00 2001
From: Sandipan Das
Date: Thu, 24 May 2018 12:26:46 +0530
Subject: bpf: powerpc64: pad function address loads with NOPs

For multi-function programs, loading the address of a callee
function to a register requires emitting instructions whose count
varies from one to five depending on the nature of the address.

Since we come to know of the callee's address only before the
extra pass, the number of instructions required to load this
address may vary from what was previously generated. This can
make the JITed image grow or shrink.

To avoid this, we should generate a constant five-instruction
sequence when loading function addresses by padding the optimized
load sequence with NOPs.

Signed-off-by: Sandipan Das
Signed-off-by: Daniel Borkmann
---
 arch/powerpc/net/bpf_jit_comp64.c | 34 +++++++++++++++++++++++-----------
 1 file changed, 23 insertions(+), 11 deletions(-)

(limited to 'arch/powerpc')

diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index 1bdb1aff0619..e4582744a31d 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx)
 
 static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func)
 {
+	unsigned int i, ctx_idx = ctx->idx;
+
+	/* Load function address into r12 */
+	PPC_LI64(12, func);
+
+	/* For bpf-to-bpf function calls, the callee's address is unknown
+	 * until the last extra pass. As seen above, we use PPC_LI64() to
+	 * load the callee's address, but this may optimize the number of
+	 * instructions required based on the nature of the address.
+	 *
+	 * Since we don't want the number of instructions emitted to change,
+	 * we pad the optimized PPC_LI64() call with NOPs to guarantee that
+	 * we always have a five-instruction sequence, which is the maximum
+	 * that PPC_LI64() can emit.
+	 */
+	for (i = ctx->idx - ctx_idx; i < 5; i++)
+		PPC_NOP();
+
 #ifdef PPC64_ELF_ABI_v1
-	/* func points to the function descriptor */
-	PPC_LI64(b2p[TMP_REG_2], func);
-	/* Load actual entry point from function descriptor */
-	PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0);
-	/* ... and move it to LR */
-	PPC_MTLR(b2p[TMP_REG_1]);
 	/*
 	 * Load TOC from function descriptor at offset 8.
 	 * We can clobber r2 since we get called through a
 	 * function pointer (so caller will save/restore r2)
 	 * and since we don't use a TOC ourself.
 	 */
-	PPC_BPF_LL(2, b2p[TMP_REG_2], 8);
-#else
-	/* We can clobber r12 */
-	PPC_FUNC_ADDR(12, func);
-	PPC_MTLR(12);
+	PPC_BPF_LL(2, 12, 8);
+	/* Load actual entry point from function descriptor */
+	PPC_BPF_LL(12, 12, 0);
 #endif
+
+	PPC_MTLR(12);
 	PPC_BLRL();
 }
--
cgit v1.2.3
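To see why the load above can take anywhere from one to five instructions, here is a rough standalone model of a PPC_LI64()-style expansion (a simplified sketch for illustration; li64_insn_count() is a hypothetical helper, not kernel code, and only approximates the macro in the kernel's bpf_jit.h):

#include <limits.h>
#include <stdint.h>

/* Count the instructions a PPC_LI64()-style expansion would emit to
 * materialize 'imm' in a register: li alone for small immediates,
 * lis/ori pairs for 32-bit values, and up to lis+ori+sldi+oris+ori
 * (five instructions) for an arbitrary 64-bit address.
 */
static int li64_insn_count(int64_t imm)
{
	uint64_t u = (uint64_t)imm;
	int count;

	if (imm >= SHRT_MIN && imm <= SHRT_MAX)
		return 1;				/* li (addi) alone */
	if (imm >= INT_MIN && imm <= INT_MAX)
		return (u & 0xffff) ? 2 : 1;		/* lis [+ ori] */

	if (!(u & 0xffff800000000000ULL))
		count = 1;				/* li of bits 47:32 */
	else
		count = ((u >> 32) & 0xffff) ? 2 : 1;	/* lis [+ ori] */
	count++;					/* sldi by 32 */
	if (u & 0xffff0000ULL)
		count++;				/* oris: bits 31:16 */
	if (u & 0xffffULL)
		count++;				/* ori: bits 15:0 */
	return count;
}

Whichever shape the final address takes, the for loop in the hunk above tops the emitted sequence up to five instructions with PPC_NOP(), so a call site occupies the same number of bytes in every pass.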
From 8484ce8306f941f921cfceaf357a28d3a014f178 Mon Sep 17 00:00:00 2001
From: Sandipan Das
Date: Thu, 24 May 2018 12:26:47 +0530
Subject: bpf: powerpc64: add JIT support for multi-function programs

This adds support for bpf-to-bpf function calls in the powerpc64
JIT compiler. The JIT compiler converts the bpf call instructions
to native branch instructions. After a round of the usual passes,
the start addresses of the JITed images for the callee functions
are known. Finally, to fix up the branch target addresses, we need
to perform an extra pass.

Because of the address range in which JITed images are allocated
on powerpc64, the offsets of the start addresses of these images
from __bpf_call_base are as large as 64 bits. So, for a function
call, we cannot use the imm field of the instruction to determine
the callee's address. Instead, we use the alternative method of
getting it from the list of function addresses in the auxiliary
data of the caller by using the off field as an index.

Signed-off-by: Sandipan Das
Signed-off-by: Daniel Borkmann
---
 arch/powerpc/net/bpf_jit_comp64.c | 76 +++++++++++++++++++++++++++++++++------
 1 file changed, 66 insertions(+), 10 deletions(-)

(limited to 'arch/powerpc')
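Distilled into plain C, the callee-address resolution described above reads roughly as follows (a sketch against the in-kernel types from linux/filter.h and linux/errno.h; bpf_func_addr() is a hypothetical helper name, not something this patch adds, and the same logic appears inline in the BPF_JMP | BPF_CALL hunk below):

/* Resolve the target of a BPF_JMP | BPF_CALL instruction. For a
 * bpf-to-bpf call (src_reg == BPF_PSEUDO_CALL), the 'off' field
 * indexes the subprog table in the caller's auxiliary data, which
 * is only populated by the time of the extra pass; earlier passes
 * use a NULL placeholder of the same encoded length. Kernel
 * helpers stay reachable as a 32-bit offset from __bpf_call_base.
 */
static int bpf_func_addr(struct bpf_prog *fp, const struct bpf_insn *insn,
			 bool extra_pass, u64 *addr)
{
	if (insn->src_reg == BPF_PSEUDO_CALL) {
		if (!extra_pass)
			*addr = 0;	/* placeholder, fixed up later */
		else if (fp->aux->func && insn->off < fp->aux->func_cnt)
			*addr = (u64)fp->aux->func[insn->off]->bpf_func;
		else
			return -EINVAL;
	} else {
		*addr = (u64)((u8 *)__bpf_call_base + insn->imm);
	}
	return 0;
}

Keeping the placeholder and the final address the same encoded length is exactly what the NOP padding from the previous patch guarantees.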
diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c
index e4582744a31d..f1c95779843b 100644
--- a/arch/powerpc/net/bpf_jit_comp64.c
+++ b/arch/powerpc/net/bpf_jit_comp64.c
@@ -268,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32
 /* Assemble the body code between the prologue & epilogue */
 static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image,
 			      struct codegen_context *ctx,
-			      u32 *addrs)
+			      u32 *addrs, bool extra_pass)
 {
 	const struct bpf_insn *insn = fp->insnsi;
 	int flen = fp->len;
@@ -724,11 +724,25 @@ emit_clear:
 			break;
 
 		/*
-		 * Call kernel helper
+		 * Call kernel helper or bpf function
 		 */
 		case BPF_JMP | BPF_CALL:
 			ctx->seen |= SEEN_FUNC;
-			func = (u8 *) __bpf_call_base + imm;
+
+			/* bpf function call */
+			if (insn[i].src_reg == BPF_PSEUDO_CALL)
+				if (!extra_pass)
+					func = NULL;
+				else if (fp->aux->func && off < fp->aux->func_cnt)
+					/* use the subprog id from the off
+					 * field to lookup the callee address
+					 */
+					func = (u8 *) fp->aux->func[off]->bpf_func;
+				else
+					return -EINVAL;
+			/* kernel helper call */
+			else
+				func = (u8 *) __bpf_call_base + imm;
 
 			bpf_jit_emit_func_call(image, ctx, (u64)func);
@@ -876,6 +890,14 @@ cond_branch:
 	return 0;
 }
 
+struct powerpc64_jit_data {
+	struct bpf_binary_header *header;
+	u32 *addrs;
+	u8 *image;
+	u32 proglen;
+	struct codegen_context ctx;
+};
+
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
 	u32 proglen;
@@ -883,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	u8 *image = NULL;
 	u32 *code_base;
 	u32 *addrs;
+	struct powerpc64_jit_data *jit_data;
 	struct codegen_context cgctx;
 	int pass;
 	int flen;
@@ -890,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	struct bpf_prog *org_fp = fp;
 	struct bpf_prog *tmp_fp;
 	bool bpf_blinded = false;
+	bool extra_pass = false;
 
 	if (!fp->jit_requested)
 		return org_fp;
@@ -903,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		fp = tmp_fp;
 	}
 
+	jit_data = fp->aux->jit_data;
+	if (!jit_data) {
+		jit_data = kzalloc(sizeof(*jit_data), GFP_KERNEL);
+		if (!jit_data) {
+			fp = org_fp;
+			goto out;
+		}
+		fp->aux->jit_data = jit_data;
+	}
+
 	flen = fp->len;
+	addrs = jit_data->addrs;
+	if (addrs) {
+		cgctx = jit_data->ctx;
+		image = jit_data->image;
+		bpf_hdr = jit_data->header;
+		proglen = jit_data->proglen;
+		alloclen = proglen + FUNCTION_DESCR_SIZE;
+		extra_pass = true;
+		goto skip_init_ctx;
+	}
+
 	addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL);
 	if (addrs == NULL) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	memset(&cgctx, 0, sizeof(struct codegen_context));
@@ -916,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	cgctx.stack_size = round_up(fp->aux->stack_depth, 16);
 
 	/* Scouting faux-generate pass 0 */
-	if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) {
+	if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) {
 		/* We hit something illegal or unsupported. */
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
 	/*
@@ -937,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 			bpf_jit_fill_ill_insns);
 	if (!bpf_hdr) {
 		fp = org_fp;
-		goto out;
+		goto out_addrs;
 	}
 
+skip_init_ctx:
 	code_base = (u32 *)(image + FUNCTION_DESCR_SIZE);
 
 	/* Code generation passes 1-2 */
@@ -947,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		/* Now build the prologue, body code & epilogue for real. */
 		cgctx.idx = 0;
 		bpf_jit_build_prologue(code_base, &cgctx);
-		bpf_jit_build_body(fp, code_base, &cgctx, addrs);
+		bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass);
 		bpf_jit_build_epilogue(code_base, &cgctx);
 
 		if (bpf_jit_enable > 1)
@@ -973,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	fp->jited_len = alloclen;
 
 	bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE));
+	if (!fp->is_func || extra_pass) {
+out_addrs:
+		kfree(addrs);
+		kfree(jit_data);
+		fp->aux->jit_data = NULL;
+	} else {
+		jit_data->addrs = addrs;
+		jit_data->ctx = cgctx;
+		jit_data->proglen = proglen;
+		jit_data->image = image;
+		jit_data->header = bpf_hdr;
+	}
 
 out:
-	kfree(addrs);
-
 	if (bpf_blinded)
 		bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp);
--
cgit v1.2.3
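For context on the jit_data handshake above, here is a condensed userspace model of its lifecycle (stand-in types and names, illustrative only; it mirrors the !fp->is_func || extra_pass cleanup condition rather than reproducing kernel code):

#include <stdbool.h>
#include <stdlib.h>

/* Stand-in for powerpc64_jit_data: everything the extra pass needs
 * to regenerate code into the image produced by the first pass.
 */
struct jit_state {
	unsigned int *addrs;	/* per-instruction offsets from pass 1 */
	unsigned char *image;	/* JITed image from pass 1 */
};

static unsigned char *jit_compile(struct jit_state **cache,
				  unsigned int ninsns, bool is_func)
{
	struct jit_state *st = *cache;
	bool extra_pass = st != NULL;
	unsigned char *image;

	if (!extra_pass) {
		/* First invocation: allocate and size everything. */
		st = calloc(1, sizeof(*st));
		if (!st)
			return NULL;
		st->addrs = calloc(ninsns + 1, sizeof(*st->addrs));
		st->image = malloc(4096);	/* stands in for the scouted size */
		if (!st->addrs || !st->image) {
			free(st->addrs);
			free(st->image);
			free(st);
			return NULL;
		}
	}

	/* ... generate code into st->image; on the extra pass the real
	 * callee addresses overwrite the NOP-padded placeholders ... */

	image = st->image;
	if (!is_func || extra_pass) {
		/* Single-function program, or final pass: drop the state. */
		free(st->addrs);
		free(st);
		*cache = NULL;
	} else {
		*cache = st;	/* park state for the extra pass */
	}
	return image;
}

The kernel version additionally frees the image through its binary header and resets fp->aux->jit_data, but the ownership rule is the same: the saved state survives exactly one round trip, from the initial compile to the extra pass.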