From dbf44daf7c88bb0b378e3cb9dc101ae0c5b33832 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 4 May 2018 01:08:21 +0200 Subject: bpf, ppc64: remove ld_abs/ld_ind Since LD_ABS/LD_IND instructions are now removed from the core and reimplemented through a combination of inlined BPF instructions and a slow-path helper, we can get rid of the complexity from ppc64 JIT. Signed-off-by: Daniel Borkmann Acked-by: Naveen N. Rao Acked-by: Alexei Starovoitov Tested-by: Sandipan Das Signed-off-by: Alexei Starovoitov --- arch/powerpc/net/Makefile | 2 +- arch/powerpc/net/bpf_jit64.h | 37 ++------ arch/powerpc/net/bpf_jit_asm64.S | 180 -------------------------------------- arch/powerpc/net/bpf_jit_comp64.c | 109 +---------------------- 4 files changed, 11 insertions(+), 317 deletions(-) delete mode 100644 arch/powerpc/net/bpf_jit_asm64.S (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/Makefile b/arch/powerpc/net/Makefile index 02d369ca6a53..809f019d3cba 100644 --- a/arch/powerpc/net/Makefile +++ b/arch/powerpc/net/Makefile @@ -3,7 +3,7 @@ # Arch-specific network modules # ifeq ($(CONFIG_PPC64),y) -obj-$(CONFIG_BPF_JIT) += bpf_jit_asm64.o bpf_jit_comp64.o +obj-$(CONFIG_BPF_JIT) += bpf_jit_comp64.o else obj-$(CONFIG_BPF_JIT) += bpf_jit_asm.o bpf_jit_comp.o endif diff --git a/arch/powerpc/net/bpf_jit64.h b/arch/powerpc/net/bpf_jit64.h index 8bdef7ed28a8..3609be4692b3 100644 --- a/arch/powerpc/net/bpf_jit64.h +++ b/arch/powerpc/net/bpf_jit64.h @@ -20,7 +20,7 @@ * with our redzone usage. * * [ prev sp ] <------------- - * [ nv gpr save area ] 8*8 | + * [ nv gpr save area ] 6*8 | * [ tail_call_cnt ] 8 | * [ local_tmp_var ] 8 | * fp (r31) --> [ ebpf stack space ] upto 512 | @@ -28,8 +28,8 @@ * sp (r1) ---> [ stack pointer ] -------------- */ -/* for gpr non volatile registers BPG_REG_6 to 10, plus skb cache registers */ -#define BPF_PPC_STACK_SAVE (8*8) +/* for gpr non volatile registers BPG_REG_6 to 10 */ +#define BPF_PPC_STACK_SAVE (6*8) /* for bpf JIT code internal usage */ #define BPF_PPC_STACK_LOCALS 16 /* stack frame excluding BPF stack, ensure this is quadword aligned */ @@ -39,10 +39,8 @@ #ifndef __ASSEMBLY__ /* BPF register usage */ -#define SKB_HLEN_REG (MAX_BPF_JIT_REG + 0) -#define SKB_DATA_REG (MAX_BPF_JIT_REG + 1) -#define TMP_REG_1 (MAX_BPF_JIT_REG + 2) -#define TMP_REG_2 (MAX_BPF_JIT_REG + 3) +#define TMP_REG_1 (MAX_BPF_JIT_REG + 0) +#define TMP_REG_2 (MAX_BPF_JIT_REG + 1) /* BPF to ppc register mappings */ static const int b2p[] = { @@ -63,40 +61,23 @@ static const int b2p[] = { [BPF_REG_FP] = 31, /* eBPF jit internal registers */ [BPF_REG_AX] = 2, - [SKB_HLEN_REG] = 25, - [SKB_DATA_REG] = 26, [TMP_REG_1] = 9, [TMP_REG_2] = 10 }; -/* PPC NVR range -- update this if we ever use NVRs below r24 */ -#define BPF_PPC_NVR_MIN 24 - -/* Assembly helpers */ -#define DECLARE_LOAD_FUNC(func) u64 func(u64 r3, u64 r4); \ - u64 func##_negative_offset(u64 r3, u64 r4); \ - u64 func##_positive_offset(u64 r3, u64 r4); - -DECLARE_LOAD_FUNC(sk_load_word); -DECLARE_LOAD_FUNC(sk_load_half); -DECLARE_LOAD_FUNC(sk_load_byte); - -#define CHOOSE_LOAD_FUNC(imm, func) \ - (imm < 0 ? \ - (imm >= SKF_LL_OFF ? 
func##_negative_offset : func) : \ - func##_positive_offset) +/* PPC NVR range -- update this if we ever use NVRs below r27 */ +#define BPF_PPC_NVR_MIN 27 #define SEEN_FUNC 0x1000 /* might call external helpers */ #define SEEN_STACK 0x2000 /* uses BPF stack */ -#define SEEN_SKB 0x4000 /* uses sk_buff */ -#define SEEN_TAILCALL 0x8000 /* uses tail calls */ +#define SEEN_TAILCALL 0x4000 /* uses tail calls */ struct codegen_context { /* * This is used to track register usage as well * as calls to external helpers. * - register usage is tracked with corresponding - * bits (r3-r10 and r25-r31) + * bits (r3-r10 and r27-r31) * - rest of the bits can be used to track other * things -- for now, we use bits 16 to 23 * encoded in SEEN_* macros above diff --git a/arch/powerpc/net/bpf_jit_asm64.S b/arch/powerpc/net/bpf_jit_asm64.S deleted file mode 100644 index 7e4c51430b84..000000000000 --- a/arch/powerpc/net/bpf_jit_asm64.S +++ /dev/null @@ -1,180 +0,0 @@ -/* - * bpf_jit_asm64.S: Packet/header access helper functions - * for PPC64 BPF compiler. - * - * Copyright 2016, Naveen N. Rao - * IBM Corporation - * - * Based on bpf_jit_asm.S by Matt Evans - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; version 2 - * of the License. - */ - -#include -#include -#include "bpf_jit64.h" - -/* - * All of these routines are called directly from generated code, - * with the below register usage: - * r27 skb pointer (ctx) - * r25 skb header length - * r26 skb->data pointer - * r4 offset - * - * Result is passed back in: - * r8 data read in host endian format (accumulator) - * - * r9 is used as a temporary register - */ - -#define r_skb r27 -#define r_hlen r25 -#define r_data r26 -#define r_off r4 -#define r_val r8 -#define r_tmp r9 - -_GLOBAL_TOC(sk_load_word) - cmpdi r_off, 0 - blt bpf_slow_path_word_neg - b sk_load_word_positive_offset - -_GLOBAL_TOC(sk_load_word_positive_offset) - /* Are we accessing past headlen? */ - subi r_tmp, r_hlen, 4 - cmpd r_tmp, r_off - blt bpf_slow_path_word - /* Nope, just hitting the header. cr0 here is eq or gt! */ - LWZX_BE r_val, r_data, r_off - blr /* Return success, cr0 != LT */ - -_GLOBAL_TOC(sk_load_half) - cmpdi r_off, 0 - blt bpf_slow_path_half_neg - b sk_load_half_positive_offset - -_GLOBAL_TOC(sk_load_half_positive_offset) - subi r_tmp, r_hlen, 2 - cmpd r_tmp, r_off - blt bpf_slow_path_half - LHZX_BE r_val, r_data, r_off - blr - -_GLOBAL_TOC(sk_load_byte) - cmpdi r_off, 0 - blt bpf_slow_path_byte_neg - b sk_load_byte_positive_offset - -_GLOBAL_TOC(sk_load_byte_positive_offset) - cmpd r_hlen, r_off - ble bpf_slow_path_byte - lbzx r_val, r_data, r_off - blr - -/* - * Call out to skb_copy_bits: - * Allocate a new stack frame here to remain ABI-compliant in - * stashing LR. 
- */ -#define bpf_slow_path_common(SIZE) \ - mflr r0; \ - std r0, PPC_LR_STKOFF(r1); \ - stdu r1, -(STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS)(r1); \ - mr r3, r_skb; \ - /* r4 = r_off as passed */ \ - addi r5, r1, STACK_FRAME_MIN_SIZE; \ - li r6, SIZE; \ - bl skb_copy_bits; \ - nop; \ - /* save r5 */ \ - addi r5, r1, STACK_FRAME_MIN_SIZE; \ - /* r3 = 0 on success */ \ - addi r1, r1, STACK_FRAME_MIN_SIZE + BPF_PPC_STACK_LOCALS; \ - ld r0, PPC_LR_STKOFF(r1); \ - mtlr r0; \ - cmpdi r3, 0; \ - blt bpf_error; /* cr0 = LT */ - -bpf_slow_path_word: - bpf_slow_path_common(4) - /* Data value is on stack, and cr0 != LT */ - LWZX_BE r_val, 0, r5 - blr - -bpf_slow_path_half: - bpf_slow_path_common(2) - LHZX_BE r_val, 0, r5 - blr - -bpf_slow_path_byte: - bpf_slow_path_common(1) - lbzx r_val, 0, r5 - blr - -/* - * Call out to bpf_internal_load_pointer_neg_helper - */ -#define sk_negative_common(SIZE) \ - mflr r0; \ - std r0, PPC_LR_STKOFF(r1); \ - stdu r1, -STACK_FRAME_MIN_SIZE(r1); \ - mr r3, r_skb; \ - /* r4 = r_off, as passed */ \ - li r5, SIZE; \ - bl bpf_internal_load_pointer_neg_helper; \ - nop; \ - addi r1, r1, STACK_FRAME_MIN_SIZE; \ - ld r0, PPC_LR_STKOFF(r1); \ - mtlr r0; \ - /* R3 != 0 on success */ \ - cmpldi r3, 0; \ - beq bpf_error_slow; /* cr0 = EQ */ - -bpf_slow_path_word_neg: - lis r_tmp, -32 /* SKF_LL_OFF */ - cmpd r_off, r_tmp /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - b sk_load_word_negative_offset - -_GLOBAL_TOC(sk_load_word_negative_offset) - sk_negative_common(4) - LWZX_BE r_val, 0, r3 - blr - -bpf_slow_path_half_neg: - lis r_tmp, -32 /* SKF_LL_OFF */ - cmpd r_off, r_tmp /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - b sk_load_half_negative_offset - -_GLOBAL_TOC(sk_load_half_negative_offset) - sk_negative_common(2) - LHZX_BE r_val, 0, r3 - blr - -bpf_slow_path_byte_neg: - lis r_tmp, -32 /* SKF_LL_OFF */ - cmpd r_off, r_tmp /* addr < SKF_* */ - blt bpf_error /* cr0 = LT */ - b sk_load_byte_negative_offset - -_GLOBAL_TOC(sk_load_byte_negative_offset) - sk_negative_common(1) - lbzx r_val, 0, r3 - blr - -bpf_error_slow: - /* fabricate a cr0 = lt */ - li r_tmp, -1 - cmpdi r_tmp, 0 -bpf_error: - /* - * Entered with cr0 = lt - * Generated code will 'blt epilogue', returning 0. - */ - li r_val, 0 - blr diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 0ef3d9580e98..1bdb1aff0619 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -59,7 +59,7 @@ static inline bool bpf_has_stack_frame(struct codegen_context *ctx) * [ prev sp ] <------------- * [ ... 
] | * sp (r1) ---> [ stack pointer ] -------------- - * [ nv gpr save area ] 8*8 + * [ nv gpr save area ] 6*8 * [ tail_call_cnt ] 8 * [ local_tmp_var ] 8 * [ unused red zone ] 208 bytes protected @@ -88,21 +88,6 @@ static int bpf_jit_stack_offsetof(struct codegen_context *ctx, int reg) BUG(); } -static void bpf_jit_emit_skb_loads(u32 *image, struct codegen_context *ctx) -{ - /* - * Load skb->len and skb->data_len - * r3 points to skb - */ - PPC_LWZ(b2p[SKB_HLEN_REG], 3, offsetof(struct sk_buff, len)); - PPC_LWZ(b2p[TMP_REG_1], 3, offsetof(struct sk_buff, data_len)); - /* header_len = len - data_len */ - PPC_SUB(b2p[SKB_HLEN_REG], b2p[SKB_HLEN_REG], b2p[TMP_REG_1]); - - /* skb->data pointer */ - PPC_BPF_LL(b2p[SKB_DATA_REG], 3, offsetof(struct sk_buff, data)); -} - static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) { int i; @@ -145,18 +130,6 @@ static void bpf_jit_build_prologue(u32 *image, struct codegen_context *ctx) if (bpf_is_seen_register(ctx, i)) PPC_BPF_STL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); - /* - * Save additional non-volatile regs if we cache skb - * Also, setup skb data - */ - if (ctx->seen & SEEN_SKB) { - PPC_BPF_STL(b2p[SKB_HLEN_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); - PPC_BPF_STL(b2p[SKB_DATA_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); - bpf_jit_emit_skb_loads(image, ctx); - } - /* Setup frame pointer to point to the bpf stack area */ if (bpf_is_seen_register(ctx, BPF_REG_FP)) PPC_ADDI(b2p[BPF_REG_FP], 1, @@ -172,14 +145,6 @@ static void bpf_jit_emit_common_epilogue(u32 *image, struct codegen_context *ctx if (bpf_is_seen_register(ctx, i)) PPC_BPF_LL(b2p[i], 1, bpf_jit_stack_offsetof(ctx, b2p[i])); - /* Restore non-volatile registers used for skb cache */ - if (ctx->seen & SEEN_SKB) { - PPC_BPF_LL(b2p[SKB_HLEN_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_HLEN_REG])); - PPC_BPF_LL(b2p[SKB_DATA_REG], 1, - bpf_jit_stack_offsetof(ctx, b2p[SKB_DATA_REG])); - } - /* Tear down our stack frame */ if (bpf_has_stack_frame(ctx)) { PPC_ADDI(1, 1, BPF_PPC_STACKFRAME + ctx->stack_size); @@ -753,23 +718,10 @@ emit_clear: ctx->seen |= SEEN_FUNC; func = (u8 *) __bpf_call_base + imm; - /* Save skb pointer if we need to re-cache skb data */ - if ((ctx->seen & SEEN_SKB) && - bpf_helper_changes_pkt_data(func)) - PPC_BPF_STL(3, 1, bpf_jit_stack_local(ctx)); - bpf_jit_emit_func_call(image, ctx, (u64)func); /* move return value from r3 to BPF_REG_0 */ PPC_MR(b2p[BPF_REG_0], 3); - - /* refresh skb cache */ - if ((ctx->seen & SEEN_SKB) && - bpf_helper_changes_pkt_data(func)) { - /* reload skb pointer to r3 */ - PPC_BPF_LL(3, 1, bpf_jit_stack_local(ctx)); - bpf_jit_emit_skb_loads(image, ctx); - } break; /* @@ -886,65 +838,6 @@ cond_branch: PPC_BCC(true_cond, addrs[i + 1 + off]); break; - /* - * Loads from packet header/data - * Assume 32-bit input value in imm and X (src_reg) - */ - - /* Absolute loads */ - case BPF_LD | BPF_W | BPF_ABS: - func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_word); - goto common_load_abs; - case BPF_LD | BPF_H | BPF_ABS: - func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_half); - goto common_load_abs; - case BPF_LD | BPF_B | BPF_ABS: - func = (u8 *)CHOOSE_LOAD_FUNC(imm, sk_load_byte); -common_load_abs: - /* - * Load from [imm] - * Load into r4, which can just be passed onto - * skb load helpers as the second parameter - */ - PPC_LI32(4, imm); - goto common_load; - - /* Indirect loads */ - case BPF_LD | BPF_W | BPF_IND: - func = (u8 *)sk_load_word; - goto common_load_ind; - case BPF_LD | BPF_H | BPF_IND: - func 
= (u8 *)sk_load_half; - goto common_load_ind; - case BPF_LD | BPF_B | BPF_IND: - func = (u8 *)sk_load_byte; -common_load_ind: - /* - * Load from [src_reg + imm] - * Treat src_reg as a 32-bit value - */ - PPC_EXTSW(4, src_reg); - if (imm) { - if (imm >= -32768 && imm < 32768) - PPC_ADDI(4, 4, IMM_L(imm)); - else { - PPC_LI32(b2p[TMP_REG_1], imm); - PPC_ADD(4, 4, b2p[TMP_REG_1]); - } - } - -common_load: - ctx->seen |= SEEN_SKB; - ctx->seen |= SEEN_FUNC; - bpf_jit_emit_func_call(image, ctx, (u64)func); - - /* - * Helper returns 'lt' condition on error, and an - * appropriate return value in BPF_REG_0 - */ - PPC_BCC(COND_LT, exit_addr); - break; - /* * Tail call */ -- cgit v1.2.3 From 4ea69b2fd623dee2bbc77d3b6b7d8c0924e2026a Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 May 2018 12:26:46 +0530 Subject: bpf: powerpc64: pad function address loads with NOPs For multi-function programs, loading the address of a callee function to a register requires emitting instructions whose count varies from one to five depending on the nature of the address. Since we come to know of the callee's address only before the extra pass, the number of instructions required to load this address may vary from what was previously generated. This can make the JITed image grow or shrink. To avoid this, we should generate a constant five-instruction when loading function addresses by padding the optimized load sequence with NOPs. Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- arch/powerpc/net/bpf_jit_comp64.c | 34 +++++++++++++++++++++++----------- 1 file changed, 23 insertions(+), 11 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index 1bdb1aff0619..e4582744a31d 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -167,25 +167,37 @@ static void bpf_jit_build_epilogue(u32 *image, struct codegen_context *ctx) static void bpf_jit_emit_func_call(u32 *image, struct codegen_context *ctx, u64 func) { + unsigned int i, ctx_idx = ctx->idx; + + /* Load function address into r12 */ + PPC_LI64(12, func); + + /* For bpf-to-bpf function calls, the callee's address is unknown + * until the last extra pass. As seen above, we use PPC_LI64() to + * load the callee's address, but this may optimize the number of + * instructions required based on the nature of the address. + * + * Since we don't want the number of instructions emitted to change, + * we pad the optimized PPC_LI64() call with NOPs to guarantee that + * we always have a five-instruction sequence, which is the maximum + * that PPC_LI64() can emit. + */ + for (i = ctx->idx - ctx_idx; i < 5; i++) + PPC_NOP(); + #ifdef PPC64_ELF_ABI_v1 - /* func points to the function descriptor */ - PPC_LI64(b2p[TMP_REG_2], func); - /* Load actual entry point from function descriptor */ - PPC_BPF_LL(b2p[TMP_REG_1], b2p[TMP_REG_2], 0); - /* ... and move it to LR */ - PPC_MTLR(b2p[TMP_REG_1]); /* * Load TOC from function descriptor at offset 8. * We can clobber r2 since we get called through a * function pointer (so caller will save/restore r2) * and since we don't use a TOC ourself. 
*/ - PPC_BPF_LL(2, b2p[TMP_REG_2], 8); -#else - /* We can clobber r12 */ - PPC_FUNC_ADDR(12, func); - PPC_MTLR(12); + PPC_BPF_LL(2, 12, 8); + /* Load actual entry point from function descriptor */ + PPC_BPF_LL(12, 12, 0); #endif + + PPC_MTLR(12); PPC_BLRL(); } -- cgit v1.2.3 From 8484ce8306f941f921cfceaf357a28d3a014f178 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Thu, 24 May 2018 12:26:47 +0530 Subject: bpf: powerpc64: add JIT support for multi-function programs This adds support for bpf-to-bpf function calls in the powerpc64 JIT compiler. The JIT compiler converts the bpf call instructions to native branch instructions. After a round of the usual passes, the start addresses of the JITed images for the callee functions are known. Finally, to fixup the branch target addresses, we need to perform an extra pass. Because of the address range in which JITed images are allocated on powerpc64, the offsets of the start addresses of these images from __bpf_call_base are as large as 64 bits. So, for a function call, we cannot use the imm field of the instruction to determine the callee's address. Instead, we use the alternative method of getting it from the list of function addresses in the auxiliary data of the caller by using the off field as an index. Signed-off-by: Sandipan Das Signed-off-by: Daniel Borkmann --- arch/powerpc/net/bpf_jit_comp64.c | 76 +++++++++++++++++++++++++++++++++------ 1 file changed, 66 insertions(+), 10 deletions(-) (limited to 'arch/powerpc') diff --git a/arch/powerpc/net/bpf_jit_comp64.c b/arch/powerpc/net/bpf_jit_comp64.c index e4582744a31d..f1c95779843b 100644 --- a/arch/powerpc/net/bpf_jit_comp64.c +++ b/arch/powerpc/net/bpf_jit_comp64.c @@ -268,7 +268,7 @@ static void bpf_jit_emit_tail_call(u32 *image, struct codegen_context *ctx, u32 /* Assemble the body code between the prologue & epilogue */ static int bpf_jit_build_body(struct bpf_prog *fp, u32 *image, struct codegen_context *ctx, - u32 *addrs) + u32 *addrs, bool extra_pass) { const struct bpf_insn *insn = fp->insnsi; int flen = fp->len; @@ -724,11 +724,25 @@ emit_clear: break; /* - * Call kernel helper + * Call kernel helper or bpf function */ case BPF_JMP | BPF_CALL: ctx->seen |= SEEN_FUNC; - func = (u8 *) __bpf_call_base + imm; + + /* bpf function call */ + if (insn[i].src_reg == BPF_PSEUDO_CALL) + if (!extra_pass) + func = NULL; + else if (fp->aux->func && off < fp->aux->func_cnt) + /* use the subprog id from the off + * field to lookup the callee address + */ + func = (u8 *) fp->aux->func[off]->bpf_func; + else + return -EINVAL; + /* kernel helper call */ + else + func = (u8 *) __bpf_call_base + imm; bpf_jit_emit_func_call(image, ctx, (u64)func); @@ -876,6 +890,14 @@ cond_branch: return 0; } +struct powerpc64_jit_data { + struct bpf_binary_header *header; + u32 *addrs; + u8 *image; + u32 proglen; + struct codegen_context ctx; +}; + struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) { u32 proglen; @@ -883,6 +905,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) u8 *image = NULL; u32 *code_base; u32 *addrs; + struct powerpc64_jit_data *jit_data; struct codegen_context cgctx; int pass; int flen; @@ -890,6 +913,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) struct bpf_prog *org_fp = fp; struct bpf_prog *tmp_fp; bool bpf_blinded = false; + bool extra_pass = false; if (!fp->jit_requested) return org_fp; @@ -903,11 +927,32 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp = tmp_fp; } + jit_data = fp->aux->jit_data; + if (!jit_data) { + jit_data = 
kzalloc(sizeof(*jit_data), GFP_KERNEL); + if (!jit_data) { + fp = org_fp; + goto out; + } + fp->aux->jit_data = jit_data; + } + flen = fp->len; + addrs = jit_data->addrs; + if (addrs) { + cgctx = jit_data->ctx; + image = jit_data->image; + bpf_hdr = jit_data->header; + proglen = jit_data->proglen; + alloclen = proglen + FUNCTION_DESCR_SIZE; + extra_pass = true; + goto skip_init_ctx; + } + addrs = kzalloc((flen+1) * sizeof(*addrs), GFP_KERNEL); if (addrs == NULL) { fp = org_fp; - goto out; + goto out_addrs; } memset(&cgctx, 0, sizeof(struct codegen_context)); @@ -916,10 +961,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) cgctx.stack_size = round_up(fp->aux->stack_depth, 16); /* Scouting faux-generate pass 0 */ - if (bpf_jit_build_body(fp, 0, &cgctx, addrs)) { + if (bpf_jit_build_body(fp, 0, &cgctx, addrs, false)) { /* We hit something illegal or unsupported. */ fp = org_fp; - goto out; + goto out_addrs; } /* @@ -937,9 +982,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) bpf_jit_fill_ill_insns); if (!bpf_hdr) { fp = org_fp; - goto out; + goto out_addrs; } +skip_init_ctx: code_base = (u32 *)(image + FUNCTION_DESCR_SIZE); /* Code generation passes 1-2 */ @@ -947,7 +993,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) /* Now build the prologue, body code & epilogue for real. */ cgctx.idx = 0; bpf_jit_build_prologue(code_base, &cgctx); - bpf_jit_build_body(fp, code_base, &cgctx, addrs); + bpf_jit_build_body(fp, code_base, &cgctx, addrs, extra_pass); bpf_jit_build_epilogue(code_base, &cgctx); if (bpf_jit_enable > 1) @@ -973,10 +1019,20 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp) fp->jited_len = alloclen; bpf_flush_icache(bpf_hdr, (u8 *)bpf_hdr + (bpf_hdr->pages * PAGE_SIZE)); + if (!fp->is_func || extra_pass) { +out_addrs: + kfree(addrs); + kfree(jit_data); + fp->aux->jit_data = NULL; + } else { + jit_data->addrs = addrs; + jit_data->ctx = cgctx; + jit_data->proglen = proglen; + jit_data->image = image; + jit_data->header = bpf_hdr; + } out: - kfree(addrs); - if (bpf_blinded) bpf_jit_prog_release_other(fp, fp == org_fp ? tmp_fp : org_fp); -- cgit v1.2.3
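The NOP-padding scheme in the second patch can be illustrated outside the kernel. The sketch below is a minimal user-space approximation, assuming a toy emit() buffer and placeholder instruction encodings rather than the kernel's PPC_LI64()/PPC_NOP() macros; only the pad-to-five-instructions logic mirrors bpf_jit_emit_func_call().

#include <stdint.h>
#include <stdio.h>

#define INST_NOP 0x60000000u	/* ori r0,r0,0 -- the ppc64 nop encoding */

struct ctx {
	uint32_t insns[64];
	unsigned int idx;
};

static void emit(struct ctx *c, uint32_t insn)
{
	c->insns[c->idx++] = insn;
}

/*
 * Variable-length load of a 64-bit immediate. The real PPC_LI64() emits
 * one to five instructions (lis/ori/sldi/oris/ori); this placeholder just
 * emits one 16-bit chunk per instruction to get the same "size depends on
 * the value" behaviour.
 */
static void emit_li64(struct ctx *c, uint64_t imm)
{
	do {
		emit(c, (uint32_t)(imm & 0xffff));
		imm >>= 16;
	} while (imm);
}

/*
 * Fixed-length variant: always five instructions, so a later pass can
 * rewrite the address in place without the JITed image growing or
 * shrinking -- the point of the NOP padding in bpf_jit_emit_func_call().
 */
static void emit_li64_padded(struct ctx *c, uint64_t imm)
{
	unsigned int start = c->idx;

	emit_li64(c, imm);
	while (c->idx - start < 5)
		emit(c, INST_NOP);
}

int main(void)
{
	struct ctx c = { .idx = 0 };

	emit_li64_padded(&c, 0x1234);			/* short value: 1 insn + 4 nops */
	emit_li64_padded(&c, 0xc000000012345678ULL);	/* long value: 4 insns + 1 nop */
	printf("emitted %u instructions (expected 10)\n", c.idx);
	return 0;
}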
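Similarly, the callee-address resolution added in the third patch boils down to this: during the initial passes the target of a BPF_PSEUDO_CALL is still unknown (the padded address load keeps the image size stable), and the extra pass looks the callee up through the instruction's off field in the caller's function table. A condensed user-space sketch follows, with struct insn and struct prog as simplified stand-ins for the kernel's bpf_insn and bpf_prog auxiliary data, and resolve_call() as an illustrative helper rather than a kernel API.

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define BPF_PSEUDO_CALL 1

struct insn {
	uint8_t src_reg;
	int16_t off;	/* subprog index for bpf-to-bpf calls */
	int32_t imm;	/* offset from the helper call base otherwise */
};

struct prog {
	void **func;		/* subprog entry points, known after pass 1 */
	unsigned int func_cnt;
};

/* Mirrors the BPF_JMP | BPF_CALL handling added to bpf_jit_build_body(). */
static int resolve_call(const struct prog *p, const struct insn *insn,
			uint64_t helper_base, bool extra_pass,
			uint64_t *target)
{
	if (insn->src_reg == BPF_PSEUDO_CALL) {
		if (!extra_pass) {
			*target = 0;	/* unknown yet; padded load keeps size fixed */
			return 0;
		}
		if (p->func && insn->off >= 0 &&
		    (unsigned int)insn->off < p->func_cnt) {
			*target = (uint64_t)(uintptr_t)p->func[insn->off];
			return 0;
		}
		return -1;		/* -EINVAL in the kernel */
	}
	/* Kernel helper: address is imm bytes from the helper call base. */
	*target = helper_base + (uint64_t)insn->imm;
	return 0;
}

int main(void)
{
	static char subprog_entry;	/* stand-in for a JITed subprog image */
	void *funcs[] = { &subprog_entry };
	struct prog p = { .func = funcs, .func_cnt = 1 };
	struct insn call = { .src_reg = BPF_PSEUDO_CALL, .off = 0, .imm = -1 };
	uint64_t target;

	resolve_call(&p, &call, 0, false, &target);	/* early passes: target stays 0 */
	resolve_call(&p, &call, 0, true, &target);	/* extra pass: real address */
	printf("callee resolved to %#llx\n", (unsigned long long)target);
	return 0;
}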