From a9c5ad31fbdc4dec6d266fe22e51de1ad6d1bcf2 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 31 Aug 2022 08:26:52 -0700 Subject: bpf: x86: Support in-register struct arguments in trampoline programs In C, struct value can be passed as a function argument. For small structs, struct value may be passed in one or more registers. For trampoline based bpf programs, this would cause complication since one-to-one mapping between function argument and arch argument register is not valid any more. The latest llvm16 added bpf support to pass by values for struct up to 16 bytes ([1]). This is also true for x86_64 architecture where two registers will hold the struct value if the struct size is >8 and <= 16. This may not be true if one of struct member is 'double' type but in current linux source code we don't have such instance yet, so we assume all >8 && <= 16 struct holds two general purpose argument registers. Also change on-stack nr_args value to the number of registers holding the arguments. This will permit bpf_get_func_arg() helper to get all argument values. [1] https://reviews.llvm.org/D132144 Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220831152652.2078600-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 68 +++++++++++++++++++++++++++++++++------------ 1 file changed, 51 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index c1f6c1c51d99..ae89f4143eb4 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1751,34 +1751,60 @@ emit_jmp: static void save_regs(const struct btf_func_model *m, u8 **prog, int nr_args, int stack_size) { - int i; + int i, j, arg_size, nr_regs; /* Store function arguments to stack. * For a function that accepts two pointers the sequence will be: * mov QWORD PTR [rbp-0x10],rdi * mov QWORD PTR [rbp-0x8],rsi */ - for (i = 0; i < min(nr_args, 6); i++) - emit_stx(prog, bytes_to_bpf_size(m->arg_size[i]), - BPF_REG_FP, - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - -(stack_size - i * 8)); + for (i = 0, j = 0; i < min(nr_args, 6); i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) { + nr_regs = (m->arg_size[i] + 7) / 8; + arg_size = 8; + } else { + nr_regs = 1; + arg_size = m->arg_size[i]; + } + + while (nr_regs) { + emit_stx(prog, bytes_to_bpf_size(arg_size), + BPF_REG_FP, + j == 5 ? X86_REG_R9 : BPF_REG_1 + j, + -(stack_size - j * 8)); + nr_regs--; + j++; + } + } } static void restore_regs(const struct btf_func_model *m, u8 **prog, int nr_args, int stack_size) { - int i; + int i, j, arg_size, nr_regs; /* Restore function arguments from stack. * For a function that accepts two pointers the sequence will be: * EMIT4(0x48, 0x8B, 0x7D, 0xF0); mov rdi,QWORD PTR [rbp-0x10] * EMIT4(0x48, 0x8B, 0x75, 0xF8); mov rsi,QWORD PTR [rbp-0x8] */ - for (i = 0; i < min(nr_args, 6); i++) - emit_ldx(prog, bytes_to_bpf_size(m->arg_size[i]), - i == 5 ? X86_REG_R9 : BPF_REG_1 + i, - BPF_REG_FP, - -(stack_size - i * 8)); + for (i = 0, j = 0; i < min(nr_args, 6); i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) { + nr_regs = (m->arg_size[i] + 7) / 8; + arg_size = 8; + } else { + nr_regs = 1; + arg_size = m->arg_size[i]; + } + + while (nr_regs) { + emit_ldx(prog, bytes_to_bpf_size(arg_size), + j == 5 ? X86_REG_R9 : BPF_REG_1 + j, + BPF_REG_FP, + -(stack_size - j * 8)); + nr_regs--; + j++; + } + } } static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, @@ -2015,7 +2041,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i struct bpf_tramp_links *tlinks, void *orig_call) { - int ret, i, nr_args = m->nr_args; + int ret, i, nr_args = m->nr_args, extra_nregs = 0; int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; @@ -2028,6 +2054,14 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (nr_args > 6) return -ENOTSUPP; + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + extra_nregs += (m->arg_size[i] + 7) / 8 - 1; + } + if (nr_args + extra_nregs > 6) + return -ENOTSUPP; + stack_size += extra_nregs * 8; + /* Generated trampoline stack layout: * * RBP + 8 [ return address ] @@ -2040,7 +2074,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i * [ ... ] * RBP - regs_off [ reg_arg1 ] program's ctx pointer * - * RBP - args_off [ args count ] always + * RBP - args_off [ arg regs count ] always * * RBP - ip_off [ traced function ] BPF_TRAMP_F_IP_ARG flag * @@ -2083,11 +2117,11 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i EMIT4(0x48, 0x83, 0xEC, stack_size); /* sub rsp, stack_size */ EMIT1(0x53); /* push rbx */ - /* Store number of arguments of the traced function: - * mov rax, nr_args + /* Store number of argument registers of the traced function: + * mov rax, nr_args + extra_nregs * mov QWORD PTR [rbp - args_off], rax */ - emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args); + emit_mov_imm64(&prog, BPF_REG_0, 0, (u32) nr_args + extra_nregs); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -args_off); if (flags & BPF_TRAMP_F_IP_ARG) { -- cgit v1.2.3 From eb707dde264af5eb0271156d7fbd59133fa02cac Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Wed, 31 Aug 2022 08:27:02 -0700 Subject: bpf: arm64: No support of struct argument in trampoline programs ARM64 does not support struct argument for trampoline based bpf programs yet. Signed-off-by: Yonghong Song Link: https://lore.kernel.org/r/20220831152702.2079066-1-yhs@fb.com Signed-off-by: Alexei Starovoitov --- arch/arm64/net/bpf_jit_comp.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/arm64/net/bpf_jit_comp.c b/arch/arm64/net/bpf_jit_comp.c index 389623ae5a91..30f76178608b 100644 --- a/arch/arm64/net/bpf_jit_comp.c +++ b/arch/arm64/net/bpf_jit_comp.c @@ -1970,7 +1970,7 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, u32 flags, struct bpf_tramp_links *tlinks, void *orig_call) { - int ret; + int i, ret; int nargs = m->nr_args; int max_insns = ((long)image_end - (long)image) / AARCH64_INSN_SIZE; struct jit_ctx ctx = { @@ -1982,6 +1982,12 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, if (nargs > 8) return -ENOTSUPP; + /* don't support struct argument */ + for (i = 0; i < MAX_BPF_FUNC_ARGS; i++) { + if (m->arg_flags[i] & BTF_FMODEL_STRUCT_ARG) + return -ENOTSUPP; + } + ret = prepare_trampoline(&ctx, im, tlinks, orig_call, nargs, flags); if (ret < 0) return ret; -- cgit v1.2.3 From ceea991a019c57a1fb0edd12a5f836a0fa431aee Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Sat, 3 Sep 2022 15:11:54 +0200 Subject: bpf: Move bpf_dispatcher function out of ftrace locations The dispatcher function is attached/detached to trampoline by dispatcher update function. At the same time it's available as ftrace attachable function. After discussion [1] the proposed solution is to use compiler attributes to alter bpf_dispatcher_##name##_func function: - remove it from being instrumented with __no_instrument_function__ attribute, so ftrace has no track of it - but still generate 5 nop instructions with patchable_function_entry(5) attribute, which are expected by bpf_arch_text_poke used by dispatcher update function Enabling HAVE_DYNAMIC_FTRACE_NO_PATCHABLE option for x86, so __patchable_function_entries functions are not part of ftrace/mcount locations. Adding attributes to bpf_dispatcher_XXX function on x86_64 so it's kept out of ftrace locations and has 5 byte nop generated at entry. These attributes need to be arch specific as pointed out by Ilya Leoshkevic in here [2]. The dispatcher image is generated only for x86_64 arch, so the code can stay as is for other archs. [1] https://lore.kernel.org/bpf/20220722110811.124515-1-jolsa@kernel.org/ [2] https://lore.kernel.org/bpf/969a14281a7791c334d476825863ee449964dd0c.camel@linux.ibm.com/ Suggested-by: Peter Zijlstra (Intel) Signed-off-by: Jiri Olsa Signed-off-by: Daniel Borkmann Acked-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/bpf/20220903131154.420467-3-jolsa@kernel.org --- arch/x86/Kconfig | 1 + include/linux/bpf.h | 7 +++++++ 2 files changed, 8 insertions(+) (limited to 'arch') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index f9920f1341c8..089c20cefd2b 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -284,6 +284,7 @@ config X86 select PROC_PID_ARCH_STATUS if PROC_FS select HAVE_ARCH_NODE_DEV_GROUP if X86_SGX imply IMA_SECURE_AND_OR_TRUSTED_BOOT if EFI + select HAVE_DYNAMIC_FTRACE_NO_PATCHABLE config INSTRUCTION_DECODER def_bool y diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 54178b9e9c3a..e0dbe0c0a17e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -977,7 +977,14 @@ int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); }, \ } +#ifdef CONFIG_X86_64 +#define BPF_DISPATCHER_ATTRIBUTES __attribute__((patchable_function_entry(5))) +#else +#define BPF_DISPATCHER_ATTRIBUTES +#endif + #define DEFINE_BPF_DISPATCHER(name) \ + notrace BPF_DISPATCHER_ATTRIBUTES \ noinline __nocfi unsigned int bpf_dispatcher_##name##_func( \ const void *ctx, \ const struct bpf_insn *insnsi, \ -- cgit v1.2.3 From 4d854f4f31ec4b317dfe316111ddac0fab81f735 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Mon, 26 Sep 2022 17:33:37 +0200 Subject: bpf: Use given function address for trampoline ip arg Using function address given at the generation time as the trampoline ip argument. This way we get directly the function address that we need, so we don't need to: - read the ip from the stack - subtract X86_PATCH_SIZE - subtract ENDBR_INSN_SIZE if CONFIG_X86_KERNEL_IBT is enabled which is not even implemented yet ;-) Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20220926153340.1621984-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index ae89f4143eb4..d4a6183197e9 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -662,7 +662,7 @@ static void emit_mov_imm64(u8 **pprog, u32 dst_reg, */ emit_mov_imm32(&prog, false, dst_reg, imm32_lo); } else { - /* movabsq %rax, imm64 */ + /* movabsq rax, imm64 */ EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); EMIT(imm32_lo, 4); EMIT(imm32_hi, 4); @@ -2039,13 +2039,14 @@ static int invoke_bpf_mod_ret(const struct btf_func_model *m, u8 **pprog, int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *image_end, const struct btf_func_model *m, u32 flags, struct bpf_tramp_links *tlinks, - void *orig_call) + void *func_addr) { int ret, i, nr_args = m->nr_args, extra_nregs = 0; int regs_off, ip_off, args_off, stack_size = nr_args * 8, run_ctx_off; struct bpf_tramp_links *fentry = &tlinks[BPF_TRAMP_FENTRY]; struct bpf_tramp_links *fexit = &tlinks[BPF_TRAMP_FEXIT]; struct bpf_tramp_links *fmod_ret = &tlinks[BPF_TRAMP_MODIFY_RETURN]; + void *orig_call = func_addr; u8 **branches = NULL; u8 *prog; bool save_ret; @@ -2126,12 +2127,10 @@ int arch_prepare_bpf_trampoline(struct bpf_tramp_image *im, void *image, void *i if (flags & BPF_TRAMP_F_IP_ARG) { /* Store IP address of the traced function: - * mov rax, QWORD PTR [rbp + 8] - * sub rax, X86_PATCH_SIZE + * movabsq rax, func_addr * mov QWORD PTR [rbp - ip_off], rax */ - emit_ldx(&prog, BPF_DW, BPF_REG_0, BPF_REG_FP, 8); - EMIT4(0x48, 0x83, 0xe8, X86_PATCH_SIZE); + emit_mov_imm64(&prog, BPF_REG_0, (long) func_addr >> 32, (u32) (long) func_addr); emit_stx(&prog, BPF_DW, BPF_REG_FP, BPF_REG_0, -ip_off); } -- cgit v1.2.3 From 19c02415da2345d0dda2b5c4495bc17cc14b18b5 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 26 Sep 2022 11:47:38 -0700 Subject: bpf: use bpf_prog_pack for bpf_dispatcher Allocate bpf_dispatcher with bpf_prog_pack_alloc so that bpf_dispatcher can share pages with bpf programs. arch_prepare_bpf_dispatcher() is updated to provide a RW buffer as working area for arch code to write to. This also fixes CPA W^X warnning like: CPA refuse W^X violation: 8000000000000163 -> 0000000000000163 range: ... Signed-off-by: Song Liu Link: https://lore.kernel.org/r/20220926184739.3512547-2-song@kernel.org Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 16 ++++++++-------- include/linux/bpf.h | 3 ++- include/linux/filter.h | 5 +++++ kernel/bpf/core.c | 9 +++++++-- kernel/bpf/dispatcher.c | 27 +++++++++++++++++++++------ 5 files changed, 43 insertions(+), 17 deletions(-) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index d4a6183197e9..35796db58116 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -2242,7 +2242,7 @@ cleanup: return ret; } -static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) +static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs, u8 *image, u8 *buf) { u8 *jg_reloc, *prog = *pprog; int pivot, err, jg_bytes = 1; @@ -2258,12 +2258,12 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a]); err = emit_cond_near_jump(&prog, /* je func */ - (void *)progs[a], prog, + (void *)progs[a], image + (prog - buf), X86_JE); if (err) return err; - emit_indirect_jump(&prog, 2 /* rdx */, prog); + emit_indirect_jump(&prog, 2 /* rdx */, image + (prog - buf)); *pprog = prog; return 0; @@ -2288,7 +2288,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) jg_reloc = prog; err = emit_bpf_dispatcher(&prog, a, a + pivot, /* emit lower_part */ - progs); + progs, image, buf); if (err) return err; @@ -2302,7 +2302,7 @@ static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs) emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes); err = emit_bpf_dispatcher(&prog, a + pivot + 1, /* emit upper_part */ - b, progs); + b, progs, image, buf); if (err) return err; @@ -2322,12 +2322,12 @@ static int cmp_ips(const void *a, const void *b) return 0; } -int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) +int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs) { - u8 *prog = image; + u8 *prog = buf; sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL); - return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs); + return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs, image, buf); } struct x64_jit_data { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index edd43edb27d6..9ae155c75014 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -946,6 +946,7 @@ struct bpf_dispatcher { struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX]; int num_progs; void *image; + void *rw_image; u32 image_off; struct bpf_ksym ksym; }; @@ -964,7 +965,7 @@ int bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, struct bpf_trampolin struct bpf_trampoline *bpf_trampoline_get(u64 key, struct bpf_attach_target_info *tgt_info); void bpf_trampoline_put(struct bpf_trampoline *tr); -int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs); +int arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs); #define BPF_DISPATCHER_INIT(_name) { \ .mutex = __MUTEX_INITIALIZER(_name.mutex), \ .func = &_name##_func, \ diff --git a/include/linux/filter.h b/include/linux/filter.h index 98e28126c24b..efc42a6e3aed 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1023,6 +1023,8 @@ extern long bpf_jit_limit_max; typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); +void bpf_jit_fill_hole_with_zero(void *area, unsigned int size); + struct bpf_binary_header * bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, unsigned int alignment, @@ -1035,6 +1037,9 @@ void bpf_jit_free(struct bpf_prog *fp); struct bpf_binary_header * bpf_jit_binary_pack_hdr(const struct bpf_prog *fp); +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_prog_pack_free(struct bpf_binary_header *hdr); + static inline bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp) { return list_empty(&fp->aux->ksym.lnode) || diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index d1be78c28619..711fd293b6de 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -825,6 +825,11 @@ struct bpf_prog_pack { unsigned long bitmap[]; }; +void bpf_jit_fill_hole_with_zero(void *area, unsigned int size) +{ + memset(area, 0, size); +} + #define BPF_PROG_SIZE_TO_NBITS(size) (round_up(size, BPF_PROG_CHUNK_SIZE) / BPF_PROG_CHUNK_SIZE) static DEFINE_MUTEX(pack_mutex); @@ -864,7 +869,7 @@ static struct bpf_prog_pack *alloc_new_pack(bpf_jit_fill_hole_t bpf_fill_ill_ins return pack; } -static void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) +void *bpf_prog_pack_alloc(u32 size, bpf_jit_fill_hole_t bpf_fill_ill_insns) { unsigned int nbits = BPF_PROG_SIZE_TO_NBITS(size); struct bpf_prog_pack *pack; @@ -905,7 +910,7 @@ out: return ptr; } -static void bpf_prog_pack_free(struct bpf_binary_header *hdr) +void bpf_prog_pack_free(struct bpf_binary_header *hdr) { struct bpf_prog_pack *pack = NULL, *tmp; unsigned int nbits; diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c index 2444bd15cc2d..fa64b80b8bca 100644 --- a/kernel/bpf/dispatcher.c +++ b/kernel/bpf/dispatcher.c @@ -85,12 +85,12 @@ static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d, return false; } -int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) +int __weak arch_prepare_bpf_dispatcher(void *image, void *buf, s64 *funcs, int num_funcs) { return -ENOTSUPP; } -static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) +static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image, void *buf) { s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; int i; @@ -99,12 +99,12 @@ static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) if (d->progs[i].prog) *ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func; } - return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs); + return arch_prepare_bpf_dispatcher(image, buf, &ips[0], d->num_progs); } static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) { - void *old, *new; + void *old, *new, *tmp; u32 noff; int err; @@ -117,8 +117,14 @@ static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) } new = d->num_progs ? d->image + noff : NULL; + tmp = d->num_progs ? d->rw_image + noff : NULL; if (new) { - if (bpf_dispatcher_prepare(d, new)) + /* Prepare the dispatcher in d->rw_image. Then use + * bpf_arch_text_copy to update d->image, which is RO+X. + */ + if (bpf_dispatcher_prepare(d, new, tmp)) + return; + if (IS_ERR(bpf_arch_text_copy(new, tmp, PAGE_SIZE / 2))) return; } @@ -140,9 +146,18 @@ void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, mutex_lock(&d->mutex); if (!d->image) { - d->image = bpf_jit_alloc_exec_page(); + d->image = bpf_prog_pack_alloc(PAGE_SIZE, bpf_jit_fill_hole_with_zero); if (!d->image) goto out; + d->rw_image = bpf_jit_alloc_exec(PAGE_SIZE); + if (!d->rw_image) { + u32 size = PAGE_SIZE; + + bpf_arch_text_copy(d->image, &size, sizeof(size)); + bpf_prog_pack_free((struct bpf_binary_header *)d->image); + d->image = NULL; + goto out; + } bpf_image_ksym_add(d->image, &d->ksym); } -- cgit v1.2.3 From 64696c40d03c01e0ea2e3e9aa1c490a7b6a1b6be Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 29 Sep 2022 00:04:03 -0700 Subject: bpf: Add __bpf_prog_{enter,exit}_struct_ops for struct_ops trampoline The struct_ops prog is to allow using bpf to implement the functions in a struct (eg. kernel module). The current usage is to implement the tcp_congestion. The kernel does not call the tcp-cc's ops (ie. the bpf prog) in a recursive way. The struct_ops is sharing the tracing-trampoline's enter/exit function which tracks prog->active to avoid recursion. It is needed for tracing prog. However, it turns out the struct_ops bpf prog will hit this prog->active and unnecessarily skipped running the struct_ops prog. eg. The '.ssthresh' may run in_task() and then interrupted by softirq that runs the same '.ssthresh'. Skip running the '.ssthresh' will end up returning random value to the caller. The patch adds __bpf_prog_{enter,exit}_struct_ops for the struct_ops trampoline. They do not track the prog->active to detect recursion. One exception is when the tcp_congestion's '.init' ops is doing bpf_setsockopt(TCP_CONGESTION) and then recurs to the same '.init' ops. This will be addressed in the following patches. Fixes: ca06f55b9002 ("bpf: Add per-program recursion prevention mechanism") Signed-off-by: Martin KaFai Lau Link: https://lore.kernel.org/r/20220929070407.965581-2-martin.lau@linux.dev Signed-off-by: Alexei Starovoitov --- arch/x86/net/bpf_jit_comp.c | 3 +++ include/linux/bpf.h | 4 ++++ kernel/bpf/trampoline.c | 23 +++++++++++++++++++++++ 3 files changed, 30 insertions(+) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 35796db58116..5b6230779cf3 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -1836,6 +1836,9 @@ static int invoke_bpf_prog(const struct btf_func_model *m, u8 **pprog, if (p->aux->sleepable) { enter = __bpf_prog_enter_sleepable; exit = __bpf_prog_exit_sleepable; + } else if (p->type == BPF_PROG_TYPE_STRUCT_OPS) { + enter = __bpf_prog_enter_struct_ops; + exit = __bpf_prog_exit_struct_ops; } else if (p->expected_attach_type == BPF_LSM_CGROUP) { enter = __bpf_prog_enter_lsm_cgroup; exit = __bpf_prog_exit_lsm_cgroup; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0f3eaf3ed98c..9e7d46d16032 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -864,6 +864,10 @@ u64 notrace __bpf_prog_enter_lsm_cgroup(struct bpf_prog *prog, struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_prog_exit_lsm_cgroup(struct bpf_prog *prog, u64 start, struct bpf_tramp_run_ctx *run_ctx); +u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx); +void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx); void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr); void notrace __bpf_tramp_exit(struct bpf_tramp_image *tr); diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 6f7b939321d6..bf0906e1e2b9 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -964,6 +964,29 @@ void notrace __bpf_prog_exit_sleepable(struct bpf_prog *prog, u64 start, rcu_read_unlock_trace(); } +u64 notrace __bpf_prog_enter_struct_ops(struct bpf_prog *prog, + struct bpf_tramp_run_ctx *run_ctx) + __acquires(RCU) +{ + rcu_read_lock(); + migrate_disable(); + + run_ctx->saved_run_ctx = bpf_set_run_ctx(&run_ctx->run_ctx); + + return bpf_prog_start_time(); +} + +void notrace __bpf_prog_exit_struct_ops(struct bpf_prog *prog, u64 start, + struct bpf_tramp_run_ctx *run_ctx) + __releases(RCU) +{ + bpf_reset_run_ctx(run_ctx->saved_run_ctx); + + update_prog_stats(prog, start); + migrate_enable(); + rcu_read_unlock(); +} + void notrace __bpf_tramp_enter(struct bpf_tramp_image *tr) { percpu_ref_get(&tr->pcref); -- cgit v1.2.3