110 files changed, 6901 insertions(+), 2721 deletions(-)
diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 97dc386e3cb8..cc29869d12a3 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -1260,12 +1260,9 @@ static inline void emit_push_r64(const s8 src[], struct jit_ctx *ctx)
 
 static void build_prologue(struct jit_ctx *ctx)
 {
-	const s8 r0 = bpf2a32[BPF_REG_0][1];
-	const s8 r2 = bpf2a32[BPF_REG_1][1];
-	const s8 r3 = bpf2a32[BPF_REG_1][0];
-	const s8 r4 = bpf2a32[BPF_REG_6][1];
-	const s8 fplo = bpf2a32[BPF_REG_FP][1];
-	const s8 fphi = bpf2a32[BPF_REG_FP][0];
+	const s8 arm_r0 = bpf2a32[BPF_REG_0][1];
+	const s8 *bpf_r1 = bpf2a32[BPF_REG_1];
+	const s8 *bpf_fp = bpf2a32[BPF_REG_FP];
 	const s8 *tcc = bpf2a32[TCALL_CNT];
 
 	/* Save callee saved registers. */
@@ -1278,8 +1275,10 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit(ARM_PUSH(CALLEE_PUSH_MASK), ctx);
 	emit(ARM_MOV_R(ARM_FP, ARM_SP), ctx);
 #endif
-	/* Save frame pointer for later */
-	emit(ARM_SUB_I(ARM_IP, ARM_SP, SCRATCH_SIZE), ctx);
+	/* mov r3, #0 */
+	/* sub r2, sp, #SCRATCH_SIZE */
+	emit(ARM_MOV_I(bpf_r1[0], 0), ctx);
+	emit(ARM_SUB_I(bpf_r1[1], ARM_SP, SCRATCH_SIZE), ctx);
 
 	ctx->stack_size = imm8m(STACK_SIZE);
 
@@ -1287,18 +1286,15 @@ static void build_prologue(struct jit_ctx *ctx)
 	emit(ARM_SUB_I(ARM_SP, ARM_SP, ctx->stack_size), ctx);
 
 	/* Set up BPF prog stack base register */
-	emit_a32_mov_r(fplo, ARM_IP, ctx);
-	emit_a32_mov_i(fphi, 0, ctx);
+	emit_a32_mov_r64(true, bpf_fp, bpf_r1, ctx);
 
-	/* mov r4, 0 */
-	emit(ARM_MOV_I(r4, 0), ctx);
+	/* Initialize Tail Count */
+	emit(ARM_MOV_I(bpf_r1[1], 0), ctx);
+	emit_a32_mov_r64(true, tcc, bpf_r1, ctx);
 
 	/* Move BPF_CTX to BPF_R1 */
-	emit(ARM_MOV_R(r3, r4), ctx);
-	emit(ARM_MOV_R(r2, r0), ctx);
-	/* Initialize Tail Count */
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[0])), ctx);
-	emit(ARM_STR_I(r4, ARM_FP, EBPF_SCRATCH_TO_ARM_FP(tcc[1])), ctx);
+	emit(ARM_MOV_R(bpf_r1[1], arm_r0), ctx);
+
 	/* end of prologue */
 }
diff --git a/arch/arm64/Kconfig b/arch/arm64/Kconfig
index b1b4476ddb83..29d03459de20 100644
--- a/arch/arm64/Kconfig
+++ b/arch/arm64/Kconfig
@@ -69,6 +69,7 @@ config ARM64
 	select ARCH_SUPPORTS_INT128 if CC_HAS_INT128 && (GCC_VERSION >= 50000 || CC_IS_CLANG)
 	select ARCH_SUPPORTS_NUMA_BALANCING
 	select ARCH_WANT_COMPAT_IPC_PARSE_VERSION if COMPAT
+	select ARCH_WANT_DEFAULT_BPF_JIT
 	select ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT
 	select ARCH_WANT_FRAME_POINTERS
 	select ARCH_WANT_HUGE_PMD_SHARE if ARM64_4K_PAGES || (ARM64_16K_PAGES && !ARM64_VA_BITS_36)
diff --git a/arch/riscv/include/asm/perf_event.h b/arch/riscv/include/asm/perf_event.h
index aefbfaa6a781..0234048b12bc 100644
--- a/arch/riscv/include/asm/perf_event.h
+++ b/arch/riscv/include/asm/perf_event.h
@@ -82,4 +82,8 @@ struct riscv_pmu {
 	int		irq;
 };
 
+#ifdef CONFIG_PERF_EVENTS
+#define perf_arch_bpf_user_pt_regs(regs) (struct user_regs_struct *)regs
+#endif
+
 #endif /* _ASM_RISCV_PERF_EVENT_H */
diff --git a/arch/riscv/include/asm/pgtable.h b/arch/riscv/include/asm/pgtable.h
index 36ae01761352..f66b87314fa2 100644
--- a/arch/riscv/include/asm/pgtable.h
+++ b/arch/riscv/include/asm/pgtable.h
@@ -94,6 +94,10 @@ extern pgd_t swapper_pg_dir[];
 #define VMALLOC_END      (PAGE_OFFSET - 1)
 #define VMALLOC_START    (PAGE_OFFSET - VMALLOC_SIZE)
 
+#define BPF_JIT_REGION_SIZE	(SZ_128M)
+#define BPF_JIT_REGION_START	(PAGE_OFFSET - BPF_JIT_REGION_SIZE)
+#define BPF_JIT_REGION_END	(VMALLOC_END)
+
 /*
  * Roughly size the vmemmap space to be large enough to fit enough
  * struct pages to map half the virtual address space.
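The BPF_JIT_REGION_* defines above carve a dedicated 128 MB window for JITed BPF images directly below the kernel's linear mapping, which keeps images within reach of PC-relative calls. A minimal user-space sketch of the same arithmetic — the PAGE_OFFSET value here is an assumed placeholder for illustration, not a real kernel configuration:

#include <assert.h>

#define SZ_128M              (128ULL * 1024 * 1024)
#define PAGE_OFFSET          0xffffffe000000000ULL /* assumed Sv39 value */
#define VMALLOC_END          (PAGE_OFFSET - 1)
#define BPF_JIT_REGION_SIZE  SZ_128M
#define BPF_JIT_REGION_START (PAGE_OFFSET - BPF_JIT_REGION_SIZE)
#define BPF_JIT_REGION_END   VMALLOC_END

int main(void)
{
	/* The window sits immediately below PAGE_OFFSET. */
	assert(BPF_JIT_REGION_END - BPF_JIT_REGION_START == SZ_128M - 1);
	/* 128 MB is well inside the +/-2 GB span of an auipc+jalr pair,
	 * so any two addresses in the region can reach each other. */
	assert(BPF_JIT_REGION_SIZE < (1ULL << 31));
	return 0;
}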
diff --git a/arch/riscv/include/uapi/asm/bpf_perf_event.h b/arch/riscv/include/uapi/asm/bpf_perf_event.h
new file mode 100644
index 000000000000..6cb1c2823288
--- /dev/null
+++ b/arch/riscv/include/uapi/asm/bpf_perf_event.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */
+#ifndef _UAPI__ASM_BPF_PERF_EVENT_H__
+#define _UAPI__ASM_BPF_PERF_EVENT_H__
+
+#include <asm/ptrace.h>
+
+typedef struct user_regs_struct bpf_user_pt_regs_t;
+
+#endif /* _UAPI__ASM_BPF_PERF_EVENT_H__ */
diff --git a/arch/riscv/net/bpf_jit_comp.c b/arch/riscv/net/bpf_jit_comp.c
index 7fbf56aab661..483f4ad7f4dc 100644
--- a/arch/riscv/net/bpf_jit_comp.c
+++ b/arch/riscv/net/bpf_jit_comp.c
@@ -120,6 +120,11 @@ static bool seen_reg(int reg, struct rv_jit_context *ctx)
 	return false;
 }
 
+static void mark_fp(struct rv_jit_context *ctx)
+{
+	__set_bit(RV_CTX_F_SEEN_S5, &ctx->flags);
+}
+
 static void mark_call(struct rv_jit_context *ctx)
 {
 	__set_bit(RV_CTX_F_SEEN_CALL, &ctx->flags);
@@ -456,6 +461,11 @@ static u32 rv_amoadd_d(u8 rd, u8 rs2, u8 rs1, u8 aq, u8 rl)
 	return rv_amo_insn(0, aq, rl, rs2, rs1, 3, rd, 0x2f);
 }
 
+static u32 rv_auipc(u8 rd, u32 imm31_12)
+{
+	return rv_u_insn(imm31_12, rd, 0x17);
+}
+
 static bool is_12b_int(s64 val)
 {
 	return -(1 << 11) <= val && val < (1 << 11);
@@ -479,27 +489,7 @@ static bool is_32b_int(s64 val)
 static int is_12b_check(int off, int insn)
 {
 	if (!is_12b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
-}
-
-static int is_13b_check(int off, int insn)
-{
-	if (!is_13b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-		       insn, (int)off);
-		return -1;
-	}
-	return 0;
-}
-
-static int is_21b_check(int off, int insn)
-{
-	if (!is_21b_int(off)) {
-		pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
+		pr_err("bpf-jit: insn=%d 12b < offset=%d not supported yet!\n",
 		       insn, (int)off);
 		return -1;
 	}
@@ -545,10 +535,13 @@ static void emit_imm(u8 rd, s64 val, struct rv_jit_context *ctx)
 		emit(rv_addi(rd, rd, lower), ctx);
 }
 
-static int rv_offset(int bpf_to, int bpf_from, struct rv_jit_context *ctx)
+static int rv_offset(int insn, int off, struct rv_jit_context *ctx)
 {
-	int from = ctx->offset[bpf_from] - 1, to = ctx->offset[bpf_to];
+	int from, to;
 
+	off++; /* BPF branch is from PC+1, RV is from PC */
+	from = (insn > 0) ? ctx->offset[insn - 1] : 0;
+	to = (insn + off > 0) ? ctx->offset[insn + off - 1] : 0;
 	return (to - from) << 2;
}
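The reworked rv_offset() turns a BPF instruction index plus branch offset into a RISC-V byte displacement, using ctx->offset[i] as the count of RV instructions emitted up to and including BPF insn i. A hedged stand-alone rendering with an invented offset table, purely for illustration:

#include <assert.h>

/* offset[i]: 32-bit RV instructions emitted for BPF insns 0..i.
 * These numbers are made up for the example. */
static const int offset[] = { 2, 5, 6, 9, 10 };

/* Mirrors the patched rv_offset(): BPF branches are relative to PC+1,
 * RV branches are relative to PC, hence the off++. */
static int rv_offset(int insn, int off)
{
	int from, to;

	off++;
	from = (insn > 0) ? offset[insn - 1] : 0;
	to = (insn + off > 0) ? offset[insn + off - 1] : 0;
	return (to - from) << 2; /* 4 bytes per RV instruction */
}

int main(void)
{
	/* BPF insn 1 jumping forward by 2 lands at BPF insn 4, i.e.
	 * RV position offset[3] = 9; the source is offset[0] = 2. */
	assert(rv_offset(1, 2) == (9 - 2) << 2);
	return 0;
}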
@@ -559,7 +552,7 @@ static int epilogue_offset(struct rv_jit_context *ctx)
 	return (to - from) << 2;
 }
 
-static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
+static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
 {
 	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 8;
 
@@ -596,8 +589,114 @@ static void __build_epilogue(u8 reg, struct rv_jit_context *ctx)
 	emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
 
 	/* Set return value. */
-	emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
-	emit(rv_jalr(RV_REG_ZERO, reg, 0), ctx);
+	if (!is_tail_call)
+		emit(rv_addi(RV_REG_A0, RV_REG_A5, 0), ctx);
+	emit(rv_jalr(RV_REG_ZERO, is_tail_call ? RV_REG_T3 : RV_REG_RA,
+		     is_tail_call ? 4 : 0), /* skip TCC init */
+	     ctx);
+}
+
+/* return -1 or inverted cond */
+static int invert_bpf_cond(u8 cond)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		return BPF_JNE;
+	case BPF_JGT:
+		return BPF_JLE;
+	case BPF_JLT:
+		return BPF_JGE;
+	case BPF_JGE:
+		return BPF_JLT;
+	case BPF_JLE:
+		return BPF_JGT;
+	case BPF_JNE:
+		return BPF_JEQ;
+	case BPF_JSGT:
+		return BPF_JSLE;
+	case BPF_JSLT:
+		return BPF_JSGE;
+	case BPF_JSGE:
+		return BPF_JSLT;
+	case BPF_JSLE:
+		return BPF_JSGT;
+	}
+	return -1;
+}
+
+static void emit_bcc(u8 cond, u8 rd, u8 rs, int rvoff,
+		     struct rv_jit_context *ctx)
+{
+	switch (cond) {
+	case BPF_JEQ:
+		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JGT:
+		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JLT:
+		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JGE:
+		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JLE:
+		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JNE:
+		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSGT:
+		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
+		return;
+	case BPF_JSLT:
+		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSGE:
+		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
+		return;
+	case BPF_JSLE:
+		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
+	}
+}
+
+static void emit_branch(u8 cond, u8 rd, u8 rs, int rvoff,
+			struct rv_jit_context *ctx)
+{
+	s64 upper, lower;
+
+	if (is_13b_int(rvoff)) {
+		emit_bcc(cond, rd, rs, rvoff, ctx);
+		return;
+	}
+
+	/* Adjust for jal */
+	rvoff -= 4;
+
+	/* Transform, e.g.:
+	 *   bne rd,rs,foo
+	 * to
+	 *   beq rd,rs,<.L1>
+	 *   (auipc foo)
+	 *   jal(r) foo
+	 * .L1
+	 */
+	cond = invert_bpf_cond(cond);
+	if (is_21b_int(rvoff)) {
+		emit_bcc(cond, rd, rs, 8, ctx);
+		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		return;
+	}
+
+	/* 32b No need for an additional rvoff adjustment, since we
+	 * get that from the auipc at PC', where PC = PC' + 4.
+	 */
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+
+	emit_bcc(cond, rd, rs, 12, ctx);
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(RV_REG_ZERO, RV_REG_T1, lower), ctx);
+}
 
 static void emit_zext_32(u8 reg, struct rv_jit_context *ctx)
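The far-branch path above inverts the condition and then reaches the target with auipc+jalr. The +2^11 rounding in `upper` compensates for jalr sign-extending its low 12 bits. A self-contained check of that arithmetic (plain C; arithmetic right shift on signed values is assumed, as it is by the kernel's supported compilers):

#include <assert.h>
#include <stdint.h>

/* Split a PC-relative offset the way emit_branch()/emit_jump_and_link()
 * do for an auipc+jalr pair. */
static void split(int64_t off, int64_t *upper, int64_t *lower)
{
	*upper = (off + (1 << 11)) >> 12;
	*lower = off & 0xfff;
}

/* What the CPU computes: auipc adds upper<<12, jalr adds the
 * sign-extended 12-bit lower part. */
static int64_t reconstruct(int64_t upper, int64_t lower)
{
	int64_t simm12 = (lower & 0x800) ? lower - 0x1000 : lower;

	return (upper << 12) + simm12;
}

int main(void)
{
	int64_t off, upper, lower;

	for (off = -(1 << 20); off <= (1 << 20); off += 4093) {
		split(off, &upper, &lower);
		assert(reconstruct(upper, lower) == off);
	}
	return 0;
}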
@@ -627,18 +726,14 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 		return -1;
 	emit(rv_lwu(RV_REG_T1, off, RV_REG_A1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_bgeu(RV_REG_A2, RV_REG_T1, off >> 1), ctx);
+	emit_branch(BPF_JGE, RV_REG_A2, RV_REG_T1, off, ctx);
 
 	/* if (TCC-- < 0)
 	 *     goto out;
 	 */
 	emit(rv_addi(RV_REG_T1, tcc, -1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_blt(tcc, RV_REG_ZERO, off >> 1), ctx);
+	emit_branch(BPF_JSLT, tcc, RV_REG_ZERO, off, ctx);
 
 	/* prog = array->ptrs[index];
 	 * if (!prog)
@@ -651,18 +746,15 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 		return -1;
 	emit(rv_ld(RV_REG_T2, off, RV_REG_T2), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	if (is_13b_check(off, insn))
-		return -1;
-	emit(rv_beq(RV_REG_T2, RV_REG_ZERO, off >> 1), ctx);
+	emit_branch(BPF_JEQ, RV_REG_T2, RV_REG_ZERO, off, ctx);
 
 	/* goto *(prog->bpf_func + 4); */
 	off = offsetof(struct bpf_prog, bpf_func);
 	if (is_12b_check(off, insn))
 		return -1;
 	emit(rv_ld(RV_REG_T3, off, RV_REG_T2), ctx);
-	emit(rv_addi(RV_REG_T3, RV_REG_T3, 4), ctx);
 	emit(rv_addi(RV_REG_TCC, RV_REG_T1, 0), ctx);
-	__build_epilogue(RV_REG_T3, ctx);
+	__build_epilogue(true, ctx);
 	return 0;
 }
 
@@ -687,13 +779,6 @@ static void init_regs(u8 *rd, u8 *rs, const struct bpf_insn *insn,
 		*rs = bpf_to_rv_reg(insn->src_reg, ctx);
 }
 
-static int rv_offset_check(int *rvoff, s16 off, int insn,
-			   struct rv_jit_context *ctx)
-{
-	*rvoff = rv_offset(insn + off, insn, ctx);
-	return is_13b_check(*rvoff, insn);
-}
-
 static void emit_zext_32_rd_rs(u8 *rd, u8 *rs, struct rv_jit_context *ctx)
 {
 	emit(rv_addi(RV_REG_T2, *rd, 0), ctx);
@@ -726,13 +811,57 @@ static void emit_sext_32_rd(u8 *rd, struct rv_jit_context *ctx)
 	*rd = RV_REG_T2;
 }
 
+static void emit_jump_and_link(u8 rd, s64 rvoff, bool force_jalr,
+			       struct rv_jit_context *ctx)
+{
+	s64 upper, lower;
+
+	if (rvoff && is_21b_int(rvoff) && !force_jalr) {
+		emit(rv_jal(rd, rvoff >> 1), ctx);
+		return;
+	}
+
+	upper = (rvoff + (1 << 11)) >> 12;
+	lower = rvoff & 0xfff;
+	emit(rv_auipc(RV_REG_T1, upper), ctx);
+	emit(rv_jalr(rd, RV_REG_T1, lower), ctx);
+}
+
+static bool is_signed_bpf_cond(u8 cond)
+{
+	return cond == BPF_JSGT || cond == BPF_JSLT ||
+	       cond == BPF_JSGE || cond == BPF_JSLE;
+}
+
+static int emit_call(bool fixed, u64 addr, struct rv_jit_context *ctx)
+{
+	s64 off = 0;
+	u64 ip;
+	u8 rd;
+
+	if (addr && ctx->insns) {
+		ip = (u64)(long)(ctx->insns + ctx->ninsns);
+		off = addr - ip;
+		if (!is_32b_int(off)) {
+			pr_err("bpf-jit: target call addr %pK is out of range\n",
+			       (void *)addr);
+			return -ERANGE;
+		}
+	}
+
+	emit_jump_and_link(RV_REG_RA, off, !fixed, ctx);
+	rd = bpf_to_rv_reg(BPF_REG_0, ctx);
+	emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+	return 0;
+}
+
 static int emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 		     bool extra_pass)
 {
 	bool is64 = BPF_CLASS(insn->code) == BPF_ALU64 ||
 		    BPF_CLASS(insn->code) == BPF_JMP;
+	int s, e, rvoff, i = insn - ctx->prog->insnsi;
 	struct bpf_prog_aux *aux = ctx->prog->aux;
-	int rvoff, i = insn - ctx->prog->insnsi;
 	u8 rd = -1, rs = -1, code = insn->code;
 	s16 off = insn->off;
 	s32 imm = insn->imm;
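The emit_call() helper added above only emits a call when the displacement between the call site and the target fits in 32 bits, since auipc+jalr spans roughly +/-2 GB — the same constraint the new BPF_JIT_REGION addresses. A toy illustration with invented addresses:

#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

/* Mirrors is_32b_int()/emit_call()'s reachability test. */
static bool is_32b_int(int64_t val)
{
	return -(1LL << 31) <= val && val < (1LL << 31);
}

static bool call_in_range(uint64_t target, uint64_t ip)
{
	return is_32b_int((int64_t)(target - ip));
}

int main(void)
{
	uint64_t ip = 0xffffffdf00000000ULL; /* made-up JIT image address */

	printf("near: %d\n", call_in_range(ip + 0x10000, ip));      /* 1 */
	printf("far:  %d\n", call_in_range(ip + (3ULL << 30), ip)); /* 0 */
	return 0;
}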
@@ -1000,214 +1129,110 @@ out_be:
 
 	/* JUMP off */
 	case BPF_JMP | BPF_JA:
-		rvoff = rv_offset(i + off, i, ctx);
-		if (!is_21b_int(rvoff)) {
-			pr_err("bpf-jit: insn=%d offset=%d not supported yet!\n",
-			       i, rvoff);
-			return -1;
-		}
-
-		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		rvoff = rv_offset(i, off, ctx);
+		emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
 		break;
 
 	/* IF (dst COND src) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_X:
 	case BPF_JMP32 | BPF_JEQ | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_beq(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGT | BPF_X:
 	case BPF_JMP32 | BPF_JGT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bltu(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLT | BPF_X:
 	case BPF_JMP32 | BPF_JLT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bltu(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGE | BPF_X:
 	case BPF_JMP32 | BPF_JGE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bgeu(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLE | BPF_X:
 	case BPF_JMP32 | BPF_JLE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bgeu(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JNE | BPF_X:
 	case BPF_JMP32 | BPF_JNE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bne(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGT | BPF_X:
 	case BPF_JMP32 | BPF_JSGT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_blt(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLT | BPF_X:
 	case BPF_JMP32 | BPF_JSLT | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_blt(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGE | BPF_X:
 	case BPF_JMP32 | BPF_JSGE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bge(rd, rs, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLE | BPF_X:
 	case BPF_JMP32 | BPF_JSLE | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_sext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_bge(rs, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSET | BPF_X:
 	case BPF_JMP32 | BPF_JSET | BPF_X:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		if (!is64)
-			emit_zext_32_rd_rs(&rd, &rs, ctx);
-		emit(rv_and(RV_REG_T1, rd, rs), ctx);
-		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+		rvoff = rv_offset(i, off, ctx);
+		if (!is64) {
+			s = ctx->ninsns;
+			if (is_signed_bpf_cond(BPF_OP(code)))
+				emit_sext_32_rd_rs(&rd, &rs, ctx);
+			else
+				emit_zext_32_rd_rs(&rd, &rs, ctx);
+			e = ctx->ninsns;
+
+			/* Adjust for extra insns */
+			rvoff -= (e - s) << 2;
+		}
+
+		if (BPF_OP(code) == BPF_JSET) {
+			/* Adjust for and */
+			rvoff -= 4;
+			emit(rv_and(RV_REG_T1, rd, rs), ctx);
+			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+				    ctx);
+		} else {
+			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+		}
 		break;
 
 	/* IF (dst COND imm) JUMP off */
 	case BPF_JMP | BPF_JEQ | BPF_K:
 	case BPF_JMP32 | BPF_JEQ | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_beq(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGT | BPF_K:
 	case BPF_JMP32 | BPF_JGT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bltu(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLT | BPF_K:
 	case BPF_JMP32 | BPF_JLT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bltu(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JGE | BPF_K:
 	case BPF_JMP32 | BPF_JGE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bgeu(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JLE | BPF_K:
 	case BPF_JMP32 | BPF_JLE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bgeu(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JNE | BPF_K:
 	case BPF_JMP32 | BPF_JNE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_bne(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGT | BPF_K:
 	case BPF_JMP32 | BPF_JSGT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_blt(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLT | BPF_K:
 	case BPF_JMP32 | BPF_JSLT | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_blt(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSGE | BPF_K:
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_bge(rd, RV_REG_T1, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
-		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_sext_32_rd(&rd, ctx);
-		emit(rv_bge(RV_REG_T1, rd, rvoff >> 1), ctx);
-		break;
 	case BPF_JMP | BPF_JSET | BPF_K:
 	case BPF_JMP32 | BPF_JSET | BPF_K:
-		if (rv_offset_check(&rvoff, off, i, ctx))
-			return -1;
+		rvoff = rv_offset(i, off, ctx);
+		s = ctx->ninsns;
 		emit_imm(RV_REG_T1, imm, ctx);
-		if (!is64)
-			emit_zext_32_rd_t1(&rd, ctx);
-		emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
-		emit(rv_bne(RV_REG_T1, RV_REG_ZERO, rvoff >> 1), ctx);
+		if (!is64) {
+			if (is_signed_bpf_cond(BPF_OP(code)))
+				emit_sext_32_rd(&rd, ctx);
+			else
+				emit_zext_32_rd_t1(&rd, ctx);
+		}
+		e = ctx->ninsns;
+
+		/* Adjust for extra insns */
+		rvoff -= (e - s) << 2;
+
+		if (BPF_OP(code) == BPF_JSET) {
+			/* Adjust for and */
+			rvoff -= 4;
+			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
+			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
+				    ctx);
+		} else {
+			emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
+		}
 		break;
 
 	/* function call */
 	case BPF_JMP | BPF_CALL:
 	{
 		bool fixed;
-		int i, ret;
+		int ret;
 		u64 addr;
 
 		mark_call(ctx);
@@ -1215,20 +1240,9 @@ out_be:
 					    &fixed);
 		if (ret < 0)
 			return ret;
-		if (fixed) {
-			emit_imm(RV_REG_T1, addr, ctx);
-		} else {
-			i = ctx->ninsns;
-			emit_imm(RV_REG_T1, addr, ctx);
-			for (i = ctx->ninsns - i; i < 8; i++) {
-				/* nop */
-				emit(rv_addi(RV_REG_ZERO, RV_REG_ZERO, 0),
-				     ctx);
-			}
-		}
-		emit(rv_jalr(RV_REG_RA, RV_REG_T1, 0), ctx);
-		rd = bpf_to_rv_reg(BPF_REG_0, ctx);
-		emit(rv_addi(rd, RV_REG_A0, 0), ctx);
+		ret = emit_call(fixed, addr, ctx);
+		if (ret)
+			return ret;
 		break;
 	}
 	/* tail call */
@@ -1243,9 +1257,7 @@ out_be:
 			break;
 
 		rvoff = epilogue_offset(ctx);
-		if (is_21b_check(rvoff, i))
-			return -1;
-		emit(rv_jal(RV_REG_ZERO, rvoff >> 1), ctx);
+		emit_jump_and_link(RV_REG_ZERO, rvoff, false, ctx);
 		break;
 
 	/* dst = imm64 */
@@ -1426,6 +1438,10 @@ static void build_prologue(struct rv_jit_context *ctx)
 {
 	int stack_adjust = 0, store_offset, bpf_stack_adjust;
 
+	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+	if (bpf_stack_adjust)
+		mark_fp(ctx);
+
 	if (seen_reg(RV_REG_RA, ctx))
 		stack_adjust += 8;
 	stack_adjust += 8; /* RV_REG_FP */
@@ -1443,7 +1459,6 @@ static void build_prologue(struct rv_jit_context *ctx)
 		stack_adjust += 8;
 
 	stack_adjust = round_up(stack_adjust, 16);
-	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
 	stack_adjust += bpf_stack_adjust;
 
 	store_offset = stack_adjust - 8;
@@ -1502,10 +1517,10 @@ static void build_prologue(struct rv_jit_context *ctx)
 
 static void build_epilogue(struct rv_jit_context *ctx)
 {
-	__build_epilogue(RV_REG_RA, ctx);
+	__build_epilogue(false, ctx);
 }
 
-static int build_body(struct rv_jit_context *ctx, bool extra_pass)
+static int build_body(struct rv_jit_context *ctx, bool extra_pass, int *offset)
 {
 	const struct bpf_prog *prog = ctx->prog;
 	int i;
@@ -1517,12 +1532,12 @@ static int build_body(struct rv_jit_context *ctx, bool extra_pass)
 		ret = emit_insn(insn, ctx, extra_pass);
 		if (ret > 0) {
 			i++;
-			if (ctx->insns == NULL)
-				ctx->offset[i] = ctx->ninsns;
+			if (offset)
+				offset[i] = ctx->ninsns;
 			continue;
 		}
-		if (ctx->insns == NULL)
-			ctx->offset[i] = ctx->ninsns;
+		if (offset)
+			offset[i] = ctx->ninsns;
 		if (ret)
 			return ret;
 	}
@@ -1548,9 +1563,10 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 {
 	bool tmp_blinded = false, extra_pass = false;
 	struct bpf_prog *tmp, *orig_prog = prog;
+	int pass = 0, prev_ninsns = 0, i;
 	struct rv_jit_data *jit_data;
+	unsigned int image_size = 0;
 	struct rv_jit_context *ctx;
-	unsigned int image_size;
 
 	if (!prog->jit_requested)
 		return orig_prog;
@@ -1587,33 +1603,59 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *prog)
 		prog = orig_prog;
 		goto out_offset;
 	}
+	for (i = 0; i < prog->len; i++) {
+		prev_ninsns += 32;
+		ctx->offset[i] = prev_ninsns;
+	}
 
-	/* First pass generates the ctx->offset, but does not emit an image. */
-	if (build_body(ctx, extra_pass)) {
-		prog = orig_prog;
-		goto out_offset;
+	for (i = 0; i < 16; i++) {
+		pass++;
+		ctx->ninsns = 0;
+		if (build_body(ctx, extra_pass, ctx->offset)) {
+			prog = orig_prog;
+			goto out_offset;
+		}
+		build_prologue(ctx);
+		ctx->epilogue_offset = ctx->ninsns;
+		build_epilogue(ctx);
+
+		if (ctx->ninsns == prev_ninsns) {
+			if (jit_data->header)
+				break;
+
+			image_size = sizeof(u32) * ctx->ninsns;
+			jit_data->header =
+				bpf_jit_binary_alloc(image_size,
+						     &jit_data->image,
+						     sizeof(u32),
+						     bpf_fill_ill_insns);
+			if (!jit_data->header) {
+				prog = orig_prog;
+				goto out_offset;
+			}
+
+			ctx->insns = (u32 *)jit_data->image;
+			/* Now, when the image is allocated, the image
+			 * can potentially shrink more (auipc/jalr ->
+			 * jal).
+			 */
+		}
+		prev_ninsns = ctx->ninsns;
 	}
-	build_prologue(ctx);
-	ctx->epilogue_offset = ctx->ninsns;
-	build_epilogue(ctx);
 
-	/* Allocate image, now that we know the size. */
-	image_size = sizeof(u32) * ctx->ninsns;
-	jit_data->header = bpf_jit_binary_alloc(image_size, &jit_data->image,
-						sizeof(u32),
-						bpf_fill_ill_insns);
-	if (!jit_data->header) {
+	if (i == 16) {
+		pr_err("bpf-jit: image did not converge in <%d passes!\n", i);
+		bpf_jit_binary_free(jit_data->header);
 		prog = orig_prog;
 		goto out_offset;
 	}
 
-	/* Second, real pass, that acutally emits the image. */
-	ctx->insns = (u32 *)jit_data->image;
 skip_init_ctx:
+	pass++;
 	ctx->ninsns = 0;
 
 	build_prologue(ctx);
-	if (build_body(ctx, extra_pass)) {
+	if (build_body(ctx, extra_pass, NULL)) {
 		bpf_jit_binary_free(jit_data->header);
 		prog = orig_prog;
 		goto out_offset;
@@ -1621,7 +1663,7 @@ skip_init_ctx:
 	build_epilogue(ctx);
 
 	if (bpf_jit_enable > 1)
-		bpf_jit_dump(prog->len, image_size, 2, ctx->insns);
+		bpf_jit_dump(prog->len, image_size, pass, ctx->insns);
 
 	prog->bpf_func = (void *)ctx->insns;
 	prog->jited = 1;
@@ -1641,3 +1683,16 @@ out:
 		  tmp : orig_prog);
 	return prog;
 }
+
+void *bpf_jit_alloc_exec(unsigned long size)
+{
+	return __vmalloc_node_range(size, PAGE_SIZE, BPF_JIT_REGION_START,
+				    BPF_JIT_REGION_END, GFP_KERNEL,
+				    PAGE_KERNEL_EXEC, 0, NUMA_NO_NODE,
+				    __builtin_return_address(0));
+}
+
+void bpf_jit_free_exec(void *addr)
+{
+	return vfree(addr);
+}
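The compile loop above is classic fixed-point relaxation: seed with a pessimistic estimate of 32 RV instructions per BPF insn, re-JIT until the image stops shrinking (capped at 16 passes), allocate the image, then do one final emission pass. A schematic of that control flow, with the code generator stubbed out by an invented shrink function:

#include <stdio.h>

#define MAX_PASSES 16

/* Stub: pretend each pass tightens some branch encodings. A real pass
 * would re-run the code generator with the offsets learned previously.
 * This toy version converges to 100 instructions. */
static int emit_pass(int prev_ninsns)
{
	int shrunk = prev_ninsns - prev_ninsns / 8;

	return shrunk > 100 ? shrunk : 100;
}

int main(void)
{
	int prev_ninsns = 32 * 8; /* pessimistic seed: 8 BPF insns */
	int ninsns, i;

	for (i = 0; i < MAX_PASSES; i++) {
		ninsns = emit_pass(prev_ninsns);
		if (ninsns == prev_ninsns) {
			printf("converged to %d insns after %d passes\n",
			       ninsns, i + 1);
			return 0;
		}
		prev_ninsns = ninsns;
	}
	printf("did not converge in %d passes\n", MAX_PASSES);
	return 1;
}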
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 5e8949953660..1f6a0388a65f 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -93,6 +93,7 @@ config X86
 	select ARCH_USE_QUEUED_RWLOCKS
 	select ARCH_USE_QUEUED_SPINLOCKS
 	select ARCH_WANT_BATCHED_UNMAP_TLB_FLUSH
+	select ARCH_WANT_DEFAULT_BPF_JIT	if X86_64
 	select ARCH_WANTS_DYNAMIC_TASK_STRUCT
 	select ARCH_WANT_HUGE_PMD_SHARE
 	select ARCH_WANTS_THP_SWAP		if X86_64
diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c
index b8be18427277..4c8a2d1f8470 100644
--- a/arch/x86/net/bpf_jit_comp.c
+++ b/arch/x86/net/bpf_jit_comp.c
@@ -10,10 +10,12 @@
 #include <linux/if_vlan.h>
 #include <linux/bpf.h>
 #include <linux/memory.h>
+#include <linux/sort.h>
 #include <asm/extable.h>
 #include <asm/set_memory.h>
 #include <asm/nospec-branch.h>
 #include <asm/text-patching.h>
+#include <asm/asm-prototypes.h>
 
 static u8 *emit_code(u8 *ptr, u32 bytes, unsigned int len)
 {
@@ -1530,6 +1532,154 @@ int arch_prepare_bpf_trampoline(void *image, struct btf_func_model *m, u32 flags
 	return 0;
 }
 
+static int emit_cond_near_jump(u8 **pprog, void *func, void *ip, u8 jmp_cond)
+{
+	u8 *prog = *pprog;
+	int cnt = 0;
+	s64 offset;
+
+	offset = func - (ip + 2 + 4);
+	if (!is_simm32(offset)) {
+		pr_err("Target %p is out of range\n", func);
+		return -EINVAL;
+	}
+	EMIT2_off32(0x0F, jmp_cond + 0x10, offset);
+	*pprog = prog;
+	return 0;
+}
+
+static void emit_nops(u8 **pprog, unsigned int len)
+{
+	unsigned int i, noplen;
+	u8 *prog = *pprog;
+	int cnt = 0;
+
+	while (len > 0) {
+		noplen = len;
+
+		if (noplen > ASM_NOP_MAX)
+			noplen = ASM_NOP_MAX;
+
+		for (i = 0; i < noplen; i++)
+			EMIT1(ideal_nops[noplen][i]);
+		len -= noplen;
+	}
+
+	*pprog = prog;
+}
+
+static int emit_fallback_jump(u8 **pprog)
+{
+	u8 *prog = *pprog;
+	int err = 0;
+
+#ifdef CONFIG_RETPOLINE
+	/* Note that this assumes the the compiler uses external
+	 * thunks for indirect calls. Both clang and GCC use the same
+	 * naming convention for external thunks.
+	 */
+	err = emit_jump(&prog, __x86_indirect_thunk_rdx, prog);
+#else
+	int cnt = 0;
+
+	EMIT2(0xFF, 0xE2);	/* jmp rdx */
+#endif
+	*pprog = prog;
+	return err;
+}
+
+static int emit_bpf_dispatcher(u8 **pprog, int a, int b, s64 *progs)
+{
+	u8 *jg_reloc, *jg_target, *prog = *pprog;
+	int pivot, err, jg_bytes = 1, cnt = 0;
+	s64 jg_offset;
+
+	if (a == b) {
+		/* Leaf node of recursion, i.e. not a range of indices
+		 * anymore.
+		 */
+		EMIT1(add_1mod(0x48, BPF_REG_3));	/* cmp rdx,func */
+		if (!is_simm32(progs[a]))
+			return -1;
+		EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3),
+			    progs[a]);
+		err = emit_cond_near_jump(&prog,	/* je func */
+					  (void *)progs[a], prog,
+					  X86_JE);
+		if (err)
+			return err;
+
+		err = emit_fallback_jump(&prog);	/* jmp thunk/indirect */
+		if (err)
+			return err;
+
+		*pprog = prog;
+		return 0;
+	}
+
+	/* Not a leaf node, so we pivot, and recursively descend into
+	 * the lower and upper ranges.
+	 */
+	pivot = (b - a) / 2;
+	EMIT1(add_1mod(0x48, BPF_REG_3));		/* cmp rdx,func */
+	if (!is_simm32(progs[a + pivot]))
+		return -1;
+	EMIT2_off32(0x81, add_1reg(0xF8, BPF_REG_3), progs[a + pivot]);
+
+	if (pivot > 2) {				/* jg upper_part */
+		/* Require near jump. */
+		jg_bytes = 4;
+		EMIT2_off32(0x0F, X86_JG + 0x10, 0);
+	} else {
+		EMIT2(X86_JG, 0);
+	}
+	jg_reloc = prog;
+
+	err = emit_bpf_dispatcher(&prog, a, a + pivot,	/* emit lower_part */
+				  progs);
+	if (err)
+		return err;
+
+	/* From Intel 64 and IA-32 Architectures Optimization
+	 * Reference Manual, 3.4.1.4 Code Alignment, Assembly/Compiler
+	 * Coding Rule 11: All branch targets should be 16-byte
+	 * aligned.
+	 */
+	jg_target = PTR_ALIGN(prog, 16);
+	if (jg_target != prog)
+		emit_nops(&prog, jg_target - prog);
+	jg_offset = prog - jg_reloc;
+	emit_code(jg_reloc - jg_bytes, jg_offset, jg_bytes);
+
+	err = emit_bpf_dispatcher(&prog, a + pivot + 1,	/* emit upper_part */
+				  b, progs);
+	if (err)
+		return err;
+
+	*pprog = prog;
+	return 0;
+}
+
+static int cmp_ips(const void *a, const void *b)
+{
+	const s64 *ipa = a;
+	const s64 *ipb = b;
+
+	if (*ipa > *ipb)
+		return 1;
+	if (*ipa < *ipb)
+		return -1;
+	return 0;
+}
+
+int arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs)
+{
+	u8 *prog = image;
+
+	sort(funcs, num_funcs, sizeof(funcs[0]), cmp_ips, NULL);
+	return emit_bpf_dispatcher(&prog, 0, num_funcs - 1, funcs);
+}
+
 struct x64_jit_data {
 	struct bpf_binary_header *header;
 	int *addrs;
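emit_bpf_dispatcher() above JITs a binary search over the sorted program addresses, with a direct `je func` at each leaf and a retpoline-safe indirect jump as the fallback. The generated code is equivalent in shape to this C search (addresses are toy values):

#include <stdio.h>

typedef long long s64;

/* C rendition of the dispatch logic the x86 JIT emits: binary-search
 * the sorted table, "direct jump" on a hit, indirect fallback otherwise. */
static const char *dispatch(const s64 *progs, int a, int b, s64 target)
{
	int pivot;

	if (a == b)
		return progs[a] == target ? "direct jump" : "indirect fallback";

	pivot = a + (b - a) / 2;
	if (target <= progs[pivot])
		return dispatch(progs, a, pivot, target);
	return dispatch(progs, pivot + 1, b, target);
}

int main(void)
{
	s64 progs[] = { 0x1000, 0x2000, 0x3000, 0x4000 }; /* sorted, toy */

	printf("%s\n", dispatch(progs, 0, 3, 0x3000)); /* direct jump */
	printf("%s\n", dispatch(progs, 0, 3, 0x2500)); /* indirect fallback */
	return 0;
}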
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index f73cd917c44f..42058fad6a3c 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -269,7 +269,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr(umem);
+	xsk_umem_release_addr(umem);
 	return true;
 }
 
@@ -306,7 +306,7 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 	return true;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index cf9b8b22d24f..0af1f0b951c0 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -555,7 +555,7 @@ ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 
 	rx_buf->handle = handle + umem->headroom;
 
-	xsk_umem_discard_addr(umem);
+	xsk_umem_release_addr(umem);
 	return true;
 }
 
@@ -591,7 +591,7 @@ ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 
 	rx_buf->handle = handle + umem->headroom;
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 	return true;
 }
 
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index b43be9f14105..74b540ebb3dc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -277,7 +277,7 @@ static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr(umem);
+	xsk_umem_release_addr(umem);
 	return true;
 }
 
@@ -304,7 +304,7 @@ static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
 
 	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 	return true;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 475b6bd5d29b..62fc8a128a8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -35,7 +35,7 @@ int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
 	 */
 	dma_info->addr = xdp_umem_get_dma(umem, handle);
 
-	xsk_umem_discard_addr_rq(umem);
+	xsk_umem_release_addr_rq(umem);
 
 	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
 				   DMA_BIDIRECTIONAL);
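The rename from xsk_umem_discard_addr() to xsk_umem_release_addr() is mechanical across these drivers, but the calling pattern it belongs to is worth spelling out: peek the next fill-queue address, use it to build an RX buffer, then release the queue slot. A hedged sketch of that pattern with stand-in types — this is not the real driver or umem code:

#include <stdbool.h>
#include <stdint.h>

/* Stand-ins for the real umem/fill-queue API; only the peek/use/release
 * calling pattern mirrors the drivers touched by this patch. */
struct umem { uint64_t fq[64]; unsigned int head, tail; };

static bool umem_peek_addr(struct umem *u, uint64_t *addr)
{
	if (u->head == u->tail)
		return false; /* fill queue empty */
	*addr = u->fq[u->head % 64];
	return true;
}

static void umem_release_addr(struct umem *u)
{
	u->head++; /* consume the slot we peeked */
}

static bool alloc_rx_buffer(struct umem *u, uint64_t *handle)
{
	uint64_t addr;

	if (!umem_peek_addr(u, &addr))
		return false;
	*handle = addr; /* a real driver would DMA-map addr here */
	umem_release_addr(u);
	return true;
}

int main(void)
{
	struct umem u = { .fq = { 0x1000, 0x2000 }, .head = 0, .tail = 2 };
	uint64_t handle;

	while (alloc_rx_buffer(&u, &handle))
		; /* drain both fill-queue entries */
	return 0;
}

Note also the related prototype change in include/net/xdp_sock.h below: xsk_umem_peek_addr() now returns bool rather than a u64 pointer.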
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 9be71c195d74..a11d5b7dbbf3 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -85,6 +85,7 @@ int cgroup_bpf_inherit(struct cgroup *cgrp);
 void cgroup_bpf_offline(struct cgroup *cgrp);
 
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			struct bpf_prog *replace_prog,
 			enum bpf_attach_type type, u32 flags);
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 			enum bpf_attach_type type);
@@ -93,7 +94,8 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 
 /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */
 int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
-		      enum bpf_attach_type type, u32 flags);
+		      struct bpf_prog *replace_prog, enum bpf_attach_type type,
+		      u32 flags);
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		      enum bpf_attach_type type, u32 flags);
 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 085a59afba85..b14e51d56a82 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -471,11 +471,69 @@ struct bpf_trampoline {
 	void *image;
 	u64 selector;
 };
+
+#define BPF_DISPATCHER_MAX 48 /* Fits in 2048B */
+
+struct bpf_dispatcher_prog {
+	struct bpf_prog *prog;
+	refcount_t users;
+};
+
+struct bpf_dispatcher {
+	/* dispatcher mutex */
+	struct mutex mutex;
+	void *func;
+	struct bpf_dispatcher_prog progs[BPF_DISPATCHER_MAX];
+	int num_progs;
+	void *image;
+	u32 image_off;
+};
+
+static __always_inline unsigned int bpf_dispatcher_nopfunc(
+	const void *ctx,
+	const struct bpf_insn *insnsi,
+	unsigned int (*bpf_func)(const void *,
+				 const struct bpf_insn *))
+{
+	return bpf_func(ctx, insnsi);
+}
 #ifdef CONFIG_BPF_JIT
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key);
 int bpf_trampoline_link_prog(struct bpf_prog *prog);
 int bpf_trampoline_unlink_prog(struct bpf_prog *prog);
 void bpf_trampoline_put(struct bpf_trampoline *tr);
+void *bpf_jit_alloc_exec_page(void);
+#define BPF_DISPATCHER_INIT(name) {			\
+	.mutex = __MUTEX_INITIALIZER(name.mutex),	\
+	.func = &name##func,				\
+	.progs = {},					\
+	.num_progs = 0,					\
+	.image = NULL,					\
+	.image_off = 0					\
+}
+
+#define DEFINE_BPF_DISPATCHER(name)					\
+	noinline unsigned int name##func(				\
+		const void *ctx,					\
+		const struct bpf_insn *insnsi,				\
+		unsigned int (*bpf_func)(const void *,			\
+					 const struct bpf_insn *))	\
+	{								\
+		return bpf_func(ctx, insnsi);				\
+	}								\
+	EXPORT_SYMBOL(name##func);					\
+	struct bpf_dispatcher name = BPF_DISPATCHER_INIT(name);
+#define DECLARE_BPF_DISPATCHER(name)					\
+	unsigned int name##func(					\
+		const void *ctx,					\
+		const struct bpf_insn *insnsi,				\
+		unsigned int (*bpf_func)(const void *,			\
+					 const struct bpf_insn *));	\
+	extern struct bpf_dispatcher name;
+#define BPF_DISPATCHER_FUNC(name) name##func
+#define BPF_DISPATCHER_PTR(name) (&name)
+void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from,
+				struct bpf_prog *to);
 #else
 static inline struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
@@ -490,6 +548,13 @@ static inline int bpf_trampoline_unlink_prog(struct bpf_prog *prog)
 	return -ENOTSUPP;
 }
 static inline void bpf_trampoline_put(struct bpf_trampoline *tr) {}
+#define DEFINE_BPF_DISPATCHER(name)
+#define DECLARE_BPF_DISPATCHER(name)
+#define BPF_DISPATCHER_FUNC(name) bpf_dispatcher_nopfunc
+#define BPF_DISPATCHER_PTR(name) NULL
+static inline void bpf_dispatcher_change_prog(struct bpf_dispatcher *d,
+					      struct bpf_prog *from,
+					      struct bpf_prog *to) {}
 #endif
 
 struct bpf_func_info_aux {
@@ -897,14 +962,14 @@ struct sk_buff;
 
 struct bpf_dtab_netdev *__dev_map_lookup_elem(struct bpf_map *map, u32 key);
 struct bpf_dtab_netdev *__dev_map_hash_lookup_elem(struct bpf_map *map, u32 key);
-void __dev_map_flush(struct bpf_map *map);
+void __dev_map_flush(void);
 int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
-void __cpu_map_flush(struct bpf_map *map);
+void __cpu_map_flush(void);
 int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 
@@ -943,6 +1008,8 @@ int btf_distill_func_proto(struct bpf_verifier_log *log,
 
 int btf_check_func_arg_match(struct bpf_verifier_env *env, int subprog);
 
+struct bpf_prog *bpf_prog_by_id(u32 id);
+
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1004,7 +1071,7 @@ static inline struct net_device *__dev_map_hash_lookup_elem(struct bpf_map *map,
 	return NULL;
 }
 
-static inline void __dev_map_flush(struct bpf_map *map)
+static inline void __dev_map_flush(void)
 {
 }
 
@@ -1033,7 +1100,7 @@ struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key)
 	return NULL;
 }
 
-static inline void __cpu_map_flush(struct bpf_map *map)
+static inline void __cpu_map_flush(void)
 {
 }
 
@@ -1074,6 +1141,11 @@ static inline int bpf_prog_test_run_flow_dissector(struct bpf_prog *prog,
 static inline void bpf_map_put(struct bpf_map *map)
 {
 }
+
+static inline struct bpf_prog *bpf_prog_by_id(u32 id)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
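DEFINE_BPF_DISPATCHER(name) expands to a patchable trampoline named name##func plus a struct bpf_dispatcher instance; DECLARE_BPF_DISPATCHER gives other translation units the extern declarations. A rough user-space expansion of what the macros produce, with BPF types reduced to minimal stand-ins for illustration:

#include <stdio.h>

struct bpf_insn { int dummy; };

typedef unsigned int (*bpf_func_t)(const void *, const struct bpf_insn *);

/* Roughly what DEFINE_BPF_DISPATCHER(foo) expands to: a trampoline whose
 * default body simply forwards to bpf_func. The JIT can later rewrite the
 * trampoline into a direct-call binary search over attached programs. */
unsigned int foofunc(const void *ctx, const struct bpf_insn *insnsi,
		     bpf_func_t bpf_func)
{
	return bpf_func(ctx, insnsi);
}

struct bpf_dispatcher { void *func; int num_progs; };
struct bpf_dispatcher foo = { .func = &foofunc, .num_progs = 0 };

static unsigned int my_prog(const void *ctx, const struct bpf_insn *insnsi)
{
	(void)ctx; (void)insnsi;
	return 42;
}

int main(void)
{
	/* BPF_DISPATCHER_FUNC(foo) is just foofunc. */
	printf("%u\n", foofunc(NULL, NULL, my_prog));
	return 0;
}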
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 345f3748e0fb..70e6dd960bca 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -559,23 +559,26 @@ struct sk_filter {
 
 DECLARE_STATIC_KEY_FALSE(bpf_stats_enabled_key);
 
-#define BPF_PROG_RUN(prog, ctx)	({				\
-	u32 ret;						\
-	cant_sleep();						\
-	if (static_branch_unlikely(&bpf_stats_enabled_key)) {	\
-		struct bpf_prog_stats *stats;			\
-		u64 start = sched_clock();			\
-		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
-		stats = this_cpu_ptr(prog->aux->stats);		\
-		u64_stats_update_begin(&stats->syncp);		\
-		stats->cnt++;					\
-		stats->nsecs += sched_clock() - start;		\
-		u64_stats_update_end(&stats->syncp);		\
-	} else {						\
-		ret = (*(prog)->bpf_func)(ctx, (prog)->insnsi);	\
-	}							\
+#define __BPF_PROG_RUN(prog, ctx, dfunc)	({			\
+	u32 ret;							\
+	cant_sleep();							\
+	if (static_branch_unlikely(&bpf_stats_enabled_key)) {		\
+		struct bpf_prog_stats *stats;				\
+		u64 start = sched_clock();				\
+		ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
+		stats = this_cpu_ptr(prog->aux->stats);			\
+		u64_stats_update_begin(&stats->syncp);			\
+		stats->cnt++;						\
+		stats->nsecs += sched_clock() - start;			\
+		u64_stats_update_end(&stats->syncp);			\
+	} else {							\
+		ret = dfunc(ctx, (prog)->insnsi, (prog)->bpf_func);	\
+	}								\
 	ret; })
 
+#define BPF_PROG_RUN(prog, ctx) __BPF_PROG_RUN(prog, ctx,		\
+					       bpf_dispatcher_nopfunc)
+
 #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN
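__BPF_PROG_RUN now routes the actual invocation through a dispatcher function `dfunc` instead of calling prog->bpf_func directly; the plain BPF_PROG_RUN wrapper passes bpf_dispatcher_nopfunc, which just forwards, so only callers that opt into a real dispatcher change behavior. A condensed C view of that indirection (stats branch omitted, names shortened):

struct bpf_insn;

typedef unsigned int (*bpf_func_t)(const void *, const struct bpf_insn *);
typedef unsigned int (*dispatcher_fn)(const void *, const struct bpf_insn *,
				      bpf_func_t);

/* Default dispatcher: no dispatch, just call the program. */
static unsigned int nopfunc(const void *ctx, const struct bpf_insn *insnsi,
			    bpf_func_t bpf_func)
{
	return bpf_func(ctx, insnsi);
}

/* __BPF_PROG_RUN(prog, ctx, dfunc) ~ dfunc(ctx, insnsi, bpf_func) */
static unsigned int prog_run(const void *ctx, const struct bpf_insn *insnsi,
			     bpf_func_t bpf_func, dispatcher_fn dfunc)
{
	return dfunc(ctx, insnsi, bpf_func);
}

static unsigned int dummy(const void *ctx, const struct bpf_insn *insnsi)
{
	(void)ctx; (void)insnsi;
	return 1;
}

int main(void)
{
	return prog_run(0, 0, dummy, nopfunc) == 1 ? 0 : 1;
}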
@@ -589,7 +592,6 @@ struct bpf_redirect_info {
 	u32 tgt_index;
 	void *tgt_value;
 	struct bpf_map *map;
-	struct bpf_map *map_to_flush;
 	u32 kern_flags;
 };
 
@@ -699,6 +701,8 @@ static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog,
 	return res;
 }
 
+DECLARE_BPF_DISPATCHER(bpf_dispatcher_xdp)
+
 static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 					    struct xdp_buff *xdp)
 {
@@ -708,9 +712,12 @@ static __always_inline u32 bpf_prog_run_xdp(const struct bpf_prog *prog,
 	 * already takes rcu_read_lock() when fetching the program, so
 	 * it's not necessary here anymore.
 	 */
-	return BPF_PROG_RUN(prog, xdp);
+	return __BPF_PROG_RUN(prog, xdp,
+			      BPF_DISPATCHER_FUNC(bpf_dispatcher_xdp));
 }
 
+void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog);
+
 static inline u32 bpf_prog_insn_size(const struct bpf_prog *prog)
 {
 	return prog->len * sizeof(struct bpf_insn);
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e3780e4b74e1..e86ec48ef627 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -72,7 +72,6 @@ struct xdp_umem {
 
 struct xsk_map {
 	struct bpf_map map;
-	struct list_head __percpu *flush_list;
 	spinlock_t lock; /* Synchronize map updates */
 	struct xdp_sock *xsk_map[];
 };
@@ -119,8 +118,8 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 /* Used from netdev driver */
 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
-u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
-void xsk_umem_discard_addr(struct xdp_umem *umem);
+bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_release_addr(struct xdp_umem *umem);
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
 bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
 void xsk_umem_consume_tx_done(struct xdp_umem *umem);
@@ -139,9 +138,8 @@ void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
 			     struct xdp_sock **map_entry);
 int xsk_map_inc(struct xsk_map *map);
 void xsk_map_put(struct xsk_map *map);
-int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-		       struct xdp_sock *xs);
-void __xsk_map_flush(struct bpf_map *map);
+int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
+void __xsk_map_flush(void);
 
 static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
 						     u32 key)
@@ -199,7 +197,7 @@ static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
 	return xsk_umem_has_addrs(umem, cnt - rq->length);
 }
 
-static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
 {
 	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
 
@@ -210,12 +208,12 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
 	return addr;
 }
 
-static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
 {
 	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
 
 	if (!rq->length)
-		xsk_umem_discard_addr(umem);
+		xsk_umem_release_addr(umem);
 	else
 		rq->length--;
 }
@@ -260,7 +258,7 @@ static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
 	return NULL;
 }
 
-static inline void xsk_umem_discard_addr(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr(struct xdp_umem *umem)
 {
 }
 
@@ -334,7 +332,7 @@ static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
 	return NULL;
 }
 
-static inline void xsk_umem_discard_addr_rq(struct xdp_umem *umem)
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
 {
 }
 
@@ -369,13 +367,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
 	return 0;
 }
 
-static inline int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp,
-				     struct xdp_sock *xs)
+static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	return -EOPNOTSUPP;
 }
 
-static inline void __xsk_map_flush(struct bpf_map *map)
+static inline void __xsk_map_flush(void)
 {
 }
diff --git a/include/uapi/linux/audit.h b/include/uapi/linux/audit.h
index 3ad935527177..a534d71e689a 100644
--- a/include/uapi/linux/audit.h
+++ b/include/uapi/linux/audit.h
@@ -116,6 +116,7 @@
 #define AUDIT_FANOTIFY		1331	/* Fanotify access decision */
 #define AUDIT_TIME_INJOFFSET	1332	/* Timekeeping offset injected */
 #define AUDIT_TIME_ADJNTPVAL	1333	/* NTP value adjustment */
+#define AUDIT_BPF		1334	/* BPF subsystem */
 
 #define AUDIT_AVC		1400	/* SE Linux avc denial or grant */
 #define AUDIT_SELINUX_ERR	1401	/* Internal SE Linux Errors */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index dbbcf0b02970..7df436da542d 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -231,6 +231,11 @@ enum bpf_attach_type {
  * When children program makes decision (like picking TCP CA or sock bind)
  * parent program has a chance to override it.
  *
+ * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of
+ * programs for a cgroup. Though it's possible to replace an old program at
+ * any position by also specifying BPF_F_REPLACE flag and position itself in
+ * replace_bpf_fd attribute. Old program at this position will be released.
+ *
  * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups.
  * A cgroup with NONE doesn't allow any programs in sub-cgroups.
  * Ex1:
@@ -249,6 +254,7 @@ enum bpf_attach_type {
  */
 #define BPF_F_ALLOW_OVERRIDE	(1U << 0)
 #define BPF_F_ALLOW_MULTI	(1U << 1)
+#define BPF_F_REPLACE		(1U << 2)
 
 /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the
  * verifier will perform strict alignment checking as if the kernel
@@ -442,6 +448,10 @@ union bpf_attr {
 		__u32		attach_bpf_fd;	/* eBPF program to attach */
 		__u32		attach_type;
 		__u32		attach_flags;
+		__u32		replace_bpf_fd;	/* previously attached eBPF
+						 * program to replace if
+						 * BPF_F_REPLACE is used
+						 */
 	};
 
 	struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */
diff --git a/include/uapi/linux/btf.h b/include/uapi/linux/btf.h
index c02dec97e1ce..1a2898c482ee 100644
--- a/include/uapi/linux/btf.h
+++ b/include/uapi/linux/btf.h
@@ -142,7 +142,8 @@ struct btf_param {
 
 enum {
 	BTF_VAR_STATIC = 0,
-	BTF_VAR_GLOBAL_ALLOCATED,
+	BTF_VAR_GLOBAL_ALLOCATED = 1,
+	BTF_VAR_GLOBAL_EXTERN = 2,
 };
 
 /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe
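From user space, the BPF_F_REPLACE flag added to the uapi above is used together with BPF_F_ALLOW_MULTI: replace_bpf_fd names the already-attached program to swap out at its current list position. A hedged raw-syscall sketch, assuming uapi headers that include this patch; fds and error handling are the caller's problem:

#include <linux/bpf.h>
#include <string.h>
#include <sys/syscall.h>
#include <unistd.h>

/* Atomically replace old_prog_fd with new_prog_fd at its current
 * position in the cgroup's multi-prog list, instead of appending. */
static int attach_replace(int cgroup_fd, int new_prog_fd, int old_prog_fd)
{
	union bpf_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.target_fd = cgroup_fd;
	attr.attach_bpf_fd = new_prog_fd;
	attr.attach_type = BPF_CGROUP_INET_EGRESS;
	attr.attach_flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE;
	attr.replace_bpf_fd = old_prog_fd;

	return syscall(__NR_bpf, BPF_PROG_ATTACH, &attr, sizeof(attr));
}

int main(void)
{
	(void)attach_replace; /* illustration only; supply real fds to use */
	return 0;
}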
diff --git a/init/Kconfig b/init/Kconfig
index a34064a031a5..890aaa62efde 100644
--- a/init/Kconfig
+++ b/init/Kconfig
@@ -1604,6 +1604,9 @@ config BPF_SYSCALL
 	  Enable the bpf() system call that allows to manipulate eBPF
 	  programs and maps via file descriptors.
 
+config ARCH_WANT_DEFAULT_BPF_JIT
+	bool
+
 config BPF_JIT_ALWAYS_ON
 	bool "Permanently enable BPF JIT and remove BPF interpreter"
 	depends on BPF_SYSCALL && HAVE_EBPF_JIT && BPF_JIT
@@ -1611,6 +1614,10 @@ config BPF_JIT_ALWAYS_ON
 	  Enables BPF JIT and removes BPF interpreter to avoid
 	  speculative execution of BPF instructions by the interpreter
 
+config BPF_JIT_DEFAULT_ON
+	def_bool ARCH_WANT_DEFAULT_BPF_JIT || BPF_JIT_ALWAYS_ON
+	depends on HAVE_EBPF_JIT && BPF_JIT
+
 config USERFAULTFD
 	bool "Enable userfaultfd() system call"
 	depends on MMU
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 3f671bf617e8..d4f330351f87 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -8,6 +8,7 @@ obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
+obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 4fb20ab179fe..ee871d4a51f7 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -103,8 +103,7 @@ static u32 prog_list_length(struct list_head *head)
  * if parent has overridable or multi-prog, allow attaching
  */
 static bool hierarchy_allows_attach(struct cgroup *cgrp,
-				    enum bpf_attach_type type,
-				    u32 new_flags)
+				    enum bpf_attach_type type)
 {
 	struct cgroup *p;
 
@@ -283,31 +282,34 @@ cleanup:
  * propagate the change to descendants
  * @cgrp: The cgroup which descendants to traverse
  * @prog: A program to attach
+ * @replace_prog: Previously attached program to replace if BPF_F_REPLACE is set
  * @type: Type of attach operation
  * @flags: Option flags
  *
  * Must be called with cgroup_mutex held.
  */
 int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
+			struct bpf_prog *replace_prog,
 			enum bpf_attach_type type, u32 flags)
 {
+	u32 saved_flags = (flags & (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI));
 	struct list_head *progs = &cgrp->bpf.progs[type];
 	struct bpf_prog *old_prog = NULL;
 	struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE],
 		*old_storage[MAX_BPF_CGROUP_STORAGE_TYPE] = {NULL};
+	struct bpf_prog_list *pl, *replace_pl = NULL;
 	enum bpf_cgroup_storage_type stype;
-	struct bpf_prog_list *pl;
-	bool pl_was_allocated;
 	int err;
 
-	if ((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI))
+	if (((flags & BPF_F_ALLOW_OVERRIDE) && (flags & BPF_F_ALLOW_MULTI)) ||
+	    ((flags & BPF_F_REPLACE) && !(flags & BPF_F_ALLOW_MULTI)))
 		/* invalid combination */
 		return -EINVAL;
 
-	if (!hierarchy_allows_attach(cgrp, type, flags))
+	if (!hierarchy_allows_attach(cgrp, type))
 		return -EPERM;
 
-	if (!list_empty(progs) && cgrp->bpf.flags[type] != flags)
+	if (!list_empty(progs) && cgrp->bpf.flags[type] != saved_flags)
 		/* Disallow attaching non-overridable on top
 		 * of existing overridable in this cgroup.
 		 * Disallow attaching multi-prog if overridable or none
@@ -317,6 +319,21 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 	if (prog_list_length(progs) >= BPF_CGROUP_MAX_PROGS)
 		return -E2BIG;
 
+	if (flags & BPF_F_ALLOW_MULTI) {
+		list_for_each_entry(pl, progs, node) {
+			if (pl->prog == prog)
+				/* disallow attaching the same prog twice */
+				return -EINVAL;
+			if (pl->prog == replace_prog)
+				replace_pl = pl;
+		}
+		if ((flags & BPF_F_REPLACE) && !replace_pl)
+			/* prog to replace not found for cgroup */
+			return -ENOENT;
+	} else if (!list_empty(progs)) {
+		replace_pl = list_first_entry(progs, typeof(*pl), node);
+	}
+
 	for_each_cgroup_storage_type(stype) {
 		storage[stype] = bpf_cgroup_storage_alloc(prog, stype);
 		if (IS_ERR(storage[stype])) {
@@ -327,53 +344,28 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog,
 		}
 	}
 
-	if (flags & BPF_F_ALLOW_MULTI) {
-		list_for_each_entry(pl, progs, node) {
-			if (pl->prog == prog) {
-				/* disallow attaching the same prog twice */
-				for_each_cgroup_storage_type(stype)
-					bpf_cgroup_storage_free(storage[stype]);
-				return -EINVAL;
-			}
+	if (replace_pl) {
+		pl = replace_pl;
+		old_prog = pl->prog;
+		for_each_cgroup_storage_type(stype) {
+			old_storage[stype] = pl->storage[stype];
+			bpf_cgroup_storage_unlink(old_storage[stype]);
 		}
-
+	} else {
 		pl = kmalloc(sizeof(*pl), GFP_KERNEL);
 		if (!pl) {
 			for_each_cgroup_storage_type(stype)
 				bpf_cgroup_storage_free(storage[stype]);
 			return -ENOMEM;
 		}
-
-		pl_was_allocated = true;
-		pl->prog = prog;
-		for_each_cgroup_storage_type(stype)
-			pl->storage[stype] = storage[stype];
 		list_add_tail(&pl->node, progs);
-	} else {
-		if (list_empty(progs)) {
-			pl = kmalloc(sizeof(*pl), GFP_KERNEL);
-			if (!pl) {
-				for_each_cgroup_storage_type(stype)
-					bpf_cgroup_storage_free(storage[stype]);
-				return -ENOMEM;
-			}
-			pl_was_allocated = true;
-			list_add_tail(&pl->node, progs);
-		} else {
-			pl = list_first_entry(progs, typeof(*pl), node);
-			old_prog = pl->prog;
-			for_each_cgroup_storage_type(stype) {
-				old_storage[stype] = pl->storage[stype];
-				bpf_cgroup_storage_unlink(old_storage[stype]);
-			}
-			pl_was_allocated = false;
-		}
-		pl->prog = prog;
-		for_each_cgroup_storage_type(stype)
-			pl->storage[stype] = storage[stype];
 	}
 
-	cgrp->bpf.flags[type] = flags;
+	pl->prog = prog;
+	for_each_cgroup_storage_type(stype)
+		pl->storage[stype] = storage[stype];
+
+	cgrp->bpf.flags[type] = saved_flags;
 
 	err = update_effective_progs(cgrp, type);
 	if (err)
@@ -401,7 +393,7 @@ cleanup:
 		pl->storage[stype] = old_storage[stype];
 		bpf_cgroup_storage_link(old_storage[stype], cgrp, type);
 	}
-	if (pl_was_allocated) {
+	if (!replace_pl) {
 		list_del(&pl->node);
 		kfree(pl);
 	}
@@ -539,6 +531,7 @@ int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 			   enum bpf_prog_type ptype, struct bpf_prog *prog)
 {
+	struct bpf_prog *replace_prog = NULL;
 	struct cgroup *cgrp;
 	int ret;
 
@@ -546,8 +539,20 @@ int cgroup_bpf_prog_attach(const union bpf_attr *attr,
 	if (IS_ERR(cgrp))
 		return PTR_ERR(cgrp);
 
-	ret = cgroup_bpf_attach(cgrp, prog, attr->attach_type,
+	if ((attr->attach_flags & BPF_F_ALLOW_MULTI) &&
+	    (attr->attach_flags & BPF_F_REPLACE)) {
+		replace_prog = bpf_prog_get_type(attr->replace_bpf_fd, ptype);
+		if (IS_ERR(replace_prog)) {
+			cgroup_put(cgrp);
+			return PTR_ERR(replace_prog);
+		}
+	}
+
+	ret = cgroup_bpf_attach(cgrp, prog, replace_prog, attr->attach_type,
 				attr->attach_flags);
+
+	if (replace_prog)
+		bpf_prog_put(replace_prog);
 	cgroup_put(cgrp);
 	return ret;
 }
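The rewritten attach path folds the old "multi vs. single" branches into one: pick replace_pl up front (the matching entry under BPF_F_REPLACE, or the head entry in non-multi mode), then either reuse that node or append a new one. The selection step, reduced to a walk over a plain C array — a hypothetical helper for illustration, not kernel code:

#include <assert.h>
#include <errno.h>
#include <stddef.h>

struct prog_list { const void *prog; };

/* Mirrors the new __cgroup_bpf_attach() selection logic. Returns the
 * slot to overwrite, or NULL to append; *err reports failures. */
static struct prog_list *pick_replace_slot(struct prog_list *pl, int n,
					   const void *prog,
					   const void *replace_prog,
					   int allow_multi, int do_replace,
					   int *err)
{
	struct prog_list *slot = NULL;
	int i;

	*err = 0;
	if (allow_multi) {
		for (i = 0; i < n; i++) {
			if (pl[i].prog == prog) {
				*err = -EINVAL; /* same prog attached twice */
				return NULL;
			}
			if (pl[i].prog == replace_prog)
				slot = &pl[i];
		}
		if (do_replace && !slot)
			*err = -ENOENT; /* prog to replace not found */
	} else if (n > 0) {
		slot = &pl[0]; /* single-prog mode: overwrite the head */
	}
	return slot;
}

int main(void)
{
	int a, b, err;
	struct prog_list pl[] = { { &a } };

	assert(pick_replace_slot(pl, 1, &b, &a, 1, 1, &err) == &pl[0] && !err);
	assert(pick_replace_slot(pl, 1, &b, &b, 1, 1, &err) == NULL &&
	       err == -ENOENT);
	return 0;
}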
a/kernel/bpf/core.c b/kernel/bpf/core.c index af6b738cf435..29d47aae0dd1 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -222,8 +222,6 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, u32 pages, delta; int ret; - BUG_ON(fp_old == NULL); - size = round_up(size, PAGE_SIZE); pages = size / PAGE_SIZE; if (pages <= fp_old->pages) @@ -520,9 +518,9 @@ void bpf_prog_kallsyms_del_all(struct bpf_prog *fp) #ifdef CONFIG_BPF_JIT /* All BPF JIT sysctl knobs here. */ -int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_ALWAYS_ON); +int bpf_jit_enable __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); +int bpf_jit_kallsyms __read_mostly = IS_BUILTIN(CONFIG_BPF_JIT_DEFAULT_ON); int bpf_jit_harden __read_mostly; -int bpf_jit_kallsyms __read_mostly; long bpf_jit_limit __read_mostly; static __always_inline void diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c index ef49e17ae47c..70f71b154fa5 100644 --- a/kernel/bpf/cpumap.c +++ b/kernel/bpf/cpumap.c @@ -72,17 +72,18 @@ struct bpf_cpu_map { struct bpf_map map; /* Below members specific for map type */ struct bpf_cpu_map_entry **cpu_map; - struct list_head __percpu *flush_list; }; -static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx); +static DEFINE_PER_CPU(struct list_head, cpu_map_flush_list); + +static int bq_flush_to_queue(struct xdp_bulk_queue *bq); static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) { struct bpf_cpu_map *cmap; int err = -ENOMEM; - int ret, cpu; u64 cost; + int ret; if (!capable(CAP_SYS_ADMIN)) return ERR_PTR(-EPERM); @@ -106,7 +107,6 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) /* make sure page count doesn't overflow */ cost = (u64) cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *); - cost += sizeof(struct list_head) * num_possible_cpus(); /* Notice returns -EPERM on if map size is larger than memlock limit */ ret = bpf_map_charge_init(&cmap->map.memory, cost); @@ -115,23 +115,14 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr) goto free_cmap; } - cmap->flush_list = alloc_percpu(struct list_head); - if (!cmap->flush_list) - goto free_charge; - - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(per_cpu_ptr(cmap->flush_list, cpu)); - /* Alloc array for possible remote "destination" CPUs */ cmap->cpu_map = bpf_map_area_alloc(cmap->map.max_entries * sizeof(struct bpf_cpu_map_entry *), cmap->map.numa_node); if (!cmap->cpu_map) - goto free_percpu; + goto free_charge; return &cmap->map; -free_percpu: - free_percpu(cmap->flush_list); free_charge: bpf_map_charge_finish(&cmap->map.memory); free_cmap: @@ -399,22 +390,14 @@ free_rcu: static void __cpu_map_entry_free(struct rcu_head *rcu) { struct bpf_cpu_map_entry *rcpu; - int cpu; /* This cpu_map_entry have been disconnected from map and one - * RCU graze-period have elapsed. Thus, XDP cannot queue any + * RCU grace-period have elapsed. Thus, XDP cannot queue any * new packets and cannot change/set flush_needed that can * find this entry. */ rcpu = container_of(rcu, struct bpf_cpu_map_entry, rcu); - /* Flush remaining packets in percpu bulkq */ - for_each_online_cpu(cpu) { - struct xdp_bulk_queue *bq = per_cpu_ptr(rcpu->bulkq, cpu); - - /* No concurrent bq_enqueue can run at this point */ - bq_flush_to_queue(bq, false); - } free_percpu(rcpu->bulkq); /* Cannot kthread_stop() here, last put free rcpu resources */ put_cpu_map_entry(rcpu); @@ -436,7 +419,7 @@ static void __cpu_map_entry_free(struct rcu_head *rcu) * percpu bulkq to queue. 
Due to caller map_delete_elem() disable * preemption, cannot call kthread_stop() to make sure queue is empty. * Instead a work_queue is started for stopping kthread, - * cpu_map_kthread_stop, which waits for an RCU graze period before + * cpu_map_kthread_stop, which waits for an RCU grace period before * stopping kthread, emptying the queue. */ static void __cpu_map_entry_replace(struct bpf_cpu_map *cmap, @@ -507,7 +490,6 @@ static int cpu_map_update_elem(struct bpf_map *map, void *key, void *value, static void cpu_map_free(struct bpf_map *map) { struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); - int cpu; u32 i; /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, @@ -522,18 +504,6 @@ static void cpu_map_free(struct bpf_map *map) bpf_clear_redirect_map(map); synchronize_rcu(); - /* To ensure all pending flush operations have completed wait for flush - * list be empty on _all_ cpus. Because the above synchronize_rcu() - * ensures the map is disconnected from the program we can assume no new - * items will be added to the list. - */ - for_each_online_cpu(cpu) { - struct list_head *flush_list = per_cpu_ptr(cmap->flush_list, cpu); - - while (!list_empty(flush_list)) - cond_resched(); - } - /* For cpu_map the remote CPUs can still be using the entries * (struct bpf_cpu_map_entry). */ @@ -544,10 +514,9 @@ static void cpu_map_free(struct bpf_map *map) if (!rcpu) continue; - /* bq flush and cleanup happens after RCU graze-period */ + /* bq flush and cleanup happens after RCU grace-period */ __cpu_map_entry_replace(cmap, i, NULL); /* call_rcu */ } - free_percpu(cmap->flush_list); bpf_map_area_free(cmap->cpu_map); kfree(cmap); } @@ -599,7 +568,7 @@ const struct bpf_map_ops cpu_map_ops = { .map_check_btf = map_check_no_btf, }; -static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx) +static int bq_flush_to_queue(struct xdp_bulk_queue *bq) { struct bpf_cpu_map_entry *rcpu = bq->obj; unsigned int processed = 0, drops = 0; @@ -620,10 +589,7 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx) err = __ptr_ring_produce(q, xdpf); if (err) { drops++; - if (likely(in_napi_ctx)) - xdp_return_frame_rx_napi(xdpf); - else - xdp_return_frame(xdpf); + xdp_return_frame_rx_napi(xdpf); } processed++; } @@ -642,11 +608,11 @@ static int bq_flush_to_queue(struct xdp_bulk_queue *bq, bool in_napi_ctx) */ static int bq_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_frame *xdpf) { - struct list_head *flush_list = this_cpu_ptr(rcpu->cmap->flush_list); + struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); struct xdp_bulk_queue *bq = this_cpu_ptr(rcpu->bulkq); if (unlikely(bq->count == CPU_MAP_BULK_SIZE)) - bq_flush_to_queue(bq, true); + bq_flush_to_queue(bq); /* Notice, xdp_buff/page MUST be queued here, long enough for * driver to code invoking us to finished, due to driver @@ -681,16 +647,26 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, return 0; } -void __cpu_map_flush(struct bpf_map *map) +void __cpu_map_flush(void) { - struct bpf_cpu_map *cmap = container_of(map, struct bpf_cpu_map, map); - struct list_head *flush_list = this_cpu_ptr(cmap->flush_list); + struct list_head *flush_list = this_cpu_ptr(&cpu_map_flush_list); struct xdp_bulk_queue *bq, *tmp; list_for_each_entry_safe(bq, tmp, flush_list, flush_node) { - bq_flush_to_queue(bq, true); + bq_flush_to_queue(bq); /* If already running, costs spin_lock_irqsave + smb_mb */ wake_up_process(bq->obj->kthread); } } + +static int __init 
cpu_map_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) + INIT_LIST_HEAD(&per_cpu(cpu_map_flush_list, cpu)); + return 0; +} + +subsys_initcall(cpu_map_init); diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c index 3d3d61b5985b..da9c832fc5c8 100644 --- a/kernel/bpf/devmap.c +++ b/kernel/bpf/devmap.c @@ -75,7 +75,6 @@ struct bpf_dtab_netdev { struct bpf_dtab { struct bpf_map map; struct bpf_dtab_netdev **netdev_map; /* DEVMAP type only */ - struct list_head __percpu *flush_list; struct list_head list; /* these are only used for DEVMAP_HASH type maps */ @@ -85,6 +84,7 @@ struct bpf_dtab { u32 n_buckets; }; +static DEFINE_PER_CPU(struct list_head, dev_map_flush_list); static DEFINE_SPINLOCK(dev_map_lock); static LIST_HEAD(dev_map_list); @@ -109,8 +109,8 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab, static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) { - int err, cpu; - u64 cost; + u64 cost = 0; + int err; /* check sanity of attributes */ if (attr->max_entries == 0 || attr->key_size != 4 || @@ -125,9 +125,6 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) bpf_map_init_from_attr(&dtab->map, attr); - /* make sure page count doesn't overflow */ - cost = (u64) sizeof(struct list_head) * num_possible_cpus(); - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { dtab->n_buckets = roundup_pow_of_two(dtab->map.max_entries); @@ -143,17 +140,10 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) if (err) return -EINVAL; - dtab->flush_list = alloc_percpu(struct list_head); - if (!dtab->flush_list) - goto free_charge; - - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(per_cpu_ptr(dtab->flush_list, cpu)); - if (attr->map_type == BPF_MAP_TYPE_DEVMAP_HASH) { dtab->dev_index_head = dev_map_create_hash(dtab->n_buckets); if (!dtab->dev_index_head) - goto free_percpu; + goto free_charge; spin_lock_init(&dtab->index_lock); } else { @@ -161,13 +151,11 @@ static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr) sizeof(struct bpf_dtab_netdev *), dtab->map.numa_node); if (!dtab->netdev_map) - goto free_percpu; + goto free_charge; } return 0; -free_percpu: - free_percpu(dtab->flush_list); free_charge: bpf_map_charge_finish(&dtab->map.memory); return -ENOMEM; @@ -201,7 +189,7 @@ static struct bpf_map *dev_map_alloc(union bpf_attr *attr) static void dev_map_free(struct bpf_map *map) { struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - int i, cpu; + int i; /* At this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0, * so the programs (can be more than one that used this map) were @@ -221,18 +209,6 @@ static void dev_map_free(struct bpf_map *map) /* Make sure prior __dev_map_entry_free() have completed. */ rcu_barrier(); - /* To ensure all pending flush operations have completed wait for flush - * list to empty on _all_ cpus. - * Because the above synchronize_rcu() ensures the map is disconnected - * from the program we can assume no new items will be added. 
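The wait-for-empty loop deleted just below is no longer needed because a flush is no longer tied to a particular map's flush list: anything placed on the per-CPU list is expected to be drained before the NAPI poll that queued it returns, through the map-agnostic flush that this series converges on (the full hunk is in the net/core/filter.c portion further down):

void xdp_do_flush_map(void)
{
	__dev_map_flush();
	__cpu_map_flush();
	__xsk_map_flush();
}

Once the synchronize_rcu() above has returned, no CPU can still be inside an XDP program that enqueues to this map, so there is nothing left to wait for.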
- */ - for_each_online_cpu(cpu) { - struct list_head *flush_list = per_cpu_ptr(dtab->flush_list, cpu); - - while (!list_empty(flush_list)) - cond_resched(); - } - if (dtab->map.map_type == BPF_MAP_TYPE_DEVMAP_HASH) { for (i = 0; i < dtab->n_buckets; i++) { struct bpf_dtab_netdev *dev; @@ -266,7 +242,6 @@ static void dev_map_free(struct bpf_map *map) bpf_map_area_free(dtab->netdev_map); } - free_percpu(dtab->flush_list); kfree(dtab); } @@ -345,8 +320,7 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key, return -ENOENT; } -static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags, - bool in_napi_ctx) +static int bq_xmit_all(struct xdp_bulk_queue *bq, u32 flags) { struct bpf_dtab_netdev *obj = bq->obj; struct net_device *dev = obj->dev; @@ -384,11 +358,7 @@ error: for (i = 0; i < bq->count; i++) { struct xdp_frame *xdpf = bq->q[i]; - /* RX path under NAPI protection, can return frames faster */ - if (likely(in_napi_ctx)) - xdp_return_frame_rx_napi(xdpf); - else - xdp_return_frame(xdpf); + xdp_return_frame_rx_napi(xdpf); drops++; } goto out; @@ -401,15 +371,14 @@ error: * net device can be torn down. On devmap tear down we ensure the flush list * is empty before completing to ensure all flush operations have completed. */ -void __dev_map_flush(struct bpf_map *map) +void __dev_map_flush(void) { - struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map); - struct list_head *flush_list = this_cpu_ptr(dtab->flush_list); + struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list); struct xdp_bulk_queue *bq, *tmp; rcu_read_lock(); list_for_each_entry_safe(bq, tmp, flush_list, flush_node) - bq_xmit_all(bq, XDP_XMIT_FLUSH, true); + bq_xmit_all(bq, XDP_XMIT_FLUSH); rcu_read_unlock(); } @@ -436,11 +405,11 @@ static int bq_enqueue(struct bpf_dtab_netdev *obj, struct xdp_frame *xdpf, struct net_device *dev_rx) { - struct list_head *flush_list = this_cpu_ptr(obj->dtab->flush_list); + struct list_head *flush_list = this_cpu_ptr(&dev_map_flush_list); struct xdp_bulk_queue *bq = this_cpu_ptr(obj->bulkq); if (unlikely(bq->count == DEV_MAP_BULK_SIZE)) - bq_xmit_all(bq, 0, true); + bq_xmit_all(bq, 0); /* Ingress dev_rx will be the same for all xdp_frame's in * bulk_queue, because bq stored per-CPU and must be flushed @@ -509,27 +478,11 @@ static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key) return dev ? 
&dev->ifindex : NULL;
 }
 
-static void dev_map_flush_old(struct bpf_dtab_netdev *dev)
-{
-	if (dev->dev->netdev_ops->ndo_xdp_xmit) {
-		struct xdp_bulk_queue *bq;
-		int cpu;
-
-		rcu_read_lock();
-		for_each_online_cpu(cpu) {
-			bq = per_cpu_ptr(dev->bulkq, cpu);
-			bq_xmit_all(bq, XDP_XMIT_FLUSH, false);
-		}
-		rcu_read_unlock();
-	}
-}
-
 static void __dev_map_entry_free(struct rcu_head *rcu)
 {
 	struct bpf_dtab_netdev *dev;
 
 	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
-	dev_map_flush_old(dev);
 	free_percpu(dev->bulkq);
 	dev_put(dev->dev);
 	kfree(dev);
@@ -810,10 +763,15 @@ static struct notifier_block dev_map_notifier = {
 
 static int __init dev_map_init(void)
 {
+	int cpu;
+
 	/* Assure tracepoint shadow struct _bpf_dtab_netdev is in sync */
 	BUILD_BUG_ON(offsetof(struct bpf_dtab_netdev, dev) !=
 		     offsetof(struct _bpf_dtab_netdev, dev));
 	register_netdevice_notifier(&dev_map_notifier);
+
+	for_each_possible_cpu(cpu)
+		INIT_LIST_HEAD(&per_cpu(dev_map_flush_list, cpu));
 	return 0;
 }
 
diff --git a/kernel/bpf/dispatcher.c b/kernel/bpf/dispatcher.c
new file mode 100644
index 000000000000..204ee61a3904
--- /dev/null
+++ b/kernel/bpf/dispatcher.c
@@ -0,0 +1,158 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright(c) 2019 Intel Corporation. */
+
+#include <linux/hash.h>
+#include <linux/bpf.h>
+#include <linux/filter.h>
+
+/* The BPF dispatcher is a multiway branch code generator. The
+ * dispatcher is a mechanism to avoid the performance penalty of an
+ * indirect call, which is expensive when retpolines are enabled. A
+ * dispatch client registers a BPF program into the dispatcher, and if
+ * there is available room in the dispatcher a direct call to the BPF
+ * program will be generated. All calls to the BPF programs called via
+ * the dispatcher will then be a direct call, instead of an
+ * indirect one. The dispatcher hijacks a trampoline function via the
+ * __fentry__ of the trampoline.
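Concretely, the multiway branch emitted into the dispatcher image is a compare-and-direct-call fan-out over the registered programs. A C-level sketch of what the generated code is equivalent to (illustrative only: the real image is architecture-specific machine code produced by arch_prepare_bpf_dispatcher(), and prog_a_func/prog_b_func are placeholders, not symbols in this series):

extern unsigned int prog_a_func(const void *ctx, const struct bpf_insn *insnsi);
extern unsigned int prog_b_func(const void *ctx, const struct bpf_insn *insnsi);

static unsigned int dispatcher_equivalent(const void *ctx,
					  const struct bpf_insn *insnsi,
					  unsigned int (*bpf_func)(const void *,
								   const struct bpf_insn *))
{
	if (bpf_func == prog_a_func)
		return prog_a_func(ctx, insnsi);	/* direct call */
	if (bpf_func == prog_b_func)
		return prog_b_func(ctx, insnsi);	/* direct call */
	return bpf_func(ctx, insnsi);			/* unregistered: stays indirect */
}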
The trampoline function has the + * following signature: + * + * unsigned int trampoline(const void *ctx, const struct bpf_insn *insnsi, + * unsigned int (*bpf_func)(const void *, + * const struct bpf_insn *)); + */ + +static struct bpf_dispatcher_prog *bpf_dispatcher_find_prog( + struct bpf_dispatcher *d, struct bpf_prog *prog) +{ + int i; + + for (i = 0; i < BPF_DISPATCHER_MAX; i++) { + if (prog == d->progs[i].prog) + return &d->progs[i]; + } + return NULL; +} + +static struct bpf_dispatcher_prog *bpf_dispatcher_find_free( + struct bpf_dispatcher *d) +{ + return bpf_dispatcher_find_prog(d, NULL); +} + +static bool bpf_dispatcher_add_prog(struct bpf_dispatcher *d, + struct bpf_prog *prog) +{ + struct bpf_dispatcher_prog *entry; + + if (!prog) + return false; + + entry = bpf_dispatcher_find_prog(d, prog); + if (entry) { + refcount_inc(&entry->users); + return false; + } + + entry = bpf_dispatcher_find_free(d); + if (!entry) + return false; + + bpf_prog_inc(prog); + entry->prog = prog; + refcount_set(&entry->users, 1); + d->num_progs++; + return true; +} + +static bool bpf_dispatcher_remove_prog(struct bpf_dispatcher *d, + struct bpf_prog *prog) +{ + struct bpf_dispatcher_prog *entry; + + if (!prog) + return false; + + entry = bpf_dispatcher_find_prog(d, prog); + if (!entry) + return false; + + if (refcount_dec_and_test(&entry->users)) { + entry->prog = NULL; + bpf_prog_put(prog); + d->num_progs--; + return true; + } + return false; +} + +int __weak arch_prepare_bpf_dispatcher(void *image, s64 *funcs, int num_funcs) +{ + return -ENOTSUPP; +} + +static int bpf_dispatcher_prepare(struct bpf_dispatcher *d, void *image) +{ + s64 ips[BPF_DISPATCHER_MAX] = {}, *ipsp = &ips[0]; + int i; + + for (i = 0; i < BPF_DISPATCHER_MAX; i++) { + if (d->progs[i].prog) + *ipsp++ = (s64)(uintptr_t)d->progs[i].prog->bpf_func; + } + return arch_prepare_bpf_dispatcher(image, &ips[0], d->num_progs); +} + +static void bpf_dispatcher_update(struct bpf_dispatcher *d, int prev_num_progs) +{ + void *old, *new; + u32 noff; + int err; + + if (!prev_num_progs) { + old = NULL; + noff = 0; + } else { + old = d->image + d->image_off; + noff = d->image_off ^ (PAGE_SIZE / 2); + } + + new = d->num_progs ? 
d->image + noff : NULL; + if (new) { + if (bpf_dispatcher_prepare(d, new)) + return; + } + + err = bpf_arch_text_poke(d->func, BPF_MOD_JUMP, old, new); + if (err || !new) + return; + + d->image_off = noff; +} + +void bpf_dispatcher_change_prog(struct bpf_dispatcher *d, struct bpf_prog *from, + struct bpf_prog *to) +{ + bool changed = false; + int prev_num_progs; + + if (from == to) + return; + + mutex_lock(&d->mutex); + if (!d->image) { + d->image = bpf_jit_alloc_exec_page(); + if (!d->image) + goto out; + } + + prev_num_progs = d->num_progs; + changed |= bpf_dispatcher_remove_prog(d, from); + changed |= bpf_dispatcher_add_prog(d, to); + + if (!changed) + goto out; + + bpf_dispatcher_update(d, prev_num_progs); +out: + mutex_unlock(&d->mutex); +} diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index e3461ec59570..81ee8595dfee 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -23,6 +23,7 @@ #include <linux/timekeeping.h> #include <linux/ctype.h> #include <linux/nospec.h> +#include <linux/audit.h> #include <uapi/linux/btf.h> #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \ @@ -1306,6 +1307,36 @@ static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) return 0; } +enum bpf_audit { + BPF_AUDIT_LOAD, + BPF_AUDIT_UNLOAD, + BPF_AUDIT_MAX, +}; + +static const char * const bpf_audit_str[BPF_AUDIT_MAX] = { + [BPF_AUDIT_LOAD] = "LOAD", + [BPF_AUDIT_UNLOAD] = "UNLOAD", +}; + +static void bpf_audit_prog(const struct bpf_prog *prog, unsigned int op) +{ + struct audit_context *ctx = NULL; + struct audit_buffer *ab; + + if (WARN_ON_ONCE(op >= BPF_AUDIT_MAX)) + return; + if (audit_enabled == AUDIT_OFF) + return; + if (op == BPF_AUDIT_LOAD) + ctx = audit_context(); + ab = audit_log_start(ctx, GFP_ATOMIC, AUDIT_BPF); + if (unlikely(!ab)) + return; + audit_log_format(ab, "prog-id=%u op=%s", + prog->aux->id, bpf_audit_str[op]); + audit_log_end(ab); +} + int __bpf_prog_charge(struct user_struct *user, u32 pages) { unsigned long memlock_limit = rlimit(RLIMIT_MEMLOCK) >> PAGE_SHIFT; @@ -1421,6 +1452,7 @@ static void __bpf_prog_put(struct bpf_prog *prog, bool do_idr_lock) { if (atomic64_dec_and_test(&prog->aux->refcnt)) { perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_UNLOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_UNLOAD); /* bpf_prog_free_id() must be called first */ bpf_prog_free_id(prog, do_idr_lock); __bpf_prog_put_noref(prog, true); @@ -1830,6 +1862,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr) */ bpf_prog_kallsyms_add(prog); perf_event_bpf_event(prog, PERF_BPF_EVENT_PROG_LOAD, 0); + bpf_audit_prog(prog, BPF_AUDIT_LOAD); err = bpf_prog_new_fd(prog); if (err < 0) @@ -2040,10 +2073,10 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog, } } -#define BPF_PROG_ATTACH_LAST_FIELD attach_flags +#define BPF_PROG_ATTACH_LAST_FIELD replace_bpf_fd #define BPF_F_ATTACH_MASK \ - (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI) + (BPF_F_ALLOW_OVERRIDE | BPF_F_ALLOW_MULTI | BPF_F_REPLACE) static int bpf_prog_attach(const union bpf_attr *attr) { @@ -2305,17 +2338,12 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr, #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id -static int bpf_prog_get_fd_by_id(const union bpf_attr *attr) +struct bpf_prog *bpf_prog_by_id(u32 id) { struct bpf_prog *prog; - u32 id = attr->prog_id; - int fd; - - if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID)) - return -EINVAL; - if (!capable(CAP_SYS_ADMIN)) - return -EPERM; + if (!id) + return ERR_PTR(-ENOENT); 
spin_lock_bh(&prog_idr_lock);
 	prog = idr_find(&prog_idr, id);
@@ -2324,7 +2352,22 @@ static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
 	else
 		prog = ERR_PTR(-ENOENT);
 	spin_unlock_bh(&prog_idr_lock);
+	return prog;
+}
+
+static int bpf_prog_get_fd_by_id(const union bpf_attr *attr)
+{
+	struct bpf_prog *prog;
+	u32 id = attr->prog_id;
+	int fd;
+
+	if (CHECK_ATTR(BPF_PROG_GET_FD_BY_ID))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+	prog = bpf_prog_by_id(id);
 	if (IS_ERR(prog))
 		return PTR_ERR(prog);
 
diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c
index 23b0d5cfd47e..505f4e4b31d2 100644
--- a/kernel/bpf/trampoline.c
+++ b/kernel/bpf/trampoline.c
@@ -14,6 +14,22 @@ static struct hlist_head trampoline_table[TRAMPOLINE_TABLE_SIZE];
 /* serializes access to trampoline_table */
 static DEFINE_MUTEX(trampoline_mutex);
 
+void *bpf_jit_alloc_exec_page(void)
+{
+	void *image;
+
+	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	if (!image)
+		return NULL;
+
+	set_vm_flush_reset_perms(image);
+	/* Keep image as writeable. The alternative is to keep flipping ro/rw
+	 * every time a new program is attached or detached.
+	 */
+	set_memory_x((long)image, 1);
+	return image;
+}
+
 struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 {
 	struct bpf_trampoline *tr;
@@ -34,7 +50,7 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 		goto out;
 
 	/* is_root was checked earlier. No need for bpf_jit_charge_modmem() */
-	image = bpf_jit_alloc_exec(PAGE_SIZE);
+	image = bpf_jit_alloc_exec_page();
 	if (!image) {
 		kfree(tr);
 		tr = NULL;
@@ -48,12 +64,6 @@ struct bpf_trampoline *bpf_trampoline_lookup(u64 key)
 	mutex_init(&tr->mutex);
 	for (i = 0; i < BPF_TRAMP_MAX; i++)
 		INIT_HLIST_HEAD(&tr->progs_hlist[i]);
-
-	set_vm_flush_reset_perms(image);
-	/* Keep image as writeable. The alternative is to keep flipping ro/rw
-	 * everytime new program is attached or detached.
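Factoring the helper out of bpf_trampoline_lookup() lets the dispatcher share the same allocation policy: one page that is executable from the start and stays writable, so bpf_arch_text_poke() can patch it later without flipping ro/rw on every update. The dispatcher-side caller added earlier in this diff reduces to this shape (condensed from bpf_dispatcher_change_prog() above):

	if (!d->image) {
		d->image = bpf_jit_alloc_exec_page();
		if (!d->image)
			goto out;
	}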
- */ - set_memory_x((long)image, 1); tr->image = image; out: mutex_unlock(&trampoline_mutex); diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c index 90c4fce1c981..2cc5c8f4c800 100644 --- a/kernel/bpf/xskmap.c +++ b/kernel/bpf/xskmap.c @@ -72,9 +72,9 @@ static void xsk_map_sock_delete(struct xdp_sock *xs, static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) { struct bpf_map_memory mem; - int cpu, err, numa_node; + int err, numa_node; struct xsk_map *m; - u64 cost, size; + u64 size; if (!capable(CAP_NET_ADMIN)) return ERR_PTR(-EPERM); @@ -86,9 +86,8 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) numa_node = bpf_map_attr_numa_node(attr); size = struct_size(m, xsk_map, attr->max_entries); - cost = size + array_size(sizeof(*m->flush_list), num_possible_cpus()); - err = bpf_map_charge_init(&mem, cost); + err = bpf_map_charge_init(&mem, size); if (err < 0) return ERR_PTR(err); @@ -102,16 +101,6 @@ static struct bpf_map *xsk_map_alloc(union bpf_attr *attr) bpf_map_charge_move(&m->map.memory, &mem); spin_lock_init(&m->lock); - m->flush_list = alloc_percpu(struct list_head); - if (!m->flush_list) { - bpf_map_charge_finish(&m->map.memory); - bpf_map_area_free(m); - return ERR_PTR(-ENOMEM); - } - - for_each_possible_cpu(cpu) - INIT_LIST_HEAD(per_cpu_ptr(m->flush_list, cpu)); - return &m->map; } @@ -121,7 +110,6 @@ static void xsk_map_free(struct bpf_map *map) bpf_clear_redirect_map(map); synchronize_net(); - free_percpu(m->flush_list); bpf_map_area_free(m); } diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 735af8f15f95..725365df066d 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6288,12 +6288,13 @@ void cgroup_sk_free(struct sock_cgroup_data *skcd) #ifdef CONFIG_CGROUP_BPF int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, u32 flags) + struct bpf_prog *replace_prog, enum bpf_attach_type type, + u32 flags) { int ret; mutex_lock(&cgroup_mutex); - ret = __cgroup_bpf_attach(cgrp, prog, type, flags); + ret = __cgroup_bpf_attach(cgrp, prog, replace_prog, type, flags); mutex_unlock(&cgroup_mutex); return ret; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index f79205d4444f..d555c0d8657d 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -15,7 +15,7 @@ #include <trace/events/bpf_test_run.h> static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, - u32 *retval, u32 *time) + u32 *retval, u32 *time, bool xdp) { struct bpf_cgroup_storage *storage[MAX_BPF_CGROUP_STORAGE_TYPE] = { NULL }; enum bpf_cgroup_storage_type stype; @@ -41,7 +41,11 @@ static int bpf_test_run(struct bpf_prog *prog, void *ctx, u32 repeat, time_start = ktime_get_ns(); for (i = 0; i < repeat; i++) { bpf_cgroup_storage_set(storage); - *retval = BPF_PROG_RUN(prog, ctx); + + if (xdp) + *retval = bpf_prog_run_xdp(prog, ctx); + else + *retval = BPF_PROG_RUN(prog, ctx); if (signal_pending(current)) { ret = -EINTR; @@ -247,34 +251,53 @@ static int convert___skb_to_skb(struct sk_buff *skb, struct __sk_buff *__skb) return 0; /* make sure the fields we don't use are zeroed */ - if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, priority))) + if (!range_is_zero(__skb, 0, offsetof(struct __sk_buff, mark))) + return -EINVAL; + + /* mark is allowed */ + + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, mark), + offsetof(struct __sk_buff, priority))) return -EINVAL; /* priority is allowed */ - if (!range_is_zero(__skb, offsetof(struct __sk_buff, priority) + - sizeof_field(struct __sk_buff, priority), + if 
(!range_is_zero(__skb, offsetofend(struct __sk_buff, priority), offsetof(struct __sk_buff, cb))) return -EINVAL; /* cb is allowed */ - if (!range_is_zero(__skb, offsetof(struct __sk_buff, cb) + - sizeof_field(struct __sk_buff, cb), + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, cb), offsetof(struct __sk_buff, tstamp))) return -EINVAL; /* tstamp is allowed */ + /* wire_len is allowed */ + /* gso_segs is allowed */ - if (!range_is_zero(__skb, offsetof(struct __sk_buff, tstamp) + - sizeof_field(struct __sk_buff, tstamp), + if (!range_is_zero(__skb, offsetofend(struct __sk_buff, gso_segs), sizeof(struct __sk_buff))) return -EINVAL; + skb->mark = __skb->mark; skb->priority = __skb->priority; skb->tstamp = __skb->tstamp; memcpy(&cb->data, __skb->cb, QDISC_CB_PRIV_LEN); + if (__skb->wire_len == 0) { + cb->pkt_len = skb->len; + } else { + if (__skb->wire_len < skb->len || + __skb->wire_len > GSO_MAX_SIZE) + return -EINVAL; + cb->pkt_len = __skb->wire_len; + } + + if (__skb->gso_segs > GSO_MAX_SEGS) + return -EINVAL; + skb_shinfo(skb)->gso_segs = __skb->gso_segs; + return 0; } @@ -285,9 +308,12 @@ static void convert_skb_to___skb(struct sk_buff *skb, struct __sk_buff *__skb) if (!__skb) return; + __skb->mark = skb->mark; __skb->priority = skb->priority; __skb->tstamp = skb->tstamp; memcpy(__skb->cb, &cb->data, QDISC_CB_PRIV_LEN); + __skb->wire_len = cb->pkt_len; + __skb->gso_segs = skb_shinfo(skb)->gso_segs; } int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, @@ -359,7 +385,7 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, ret = convert___skb_to_skb(skb, ctx); if (ret) goto out; - ret = bpf_test_run(prog, skb, repeat, &retval, &duration); + ret = bpf_test_run(prog, skb, repeat, &retval, &duration, false); if (ret) goto out; if (!is_l2) { @@ -416,8 +442,8 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0); xdp.rxq = &rxqueue->xdp_rxq; - - ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration); + bpf_prog_change_xdp(NULL, prog); + ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true); if (ret) goto out; if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN || @@ -425,6 +451,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, size = xdp.data_end - xdp.data; ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration); out: + bpf_prog_change_xdp(prog, NULL); kfree(data); return ret; } @@ -437,8 +464,7 @@ static int verify_user_bpf_flow_keys(struct bpf_flow_keys *ctx) /* flags is allowed */ - if (!range_is_zero(ctx, offsetof(struct bpf_flow_keys, flags) + - sizeof_field(struct bpf_flow_keys, flags), + if (!range_is_zero(ctx, offsetofend(struct bpf_flow_keys, flags), sizeof(struct bpf_flow_keys))) return -EINVAL; diff --git a/net/core/dev.c b/net/core/dev.c index 0ad39c87b7fd..f1b8afcfbc0f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -8542,7 +8542,17 @@ static int dev_xdp_install(struct net_device *dev, bpf_op_t bpf_op, struct netlink_ext_ack *extack, u32 flags, struct bpf_prog *prog) { + bool non_hw = !(flags & XDP_FLAGS_HW_MODE); + struct bpf_prog *prev_prog = NULL; struct netdev_bpf xdp; + int err; + + if (non_hw) { + prev_prog = bpf_prog_by_id(__dev_xdp_query(dev, bpf_op, + XDP_QUERY_PROG)); + if (IS_ERR(prev_prog)) + prev_prog = NULL; + } memset(&xdp, 0, sizeof(xdp)); if (flags & XDP_FLAGS_HW_MODE) @@ -8553,7 +8563,14 @@ static int dev_xdp_install(struct 
net_device *dev, bpf_op_t bpf_op, xdp.flags = flags; xdp.prog = prog; - return bpf_op(dev, &xdp); + err = bpf_op(dev, &xdp); + if (!err && non_hw) + bpf_prog_change_xdp(prev_prog, prog); + + if (prev_prog) + bpf_prog_put(prev_prog); + + return err; } static void dev_xdp_uninstall(struct net_device *dev) diff --git a/net/core/filter.c b/net/core/filter.c index 28b3c258188c..42fd17c48c5f 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -3511,36 +3511,16 @@ err: } static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, - struct bpf_map *map, - struct xdp_buff *xdp, - u32 index) + struct bpf_map *map, struct xdp_buff *xdp) { - int err; - switch (map->map_type) { case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: { - struct bpf_dtab_netdev *dst = fwd; - - err = dev_map_enqueue(dst, xdp, dev_rx); - if (unlikely(err)) - return err; - break; - } - case BPF_MAP_TYPE_CPUMAP: { - struct bpf_cpu_map_entry *rcpu = fwd; - - err = cpu_map_enqueue(rcpu, xdp, dev_rx); - if (unlikely(err)) - return err; - break; - } - case BPF_MAP_TYPE_XSKMAP: { - struct xdp_sock *xs = fwd; - - err = __xsk_map_redirect(map, xdp, xs); - return err; - } + case BPF_MAP_TYPE_DEVMAP_HASH: + return dev_map_enqueue(fwd, xdp, dev_rx); + case BPF_MAP_TYPE_CPUMAP: + return cpu_map_enqueue(fwd, xdp, dev_rx); + case BPF_MAP_TYPE_XSKMAP: + return __xsk_map_redirect(fwd, xdp); default: break; } @@ -3549,26 +3529,9 @@ static int __bpf_tx_xdp_map(struct net_device *dev_rx, void *fwd, void xdp_do_flush_map(void) { - struct bpf_redirect_info *ri = this_cpu_ptr(&bpf_redirect_info); - struct bpf_map *map = ri->map_to_flush; - - ri->map_to_flush = NULL; - if (map) { - switch (map->map_type) { - case BPF_MAP_TYPE_DEVMAP: - case BPF_MAP_TYPE_DEVMAP_HASH: - __dev_map_flush(map); - break; - case BPF_MAP_TYPE_CPUMAP: - __cpu_map_flush(map); - break; - case BPF_MAP_TYPE_XSKMAP: - __xsk_map_flush(map); - break; - default: - break; - } - } + __dev_map_flush(); + __cpu_map_flush(); + __xsk_map_flush(); } EXPORT_SYMBOL_GPL(xdp_do_flush_map); @@ -3617,14 +3580,10 @@ static int xdp_do_redirect_map(struct net_device *dev, struct xdp_buff *xdp, ri->tgt_value = NULL; WRITE_ONCE(ri->map, NULL); - if (ri->map_to_flush && unlikely(ri->map_to_flush != map)) - xdp_do_flush_map(); - - err = __bpf_tx_xdp_map(dev, fwd, map, xdp, index); + err = __bpf_tx_xdp_map(dev, fwd, map, xdp); if (unlikely(err)) goto err; - ri->map_to_flush = map; _trace_xdp_redirect_map(dev, xdp_prog, fwd, map, index); return 0; err: @@ -8941,3 +8900,11 @@ const struct bpf_verifier_ops sk_reuseport_verifier_ops = { const struct bpf_prog_ops sk_reuseport_prog_ops = { }; #endif /* CONFIG_INET */ + +DEFINE_BPF_DISPATCHER(bpf_dispatcher_xdp) + +void bpf_prog_change_xdp(struct bpf_prog *prev_prog, struct bpf_prog *prog) +{ + bpf_dispatcher_change_prog(BPF_DISPATCHER_PTR(bpf_dispatcher_xdp), + prev_prog, prog); +} diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 328f661b83b2..02ada7ab8c6e 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -31,6 +31,8 @@ #define TX_BATCH_SIZE 16 +static DEFINE_PER_CPU(struct list_head, xskmap_flush_list); + bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return READ_ONCE(xs->rx) && READ_ONCE(xs->umem) && @@ -39,21 +41,21 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt) { - return xskq_has_addrs(umem->fq, cnt); + return xskq_cons_has_entries(umem->fq, cnt); } EXPORT_SYMBOL(xsk_umem_has_addrs); -u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) +bool 
xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) { - return xskq_peek_addr(umem->fq, addr, umem); + return xskq_cons_peek_addr(umem->fq, addr, umem); } EXPORT_SYMBOL(xsk_umem_peek_addr); -void xsk_umem_discard_addr(struct xdp_umem *umem) +void xsk_umem_release_addr(struct xdp_umem *umem) { - xskq_discard_addr(umem->fq); + xskq_cons_release(umem->fq); } -EXPORT_SYMBOL(xsk_umem_discard_addr); +EXPORT_SYMBOL(xsk_umem_release_addr); void xsk_set_rx_need_wakeup(struct xdp_umem *umem) { @@ -124,7 +126,7 @@ static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf, void *to_buf = xdp_umem_get_data(umem, addr); addr = xsk_umem_add_offset_to_addr(addr); - if (xskq_crosses_non_contig_pg(umem, addr, len + metalen)) { + if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) { void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr; u64 page_start = addr & ~(PAGE_SIZE - 1); u64 first_len = PAGE_SIZE - (addr - page_start); @@ -146,7 +148,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) u32 metalen; int err; - if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || + if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { xs->rx_dropped++; return -ENOSPC; @@ -165,9 +167,9 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) offset += metalen; addr = xsk_umem_adjust_offset(xs->umem, addr, offset); - err = xskq_produce_batch_desc(xs->rx, addr, len); + err = xskq_prod_reserve_desc(xs->rx, addr, len); if (!err) { - xskq_discard_addr(xs->umem->fq); + xskq_cons_release(xs->umem->fq); xdp_return_buff(xdp); return 0; } @@ -178,7 +180,7 @@ static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len) { - int err = xskq_produce_batch_desc(xs->rx, (u64)xdp->handle, len); + int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len); if (err) xs->rx_dropped++; @@ -214,7 +216,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) static void xsk_flush(struct xdp_sock *xs) { - xskq_produce_flush_desc(xs->rx); + xskq_prod_submit(xs->rx); xs->sk.sk_data_ready(&xs->sk); } @@ -234,7 +236,7 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) goto out_unlock; } - if (!xskq_peek_addr(xs->umem->fq, &addr, xs->umem) || + if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) || len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) { err = -ENOSPC; goto out_drop; @@ -245,12 +247,12 @@ int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) memcpy(buffer, xdp->data_meta, len + metalen); addr = xsk_umem_adjust_offset(xs->umem, addr, metalen); - err = xskq_produce_batch_desc(xs->rx, addr, len); + err = xskq_prod_reserve_desc(xs->rx, addr, len); if (err) goto out_drop; - xskq_discard_addr(xs->umem->fq); - xskq_produce_flush_desc(xs->rx); + xskq_cons_release(xs->umem->fq); + xskq_prod_submit(xs->rx); spin_unlock_bh(&xs->rx_lock); @@ -264,11 +266,9 @@ out_unlock: return err; } -int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, - struct xdp_sock *xs) +int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp) { - struct xsk_map *m = container_of(map, struct xsk_map, map); - struct list_head *flush_list = this_cpu_ptr(m->flush_list); + struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list); int err; err = xsk_rcv(xs, xdp); @@ -281,10 +281,9 @@ int __xsk_map_redirect(struct bpf_map *map, struct xdp_buff *xdp, return 0; } -void 
__xsk_map_flush(struct bpf_map *map)
+void __xsk_map_flush(void)
 {
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct list_head *flush_list = this_cpu_ptr(m->flush_list);
+	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
 	struct xdp_sock *xs, *tmp;
 
 	list_for_each_entry_safe(xs, tmp, flush_list, flush_node) {
@@ -295,7 +294,7 @@ void __xsk_map_flush(struct bpf_map *map)
 
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
-	xskq_produce_flush_addr_n(umem->cq, nb_entries);
+	xskq_prod_submit_n(umem->cq, nb_entries);
 }
 EXPORT_SYMBOL(xsk_umem_complete_tx);
 
@@ -317,13 +316,18 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 
 	rcu_read_lock();
 	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
-		if (!xskq_peek_desc(xs->tx, desc, umem))
+		if (!xskq_cons_peek_desc(xs->tx, desc, umem))
 			continue;
 
-		if (xskq_produce_addr_lazy(umem->cq, desc->addr))
+		/* This is the backpressure mechanism for the Tx path.
+		 * Reserve space in the completion queue and only proceed
+		 * if there is space in it. This avoids having to implement
+		 * any buffering in the Tx path.
+		 */
+		if (xskq_prod_reserve_addr(umem->cq, desc->addr))
 			goto out;
 
-		xskq_discard_desc(xs->tx);
+		xskq_cons_release(xs->tx);
 		rcu_read_unlock();
 		return true;
 	}
 
@@ -358,7 +362,7 @@ static void xsk_destruct_skb(struct sk_buff *skb)
 	unsigned long flags;
 
 	spin_lock_irqsave(&xs->tx_completion_lock, flags);
-	WARN_ON_ONCE(xskq_produce_addr(xs->umem->cq, addr));
+	xskq_prod_submit_addr(xs->umem->cq, addr);
 	spin_unlock_irqrestore(&xs->tx_completion_lock, flags);
 
 	sock_wfree(skb);
@@ -378,7 +382,7 @@ static int xsk_generic_xmit(struct sock *sk)
 	if (xs->queue_id >= xs->dev->real_num_tx_queues)
 		goto out;
 
-	while (xskq_peek_desc(xs->tx, &desc, xs->umem)) {
+	while (xskq_cons_peek_desc(xs->tx, &desc, xs->umem)) {
 		char *buffer;
 		u64 addr;
 		u32 len;
@@ -399,7 +403,12 @@ static int xsk_generic_xmit(struct sock *sk)
 
 		addr = desc.addr;
 		buffer = xdp_umem_get_data(xs->umem, addr);
 		err = skb_store_bits(skb, 0, buffer, len);
-		if (unlikely(err) || xskq_reserve_addr(xs->umem->cq)) {
+		/* This is the backpressure mechanism for the Tx path.
+		 * Reserve space in the completion queue and only proceed
+		 * if there is space in it. This avoids having to implement
+		 * any buffering in the Tx path.
+ */ + if (unlikely(err) || xskq_prod_reserve(xs->umem->cq)) { kfree_skb(skb); goto out; } @@ -411,7 +420,7 @@ static int xsk_generic_xmit(struct sock *sk) skb->destructor = xsk_destruct_skb; err = dev_direct_xmit(skb, xs->queue_id); - xskq_discard_desc(xs->tx); + xskq_cons_release(xs->tx); /* Ignore NET_XMIT_CN as packet might have been sent */ if (err == NET_XMIT_DROP || err == NETDEV_TX_BUSY) { /* SKB completed but not sent */ @@ -477,9 +486,9 @@ static __poll_t xsk_poll(struct file *file, struct socket *sock, __xsk_sendmsg(sk); } - if (xs->rx && !xskq_empty_desc(xs->rx)) + if (xs->rx && !xskq_prod_is_empty(xs->rx)) mask |= EPOLLIN | EPOLLRDNORM; - if (xs->tx && !xskq_full_desc(xs->tx)) + if (xs->tx && !xskq_cons_is_full(xs->tx)) mask |= EPOLLOUT | EPOLLWRNORM; return mask; @@ -1183,7 +1192,7 @@ static struct pernet_operations xsk_net_ops = { static int __init xsk_init(void) { - int err; + int err, cpu; err = proto_register(&xsk_proto, 0 /* no slab */); if (err) @@ -1201,6 +1210,8 @@ static int __init xsk_init(void) if (err) goto out_pernet; + for_each_possible_cpu(cpu) + INIT_LIST_HEAD(&per_cpu(xskmap_flush_list, cpu)); return 0; out_pernet: diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c index b66504592d9b..c90e9c1e3c63 100644 --- a/net/xdp/xsk_queue.c +++ b/net/xdp/xsk_queue.c @@ -18,14 +18,14 @@ void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask) q->chunk_mask = chunk_mask; } -static u32 xskq_umem_get_ring_size(struct xsk_queue *q) +static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue) { - return sizeof(struct xdp_umem_ring) + q->nentries * sizeof(u64); -} + struct xdp_umem_ring *umem_ring; + struct xdp_rxtx_ring *rxtx_ring; -static u32 xskq_rxtx_get_ring_size(struct xsk_queue *q) -{ - return sizeof(struct xdp_ring) + q->nentries * sizeof(struct xdp_desc); + if (umem_queue) + return struct_size(umem_ring, desc, q->nentries); + return struct_size(rxtx_ring, desc, q->nentries); } struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) @@ -43,8 +43,7 @@ struct xsk_queue *xskq_create(u32 nentries, bool umem_queue) gfp_flags = GFP_KERNEL | __GFP_ZERO | __GFP_NOWARN | __GFP_COMP | __GFP_NORETRY; - size = umem_queue ? xskq_umem_get_ring_size(q) : - xskq_rxtx_get_ring_size(q); + size = xskq_get_ring_size(q, umem_queue); q->ring = (struct xdp_ring *)__get_free_pages(gfp_flags, get_order(size)); diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index eddae4688862..bec2af11853a 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -10,9 +10,6 @@ #include <linux/if_xdp.h> #include <net/xdp_sock.h> -#define RX_BATCH_SIZE 16 -#define LAZY_UPDATE_THRESHOLD 128 - struct xdp_ring { u32 producer ____cacheline_aligned_in_smp; u32 consumer ____cacheline_aligned_in_smp; @@ -36,10 +33,8 @@ struct xsk_queue { u64 size; u32 ring_mask; u32 nentries; - u32 prod_head; - u32 prod_tail; - u32 cons_head; - u32 cons_tail; + u32 cached_prod; + u32 cached_cons; struct xdp_ring *ring; u64 invalid_descs; }; @@ -86,56 +81,31 @@ struct xsk_queue { * now and again after circling through the ring. */ -/* Common functions operating for both RXTX and umem queues */ - -static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) -{ - return q ? q->invalid_descs : 0; -} - -static inline u32 xskq_nb_avail(struct xsk_queue *q, u32 dcnt) -{ - u32 entries = q->prod_tail - q->cons_tail; - - if (entries == 0) { - /* Refresh the local pointer */ - q->prod_tail = READ_ONCE(q->ring->producer); - entries = q->prod_tail - q->cons_tail; - } - - return (entries > dcnt) ? 
dcnt : entries;
-}
-
-static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt)
-{
-	u32 free_entries = q->nentries - (producer - q->cons_tail);
-
-	if (free_entries >= dcnt)
-		return free_entries;
-
-	/* Refresh the local tail pointer */
-	q->cons_tail = READ_ONCE(q->ring->consumer);
-	return q->nentries - (producer - q->cons_tail);
-}
-
-static inline bool xskq_has_addrs(struct xsk_queue *q, u32 cnt)
-{
-	u32 entries = q->prod_tail - q->cons_tail;
-
-	if (entries >= cnt)
-		return true;
-
-	/* Refresh the local pointer. */
-	q->prod_tail = READ_ONCE(q->ring->producer);
-	entries = q->prod_tail - q->cons_tail;
-
-	return entries >= cnt;
-}
+/* The operations on the rings are the following:
+ *
+ * producer                           consumer
+ *
+ * RESERVE entries                    PEEK in the ring for entries
+ * WRITE data into the ring           READ data from the ring
+ * SUBMIT entries                     RELEASE entries
+ *
+ * The producer reserves one or more entries in the ring. It can then
+ * fill in these entries and finally submit them so that they can be
+ * seen and read by the consumer.
+ *
+ * The consumer peeks into the ring to see if the producer has written
+ * any new entries. If so, the consumer can then read these entries
+ * and when it is done reading them release them back to the producer
+ * so that the producer can use these slots to fill in new entries.
+ *
+ * The function names below reflect these operations.
+ */
 
-/* UMEM queue */
+/* Functions that read and validate content from consumer rings. */
 
-static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
-					      u64 length)
+static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
+						   u64 addr,
+						   u64 length)
 {
 	bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
 	bool next_pg_contig =
@@ -145,9 +115,16 @@ static inline bool xskq_crosses_non_contig_pg(struct xdp_umem *umem, u64 addr,
 	return cross_pg && !next_pg_contig;
 }
 
-static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
+static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
+						u64 addr,
+						u64 length,
+						struct xdp_umem *umem)
 {
-	if (addr >= q->size) {
+	u64 base_addr = xsk_umem_extract_addr(addr);
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	if (base_addr >= q->size || addr >= q->size ||
+	    xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -155,15 +132,9 @@ static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr)
 	return true;
 }
 
-static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
-						u64 length,
-						struct xdp_umem *umem)
+static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
-	u64 base_addr = xsk_umem_extract_addr(addr);
-
-	addr = xsk_umem_add_offset_to_addr(addr);
-	if (base_addr >= q->size || addr >= q->size ||
-	    xskq_crosses_non_contig_pg(umem, addr, length)) {
+	if (addr >= q->size) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -171,204 +142,240 @@ static inline bool xskq_is_valid_addr_unaligned(struct xsk_queue *q, u64 addr,
 	return true;
 }
 
-static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr,
-				      struct xdp_umem *umem)
+static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
+				       struct xdp_umem *umem)
 {
-	while (q->cons_tail != q->cons_head) {
-		struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-		unsigned int idx = q->cons_tail & q->ring_mask;
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 
-		*addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask;
+	while (q->cached_cons !=
q->cached_prod) { + u32 idx = q->cached_cons & q->ring_mask; + + *addr = ring->desc[idx] & q->chunk_mask; if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { - if (xskq_is_valid_addr_unaligned(q, *addr, + if (xskq_cons_is_valid_unaligned(q, *addr, umem->chunk_size_nohr, umem)) - return addr; + return true; goto out; } - if (xskq_is_valid_addr(q, *addr)) - return addr; + if (xskq_cons_is_valid_addr(q, *addr)) + return true; out: - q->cons_tail++; + q->cached_cons++; } - return NULL; + return false; } -static inline u64 *xskq_peek_addr(struct xsk_queue *q, u64 *addr, - struct xdp_umem *umem) +static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q, + struct xdp_desc *d, + struct xdp_umem *umem) { - if (q->cons_tail == q->cons_head) { - smp_mb(); /* D, matches A */ - WRITE_ONCE(q->ring->consumer, q->cons_tail); - q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); + if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { + if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem)) + return false; + + if (d->len > umem->chunk_size_nohr || d->options) { + q->invalid_descs++; + return false; + } - /* Order consumer and data */ - smp_rmb(); + return true; } - return xskq_validate_addr(q, addr, umem); -} + if (!xskq_cons_is_valid_addr(q, d->addr)) + return false; -static inline void xskq_discard_addr(struct xsk_queue *q) -{ - q->cons_tail++; + if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || + d->options) { + q->invalid_descs++; + return false; + } + + return true; } -static inline int xskq_produce_addr(struct xsk_queue *q, u64 addr) +static inline bool xskq_cons_read_desc(struct xsk_queue *q, + struct xdp_desc *desc, + struct xdp_umem *umem) { - struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; - - if (xskq_nb_free(q, q->prod_tail, 1) == 0) - return -ENOSPC; + while (q->cached_cons != q->cached_prod) { + struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; + u32 idx = q->cached_cons & q->ring_mask; - /* A, matches D */ - ring->desc[q->prod_tail++ & q->ring_mask] = addr; + *desc = ring->desc[idx]; + if (xskq_cons_is_valid_desc(q, desc, umem)) + return true; - /* Order producer and data */ - smp_wmb(); /* B, matches C */ + q->cached_cons++; + } - WRITE_ONCE(q->ring->producer, q->prod_tail); - return 0; + return false; } -static inline int xskq_produce_addr_lazy(struct xsk_queue *q, u64 addr) +/* Functions for consumers */ + +static inline void __xskq_cons_release(struct xsk_queue *q) { - struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; + smp_mb(); /* D, matches A */ + WRITE_ONCE(q->ring->consumer, q->cached_cons); +} - if (xskq_nb_free(q, q->prod_head, LAZY_UPDATE_THRESHOLD) == 0) - return -ENOSPC; +static inline void __xskq_cons_peek(struct xsk_queue *q) +{ + /* Refresh the local pointer */ + q->cached_prod = READ_ONCE(q->ring->producer); + smp_rmb(); /* C, matches B */ +} - /* A, matches D */ - ring->desc[q->prod_head++ & q->ring_mask] = addr; - return 0; +static inline void xskq_cons_get_entries(struct xsk_queue *q) +{ + __xskq_cons_release(q); + __xskq_cons_peek(q); } -static inline void xskq_produce_flush_addr_n(struct xsk_queue *q, - u32 nb_entries) +static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt) { - /* Order producer and data */ - smp_wmb(); /* B, matches C */ + u32 entries = q->cached_prod - q->cached_cons; - q->prod_tail += nb_entries; - WRITE_ONCE(q->ring->producer, q->prod_tail); + if (entries >= cnt) + return true; + + __xskq_cons_peek(q); + entries = q->cached_prod - q->cached_cons; + + 
return entries >= cnt; } -static inline int xskq_reserve_addr(struct xsk_queue *q) +static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr, + struct xdp_umem *umem) { - if (xskq_nb_free(q, q->prod_head, 1) == 0) - return -ENOSPC; + if (q->cached_prod == q->cached_cons) + xskq_cons_get_entries(q); + return xskq_cons_read_addr(q, addr, umem); +} - /* A, matches D */ - q->prod_head++; - return 0; +static inline bool xskq_cons_peek_desc(struct xsk_queue *q, + struct xdp_desc *desc, + struct xdp_umem *umem) +{ + if (q->cached_prod == q->cached_cons) + xskq_cons_get_entries(q); + return xskq_cons_read_desc(q, desc, umem); } -/* Rx/Tx queue */ +static inline void xskq_cons_release(struct xsk_queue *q) +{ + /* To improve performance, only update local state here. + * Reflect this to global state when we get new entries + * from the ring in xskq_cons_get_entries(). + */ + q->cached_cons++; +} -static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d, - struct xdp_umem *umem) +static inline bool xskq_cons_is_full(struct xsk_queue *q) { - if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) { - if (!xskq_is_valid_addr_unaligned(q, d->addr, d->len, umem)) - return false; + /* No barriers needed since data is not accessed */ + return READ_ONCE(q->ring->producer) - READ_ONCE(q->ring->consumer) == + q->nentries; +} - if (d->len > umem->chunk_size_nohr || d->options) { - q->invalid_descs++; - return false; - } +/* Functions for producers */ - return true; - } +static inline bool xskq_prod_is_full(struct xsk_queue *q) +{ + u32 free_entries = q->nentries - (q->cached_prod - q->cached_cons); - if (!xskq_is_valid_addr(q, d->addr)) + if (free_entries) return false; - if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) || - d->options) { - q->invalid_descs++; - return false; - } + /* Refresh the local tail pointer */ + q->cached_cons = READ_ONCE(q->ring->consumer); + free_entries = q->nentries - (q->cached_prod - q->cached_cons); - return true; + return !free_entries; } -static inline struct xdp_desc *xskq_validate_desc(struct xsk_queue *q, - struct xdp_desc *desc, - struct xdp_umem *umem) +static inline int xskq_prod_reserve(struct xsk_queue *q) { - while (q->cons_tail != q->cons_head) { - struct xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; - unsigned int idx = q->cons_tail & q->ring_mask; - - *desc = READ_ONCE(ring->desc[idx]); - if (xskq_is_valid_desc(q, desc, umem)) - return desc; - - q->cons_tail++; - } + if (xskq_prod_is_full(q)) + return -ENOSPC; - return NULL; + /* A, matches D */ + q->cached_prod++; + return 0; } -static inline struct xdp_desc *xskq_peek_desc(struct xsk_queue *q, - struct xdp_desc *desc, - struct xdp_umem *umem) +static inline int xskq_prod_reserve_addr(struct xsk_queue *q, u64 addr) { - if (q->cons_tail == q->cons_head) { - smp_mb(); /* D, matches A */ - WRITE_ONCE(q->ring->consumer, q->cons_tail); - q->cons_head = q->cons_tail + xskq_nb_avail(q, RX_BATCH_SIZE); - - /* Order consumer and data */ - smp_rmb(); /* C, matches B */ - } + struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; - return xskq_validate_desc(q, desc, umem); -} + if (xskq_prod_is_full(q)) + return -ENOSPC; -static inline void xskq_discard_desc(struct xsk_queue *q) -{ - q->cons_tail++; + /* A, matches D */ + ring->desc[q->cached_prod++ & q->ring_mask] = addr; + return 0; } -static inline int xskq_produce_batch_desc(struct xsk_queue *q, - u64 addr, u32 len) +static inline int xskq_prod_reserve_desc(struct xsk_queue *q, + u64 addr, u32 len) { struct 
xdp_rxtx_ring *ring = (struct xdp_rxtx_ring *)q->ring; - unsigned int idx; + u32 idx; - if (xskq_nb_free(q, q->prod_head, 1) == 0) + if (xskq_prod_is_full(q)) return -ENOSPC; /* A, matches D */ - idx = (q->prod_head++) & q->ring_mask; + idx = q->cached_prod++ & q->ring_mask; ring->desc[idx].addr = addr; ring->desc[idx].len = len; return 0; } -static inline void xskq_produce_flush_desc(struct xsk_queue *q) +static inline void __xskq_prod_submit(struct xsk_queue *q, u32 idx) { - /* Order producer and data */ smp_wmb(); /* B, matches C */ - q->prod_tail = q->prod_head; - WRITE_ONCE(q->ring->producer, q->prod_tail); + WRITE_ONCE(q->ring->producer, idx); +} + +static inline void xskq_prod_submit(struct xsk_queue *q) +{ + __xskq_prod_submit(q, q->cached_prod); +} + +static inline void xskq_prod_submit_addr(struct xsk_queue *q, u64 addr) +{ + struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; + u32 idx = q->ring->producer; + + ring->desc[idx++ & q->ring_mask] = addr; + + __xskq_prod_submit(q, idx); } -static inline bool xskq_full_desc(struct xsk_queue *q) +static inline void xskq_prod_submit_n(struct xsk_queue *q, u32 nb_entries) { - return xskq_nb_avail(q, q->nentries) == q->nentries; + __xskq_prod_submit(q, q->ring->producer + nb_entries); } -static inline bool xskq_empty_desc(struct xsk_queue *q) +static inline bool xskq_prod_is_empty(struct xsk_queue *q) { - return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries; + /* No barriers needed since data is not accessed */ + return READ_ONCE(q->ring->consumer) == READ_ONCE(q->ring->producer); +} + +/* For both producers and consumers */ + +static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q) +{ + return q ? q->invalid_descs : 0; } void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask); diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile index 1fc42ad8ff49..5b89c0370f33 100644 --- a/samples/bpf/Makefile +++ b/samples/bpf/Makefile @@ -38,6 +38,8 @@ tprogs-y += tc_l2_redirect tprogs-y += lwt_len_hist tprogs-y += xdp_tx_iptunnel tprogs-y += test_map_in_map +tprogs-y += per_socket_stats_example +tprogs-y += xdp_redirect tprogs-y += xdp_redirect_map tprogs-y += xdp_redirect_cpu tprogs-y += xdp_monitor @@ -196,7 +198,7 @@ endif TPROGCFLAGS_bpf_load.o += -Wno-unused-variable -TPROGS_LDLIBS += $(LIBBPF) -lelf +TPROGS_LDLIBS += $(LIBBPF) -lelf -lz TPROGLDLIBS_tracex4 += -lrt TPROGLDLIBS_trace_output += -lrt TPROGLDLIBS_map_perf_test += -lrt @@ -234,6 +236,7 @@ BTF_LLVM_PROBE := $(shell echo "int main() { return 0; }" | \ readelf -S ./llvm_btf_verify.o | grep BTF; \ /bin/rm -f ./llvm_btf_verify.o) +BPF_EXTRA_CFLAGS += -fno-stack-protector ifneq ($(BTF_LLVM_PROBE),) BPF_EXTRA_CFLAGS += -g else diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 3e553eed95a7..38a8852cb57f 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -98,7 +98,7 @@ int main(int argc, char **argv) xdp_flags |= XDP_FLAGS_SKB_MODE; break; case 'N': - xdp_flags |= XDP_FLAGS_DRV_MODE; + /* default, set below */ break; case 'F': xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -109,6 +109,9 @@ int main(int argc, char **argv) } } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + if (optind == argc) { usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_adjust_tail_user.c b/samples/bpf/xdp_adjust_tail_user.c index d86e9ad0356b..008789eb6ada 100644 --- a/samples/bpf/xdp_adjust_tail_user.c +++ b/samples/bpf/xdp_adjust_tail_user.c @@ -120,7 +120,7 @@ int main(int argc, char **argv) 
xdp_flags |= XDP_FLAGS_SKB_MODE; break; case 'N': - xdp_flags |= XDP_FLAGS_DRV_MODE; + /* default, set below */ break; case 'F': xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -132,6 +132,9 @@ int main(int argc, char **argv) opt_flags[opt] = 0; } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + for (i = 0; i < strlen(optstr); i++) { if (opt_flags[(unsigned int)optstr[i]]) { fprintf(stderr, "Missing argument -%c\n", optstr[i]); diff --git a/samples/bpf/xdp_fwd_user.c b/samples/bpf/xdp_fwd_user.c index 97ff1dad7669..c30f9acfdb84 100644 --- a/samples/bpf/xdp_fwd_user.c +++ b/samples/bpf/xdp_fwd_user.c @@ -27,11 +27,13 @@ #include "libbpf.h" #include <bpf/bpf.h> +static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; + static int do_attach(int idx, int prog_fd, int map_fd, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, prog_fd, 0); + err = bpf_set_link_xdp_fd(idx, prog_fd, xdp_flags); if (err < 0) { printf("ERROR: failed to attach program to %s\n", name); return err; @@ -49,7 +51,7 @@ static int do_detach(int idx, const char *name) { int err; - err = bpf_set_link_xdp_fd(idx, -1, 0); + err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); if (err < 0) printf("ERROR: failed to detach program from %s\n", name); @@ -83,11 +85,17 @@ int main(int argc, char **argv) int attach = 1; int ret = 0; - while ((opt = getopt(argc, argv, ":dD")) != -1) { + while ((opt = getopt(argc, argv, ":dDSF")) != -1) { switch (opt) { case 'd': attach = 0; break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; + case 'F': + xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; + break; case 'D': prog_name = "xdp_fwd_direct"; break; @@ -97,6 +105,9 @@ int main(int argc, char **argv) } } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + if (optind == argc) { usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c index 0da6e9e7132e..79a2fb7d16cb 100644 --- a/samples/bpf/xdp_redirect_cpu_user.c +++ b/samples/bpf/xdp_redirect_cpu_user.c @@ -16,6 +16,10 @@ static const char *__doc__ = #include <getopt.h> #include <net/if.h> #include <time.h> +#include <linux/limits.h> + +#define __must_check +#include <linux/err.h> #include <arpa/inet.h> #include <linux/if_link.h> @@ -46,6 +50,10 @@ static int cpus_count_map_fd; static int cpus_iterator_map_fd; static int exception_cnt_map_fd; +#define NUM_TP 5 +struct bpf_link *tp_links[NUM_TP] = { 0 }; +static int tp_cnt = 0; + /* Exit return codes */ #define EXIT_OK 0 #define EXIT_FAIL 1 @@ -88,6 +96,10 @@ static void int_exit(int sig) printf("program on interface changed, not removing\n"); } } + /* Detach tracepoints */ + while (tp_cnt) + bpf_link__destroy(tp_links[--tp_cnt]); + exit(EXIT_OK); } @@ -588,23 +600,61 @@ static void stats_poll(int interval, bool use_separators, char *prog_name, free_stats_record(prev); } +static struct bpf_link * attach_tp(struct bpf_object *obj, + const char *tp_category, + const char* tp_name) +{ + struct bpf_program *prog; + struct bpf_link *link; + char sec_name[PATH_MAX]; + int len; + + len = snprintf(sec_name, PATH_MAX, "tracepoint/%s/%s", + tp_category, tp_name); + if (len < 0) + exit(EXIT_FAIL); + + prog = bpf_object__find_program_by_title(obj, sec_name); + if (!prog) { + fprintf(stderr, "ERR: finding progsec: %s\n", sec_name); + exit(EXIT_FAIL_BPF); + } + + link = bpf_program__attach_tracepoint(prog, tp_category, tp_name); + if (IS_ERR(link)) + exit(EXIT_FAIL_BPF); + + return link; +} + +static void init_tracepoints(struct 
bpf_object *obj) { + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_err"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_redirect_map_err"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_exception"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_enqueue"); + tp_links[tp_cnt++] = attach_tp(obj, "xdp", "xdp_cpumap_kthread"); +} + static int init_map_fds(struct bpf_object *obj) { - cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); - rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + /* Maps updated by tracepoints */ redirect_err_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "redirect_err_cnt"); + exception_cnt_map_fd = + bpf_object__find_map_fd_by_name(obj, "exception_cnt"); cpumap_enqueue_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "cpumap_enqueue_cnt"); cpumap_kthread_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "cpumap_kthread_cnt"); + + /* Maps used by XDP */ + rx_cnt_map_fd = bpf_object__find_map_fd_by_name(obj, "rx_cnt"); + cpu_map_fd = bpf_object__find_map_fd_by_name(obj, "cpu_map"); cpus_available_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_available"); cpus_count_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_count"); cpus_iterator_map_fd = bpf_object__find_map_fd_by_name(obj, "cpus_iterator"); - exception_cnt_map_fd = - bpf_object__find_map_fd_by_name(obj, "exception_cnt"); if (cpu_map_fd < 0 || rx_cnt_map_fd < 0 || redirect_err_cnt_map_fd < 0 || cpumap_enqueue_cnt_map_fd < 0 || @@ -662,6 +712,7 @@ int main(int argc, char **argv) strerror(errno)); return EXIT_FAIL; } + init_tracepoints(obj); if (init_map_fds(obj) < 0) { fprintf(stderr, "bpf_object__find_map_fd_by_name failed\n"); return EXIT_FAIL; @@ -728,6 +779,10 @@ int main(int argc, char **argv) return EXIT_FAIL_OPTION; } } + + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + /* Required option */ if (ifindex == -1) { fprintf(stderr, "ERR: required option --dev missing\n"); diff --git a/samples/bpf/xdp_redirect_map_user.c b/samples/bpf/xdp_redirect_map_user.c index f70ee33907fd..cc840661faab 100644 --- a/samples/bpf/xdp_redirect_map_user.c +++ b/samples/bpf/xdp_redirect_map_user.c @@ -116,7 +116,7 @@ int main(int argc, char **argv) xdp_flags |= XDP_FLAGS_SKB_MODE; break; case 'N': - xdp_flags |= XDP_FLAGS_DRV_MODE; + /* default, set below */ break; case 'F': xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -127,6 +127,9 @@ int main(int argc, char **argv) } } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + if (optind == argc) { printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]); return 1; diff --git a/samples/bpf/xdp_redirect_user.c b/samples/bpf/xdp_redirect_user.c index 5440cd620607..71dff8e3382a 100644 --- a/samples/bpf/xdp_redirect_user.c +++ b/samples/bpf/xdp_redirect_user.c @@ -117,7 +117,7 @@ int main(int argc, char **argv) xdp_flags |= XDP_FLAGS_SKB_MODE; break; case 'N': - xdp_flags |= XDP_FLAGS_DRV_MODE; + /* default, set below */ break; case 'F': xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -128,6 +128,9 @@ int main(int argc, char **argv) } } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + if (optind == argc) { printf("usage: %s <IFNAME|IFINDEX>_IN <IFNAME|IFINDEX>_OUT\n", argv[0]); return 1; diff --git a/samples/bpf/xdp_router_ipv4_user.c b/samples/bpf/xdp_router_ipv4_user.c index 1469b66ebad1..fef286c5add2 100644 --- a/samples/bpf/xdp_router_ipv4_user.c +++ b/samples/bpf/xdp_router_ipv4_user.c @@ -662,6 +662,9 @@ int main(int ac, 
char **argv) } } + if (!(flags & XDP_FLAGS_SKB_MODE)) + flags |= XDP_FLAGS_DRV_MODE; + if (optind == ac) { usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_rxq_info_user.c b/samples/bpf/xdp_rxq_info_user.c index 8fc3ad01de72..fc4983fd6959 100644 --- a/samples/bpf/xdp_rxq_info_user.c +++ b/samples/bpf/xdp_rxq_info_user.c @@ -551,6 +551,10 @@ int main(int argc, char **argv) return EXIT_FAIL_OPTION; } } + + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + /* Required option */ if (ifindex == -1) { fprintf(stderr, "ERR: required option --dev missing\n"); diff --git a/samples/bpf/xdp_sample_pkts_user.c b/samples/bpf/xdp_sample_pkts_user.c index a5760e8bf2c4..8c1af1b7372d 100644 --- a/samples/bpf/xdp_sample_pkts_user.c +++ b/samples/bpf/xdp_sample_pkts_user.c @@ -52,13 +52,13 @@ static int do_detach(int idx, const char *name) __u32 curr_prog_id = 0; int err = 0; - err = bpf_get_link_xdp_id(idx, &curr_prog_id, 0); + err = bpf_get_link_xdp_id(idx, &curr_prog_id, xdp_flags); if (err) { printf("bpf_get_link_xdp_id failed\n"); return err; } if (prog_id == curr_prog_id) { - err = bpf_set_link_xdp_fd(idx, -1, 0); + err = bpf_set_link_xdp_fd(idx, -1, xdp_flags); if (err < 0) printf("ERROR: failed to detach prog from %s\n", name); } else if (!curr_prog_id) { @@ -115,7 +115,7 @@ int main(int argc, char **argv) .prog_type = BPF_PROG_TYPE_XDP, }; struct perf_buffer_opts pb_opts = {}; - const char *optstr = "F"; + const char *optstr = "FS"; int prog_fd, map_fd, opt; struct bpf_object *obj; struct bpf_map *map; @@ -127,12 +127,18 @@ int main(int argc, char **argv) case 'F': xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; break; + case 'S': + xdp_flags |= XDP_FLAGS_SKB_MODE; + break; default: usage(basename(argv[0])); return 1; } } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + if (optind == argc) { usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 2fe4c7f5ffe5..5f33b5530032 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -231,7 +231,7 @@ int main(int argc, char **argv) xdp_flags |= XDP_FLAGS_SKB_MODE; break; case 'N': - xdp_flags |= XDP_FLAGS_DRV_MODE; + /* default, set below */ break; case 'F': xdp_flags &= ~XDP_FLAGS_UPDATE_IF_NOEXIST; @@ -243,6 +243,9 @@ int main(int argc, char **argv) opt_flags[opt] = 0; } + if (!(xdp_flags & XDP_FLAGS_SKB_MODE)) + xdp_flags |= XDP_FLAGS_DRV_MODE; + for (i = 0; i < strlen(optstr); i++) { if (opt_flags[(unsigned int)optstr[i]]) { fprintf(stderr, "Missing argument -%c\n", optstr[i]); diff --git a/samples/bpf/xdpsock_user.c b/samples/bpf/xdpsock_user.c index a15480010828..d74c4c83fc93 100644 --- a/samples/bpf/xdpsock_user.c +++ b/samples/bpf/xdpsock_user.c @@ -10,6 +10,9 @@ #include <linux/if_link.h> #include <linux/if_xdp.h> #include <linux/if_ether.h> +#include <linux/ip.h> +#include <linux/udp.h> +#include <arpa/inet.h> #include <locale.h> #include <net/ethernet.h> #include <net/if.h> @@ -45,12 +48,14 @@ #endif #define NUM_FRAMES (4 * 1024) -#define BATCH_SIZE 64 +#define MIN_PKT_SIZE 64 #define DEBUG_HEXDUMP 0 typedef __u64 u64; typedef __u32 u32; +typedef __u16 u16; +typedef __u8 u8; static unsigned long prev_time; @@ -65,6 +70,13 @@ static u32 opt_xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST; static const char *opt_if = ""; static int opt_ifindex; static int opt_queue; +static unsigned long opt_duration; +static unsigned long start_time; +static bool benchmark_done; +static u32 
opt_batch_size = 64; +static int opt_pkt_count; +static u16 opt_pkt_size = MIN_PKT_SIZE; +static u32 opt_pkt_fill_pattern = 0x12345678; static int opt_poll; static int opt_interval = 1; static u32 opt_xdp_bind_flags = XDP_USE_NEED_WAKEUP; @@ -167,10 +179,21 @@ static void dump_stats(void) } } +static bool is_benchmark_done(void) +{ + if (opt_duration > 0) { + unsigned long dt = (get_nsecs() - start_time); + + if (dt >= opt_duration) + benchmark_done = true; + } + return benchmark_done; +} + static void *poller(void *arg) { (void)arg; - for (;;) { + while (!is_benchmark_done()) { sleep(opt_interval); dump_stats(); } @@ -196,6 +219,11 @@ static void remove_xdp_program(void) static void int_exit(int sig) { + benchmark_done = true; +} + +static void xdpsock_cleanup(void) +{ struct xsk_umem *umem = xsks[0]->umem->umem; int i; @@ -204,8 +232,6 @@ static void int_exit(int sig) xsk_socket__delete(xsks[i]->xsk); (void)xsk_umem__delete(umem); remove_xdp_program(); - - exit(EXIT_SUCCESS); } static void __exit_with_error(int error, const char *file, const char *func, @@ -220,13 +246,6 @@ static void __exit_with_error(int error, const char *file, const char *func, #define exit_with_error(error) __exit_with_error(error, __FILE__, __func__, \ __LINE__) - -static const char pkt_data[] = - "\x3c\xfd\xfe\x9e\x7f\x71\xec\xb1\xd7\x98\x3a\xc0\x08\x00\x45\x00" - "\x00\x2e\x00\x00\x00\x00\x40\x11\x88\x97\x05\x08\x07\x08\xc8\x14" - "\x1e\x04\x10\x92\x10\x92\x00\x1a\x6d\xa3\x34\x33\x1f\x69\x40\x6b" - "\x54\x59\xb6\x14\x2d\x11\x44\xbf\xaf\xd9\xbe\xaa"; - static void swap_mac_addresses(void *data) { struct ether_header *eth = (struct ether_header *)data; @@ -274,11 +293,243 @@ static void hex_dump(void *pkt, size_t length, u64 addr) printf("\n"); } -static size_t gen_eth_frame(struct xsk_umem_info *umem, u64 addr) +static void *memset32_htonl(void *dest, u32 val, u32 size) +{ + u32 *ptr = (u32 *)dest; + int i; + + val = htonl(val); + + for (i = 0; i < (size & (~0x3)); i += 4) + ptr[i >> 2] = val; + + for (; i < size; i++) + ((char *)dest)[i] = ((char *)&val)[i & 3]; + + return dest; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline unsigned short from32to16(unsigned int x) +{ + /* add up 16-bit and 16-bit for 16+c bit */ + x = (x & 0xffff) + (x >> 16); + /* add up carry.. 
*/ + x = (x & 0xffff) + (x >> 16); + return x; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static unsigned int do_csum(const unsigned char *buff, int len) +{ + unsigned int result = 0; + int odd; + + if (len <= 0) + goto out; + odd = 1 & (unsigned long)buff; + if (odd) { +#ifdef __LITTLE_ENDIAN + result += (*buff << 8); +#else + result = *buff; +#endif + len--; + buff++; + } + if (len >= 2) { + if (2 & (unsigned long)buff) { + result += *(unsigned short *)buff; + len -= 2; + buff += 2; + } + if (len >= 4) { + const unsigned char *end = buff + + ((unsigned int)len & ~3); + unsigned int carry = 0; + + do { + unsigned int w = *(unsigned int *)buff; + + buff += 4; + result += carry; + result += w; + carry = (w > result); + } while (buff < end); + result += carry; + result = (result & 0xffff) + (result >> 16); + } + if (len & 2) { + result += *(unsigned short *)buff; + buff += 2; + } + } + if (len & 1) +#ifdef __LITTLE_ENDIAN + result += *buff; +#else + result += (*buff << 8); +#endif + result = from32to16(result); + if (odd) + result = ((result >> 8) & 0xff) | ((result & 0xff) << 8); +out: + return result; +} + +__sum16 ip_fast_csum(const void *iph, unsigned int ihl); + +/* + * This is a version of ip_compute_csum() optimized for IP headers, + * which always checksum on 4 octet boundaries. + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +__sum16 ip_fast_csum(const void *iph, unsigned int ihl) +{ + return (__force __sum16)~do_csum(iph, ihl * 4); +} + +/* + * Fold a partial checksum + * This function code has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. 
*/ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum); + +/* + * This function code has been taken from + * Linux kernel lib/checksum.c + */ +__wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN__ + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __wsum)from64to32(s); +} + +/* + * This function has been taken from + * Linux kernel include/asm-generic/checksum.h + */ +static inline __sum16 +csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +static inline u16 udp_csum(u32 saddr, u32 daddr, u32 len, + u8 proto, u16 *udp_pkt) +{ + u32 csum = 0; + u32 cnt = 0; + + /* udp hdr and data */ + for (; cnt < len; cnt += 2) + csum += udp_pkt[cnt >> 1]; + + return csum_tcpudp_magic(saddr, daddr, len, proto, csum); +} + +#define ETH_FCS_SIZE 4 + +#define PKT_HDR_SIZE (sizeof(struct ethhdr) + sizeof(struct iphdr) + \ + sizeof(struct udphdr)) + +#define PKT_SIZE (opt_pkt_size - ETH_FCS_SIZE) +#define IP_PKT_SIZE (PKT_SIZE - sizeof(struct ethhdr)) +#define UDP_PKT_SIZE (IP_PKT_SIZE - sizeof(struct iphdr)) +#define UDP_PKT_DATA_SIZE (UDP_PKT_SIZE - sizeof(struct udphdr)) + +static u8 pkt_data[XSK_UMEM__DEFAULT_FRAME_SIZE]; + +static void gen_eth_hdr_data(void) +{ + struct udphdr *udp_hdr = (struct udphdr *)(pkt_data + + sizeof(struct ethhdr) + + sizeof(struct iphdr)); + struct iphdr *ip_hdr = (struct iphdr *)(pkt_data + + sizeof(struct ethhdr)); + struct ethhdr *eth_hdr = (struct ethhdr *)pkt_data; + + /* ethernet header */ + memcpy(eth_hdr->h_dest, "\x3c\xfd\xfe\x9e\x7f\x71", ETH_ALEN); + memcpy(eth_hdr->h_source, "\xec\xb1\xd7\x98\x3a\xc0", ETH_ALEN); + eth_hdr->h_proto = htons(ETH_P_IP); + + /* IP header */ + ip_hdr->version = IPVERSION; + ip_hdr->ihl = 0x5; /* 20 byte header */ + ip_hdr->tos = 0x0; + ip_hdr->tot_len = htons(IP_PKT_SIZE); + ip_hdr->id = 0; + ip_hdr->frag_off = 0; + ip_hdr->ttl = IPDEFTTL; + ip_hdr->protocol = IPPROTO_UDP; + ip_hdr->saddr = htonl(0x0a0a0a10); + ip_hdr->daddr = htonl(0x0a0a0a20); + + /* IP header checksum */ + ip_hdr->check = 0; + ip_hdr->check = ip_fast_csum((const void *)ip_hdr, ip_hdr->ihl); + + /* UDP header */ + udp_hdr->source = htons(0x1000); + udp_hdr->dest = htons(0x1000); + udp_hdr->len = htons(UDP_PKT_SIZE); + + /* UDP data */ + memset32_htonl(pkt_data + PKT_HDR_SIZE, opt_pkt_fill_pattern, + UDP_PKT_DATA_SIZE); + + /* UDP header checksum */ + udp_hdr->check = 0; + udp_hdr->check = udp_csum(ip_hdr->saddr, ip_hdr->daddr, UDP_PKT_SIZE, + IPPROTO_UDP, (u16 *)udp_hdr); +} + +static void gen_eth_frame(struct xsk_umem_info *umem, u64 addr) { memcpy(xsk_umem__get_data(umem->buffer, addr), pkt_data, - sizeof(pkt_data) - 1); - return sizeof(pkt_data) - 1; + PKT_SIZE); } static struct xsk_umem_info *xsk_configure_umem(void *buffer, u64 size) @@ -375,6 +626,11 @@ static struct option long_options[] = { {"unaligned", no_argument, 0, 'u'}, {"shared-umem", no_argument, 0, 'M'}, {"force", no_argument, 0, 'F'}, + {"duration", required_argument, 0, 'd'}, + {"batch-size", required_argument, 0, 'b'}, + {"tx-pkt-count", required_argument, 0, 'C'}, + {"tx-pkt-size", required_argument, 0, 's'}, + {"tx-pkt-pattern", required_argument, 0, 'P'}, {0, 0, 0, 0} }; @@ -399,8 +655,21 @@ static void 
usage(const char *prog) " -u, --unaligned Enable unaligned chunk placement\n" " -M, --shared-umem Enable XDP_SHARED_UMEM\n" " -F, --force Force loading the XDP prog\n" + " -d, --duration=n Duration in secs to run command.\n" + " Default: forever.\n" + " -b, --batch-size=n Batch size for sending or receiving\n" + " packets. Default: %d\n" + " -C, --tx-pkt-count=n Number of packets to send.\n" + " Default: Continuous packets.\n" + " -s, --tx-pkt-size=n Transmit packet size.\n" + " (Default: %d bytes)\n" + " Min size: %d, Max size %d.\n" + " -P, --tx-pkt-pattern=nPacket fill pattern. Default: 0x%x\n" "\n"; - fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE); + fprintf(stderr, str, prog, XSK_UMEM__DEFAULT_FRAME_SIZE, + opt_batch_size, MIN_PKT_SIZE, MIN_PKT_SIZE, + XSK_UMEM__DEFAULT_FRAME_SIZE, opt_pkt_fill_pattern); + exit(EXIT_FAILURE); } @@ -411,7 +680,7 @@ static void parse_command_line(int argc, char **argv) opterr = 0; for (;;) { - c = getopt_long(argc, argv, "Frtli:q:psSNn:czf:muM", + c = getopt_long(argc, argv, "Frtli:q:pSNn:czf:muMd:b:C:s:P:", long_options, &option_index); if (c == -1) break; @@ -440,7 +709,7 @@ static void parse_command_line(int argc, char **argv) opt_xdp_bind_flags |= XDP_COPY; break; case 'N': - opt_xdp_flags |= XDP_FLAGS_DRV_MODE; + /* default, set below */ break; case 'n': opt_interval = atoi(optarg); @@ -469,11 +738,37 @@ static void parse_command_line(int argc, char **argv) case 'M': opt_num_xsks = MAX_SOCKS; break; + case 'd': + opt_duration = atoi(optarg); + opt_duration *= 1000000000; + break; + case 'b': + opt_batch_size = atoi(optarg); + break; + case 'C': + opt_pkt_count = atoi(optarg); + break; + case 's': + opt_pkt_size = atoi(optarg); + if (opt_pkt_size > (XSK_UMEM__DEFAULT_FRAME_SIZE) || + opt_pkt_size < MIN_PKT_SIZE) { + fprintf(stderr, + "ERROR: Invalid frame size %d\n", + opt_pkt_size); + usage(basename(argv[0])); + } + break; + case 'P': + opt_pkt_fill_pattern = strtol(optarg, NULL, 16); + break; default: usage(basename(argv[0])); } } + if (!(opt_xdp_flags & XDP_FLAGS_SKB_MODE)) + opt_xdp_flags |= XDP_FLAGS_DRV_MODE; + opt_ifindex = if_nametoindex(opt_if); if (!opt_ifindex) { fprintf(stderr, "ERROR: interface \"%s\" does not exist\n", @@ -513,7 +808,7 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); - ndescs = (xsk->outstanding_tx > BATCH_SIZE) ? BATCH_SIZE : + ndescs = (xsk->outstanding_tx > opt_batch_size) ? 
opt_batch_size : xsk->outstanding_tx; /* re-add completed Tx buffers */ @@ -542,7 +837,8 @@ static inline void complete_tx_l2fwd(struct xsk_socket_info *xsk, } } -static inline void complete_tx_only(struct xsk_socket_info *xsk) +static inline void complete_tx_only(struct xsk_socket_info *xsk, + int batch_size) { unsigned int rcvd; u32 idx; @@ -553,7 +849,7 @@ static inline void complete_tx_only(struct xsk_socket_info *xsk) if (!opt_need_wakeup || xsk_ring_prod__needs_wakeup(&xsk->tx)) kick_tx(xsk); - rcvd = xsk_ring_cons__peek(&xsk->umem->cq, BATCH_SIZE, &idx); + rcvd = xsk_ring_cons__peek(&xsk->umem->cq, batch_size, &idx); if (rcvd > 0) { xsk_ring_cons__release(&xsk->umem->cq, rcvd); xsk->outstanding_tx -= rcvd; @@ -567,7 +863,7 @@ static void rx_drop(struct xsk_socket_info *xsk, struct pollfd *fds) u32 idx_rx = 0, idx_fq = 0; int ret; - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) ret = poll(fds, num_socks, opt_timeout); @@ -619,36 +915,68 @@ static void rx_drop_all(void) for (i = 0; i < num_socks; i++) rx_drop(xsks[i], fds); + + if (benchmark_done) + break; } } -static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb) +static void tx_only(struct xsk_socket_info *xsk, u32 frame_nb, int batch_size) { u32 idx; + unsigned int i; - if (xsk_ring_prod__reserve(&xsk->tx, BATCH_SIZE, &idx) == BATCH_SIZE) { - unsigned int i; - - for (i = 0; i < BATCH_SIZE; i++) { - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->addr = - (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; - xsk_ring_prod__tx_desc(&xsk->tx, idx + i)->len = - sizeof(pkt_data) - 1; - } + while (xsk_ring_prod__reserve(&xsk->tx, batch_size, &idx) < + batch_size) { + complete_tx_only(xsk, batch_size); + } - xsk_ring_prod__submit(&xsk->tx, BATCH_SIZE); - xsk->outstanding_tx += BATCH_SIZE; - frame_nb += BATCH_SIZE; - frame_nb %= NUM_FRAMES; + for (i = 0; i < batch_size; i++) { + struct xdp_desc *tx_desc = xsk_ring_prod__tx_desc(&xsk->tx, + idx + i); + tx_desc->addr = (frame_nb + i) << XSK_UMEM__DEFAULT_FRAME_SHIFT; + tx_desc->len = PKT_SIZE; } - complete_tx_only(xsk); + xsk_ring_prod__submit(&xsk->tx, batch_size); + xsk->outstanding_tx += batch_size; + frame_nb += batch_size; + frame_nb %= NUM_FRAMES; + complete_tx_only(xsk, batch_size); +} + +static inline int get_batch_size(int pkt_cnt) +{ + if (!opt_pkt_count) + return opt_batch_size; + + if (pkt_cnt + opt_batch_size <= opt_pkt_count) + return opt_batch_size; + + return opt_pkt_count - pkt_cnt; +} + +static void complete_tx_only_all(void) +{ + bool pending; + int i; + + do { + pending = false; + for (i = 0; i < num_socks; i++) { + if (xsks[i]->outstanding_tx) { + complete_tx_only(xsks[i], opt_batch_size); + pending = !!xsks[i]->outstanding_tx; + } + } + } while (pending); } static void tx_only_all(void) { struct pollfd fds[MAX_SOCKS] = {}; u32 frame_nb[MAX_SOCKS] = {}; + int pkt_cnt = 0; int i, ret; for (i = 0; i < num_socks; i++) { @@ -656,7 +984,9 @@ static void tx_only_all(void) fds[0].events = POLLOUT; } - for (;;) { + while ((opt_pkt_count && pkt_cnt < opt_pkt_count) || !opt_pkt_count) { + int batch_size = get_batch_size(pkt_cnt); + if (opt_poll) { ret = poll(fds, num_socks, opt_timeout); if (ret <= 0) @@ -667,8 +997,16 @@ static void tx_only_all(void) } for (i = 0; i < num_socks; i++) - tx_only(xsks[i], frame_nb[i]); + tx_only(xsks[i], frame_nb[i], batch_size); + + pkt_cnt += batch_size; + + if (benchmark_done) + break; } + + if 
(opt_pkt_count) + complete_tx_only_all(); } static void l2fwd(struct xsk_socket_info *xsk, struct pollfd *fds) @@ -679,7 +1017,7 @@ complete_tx_l2fwd(xsk, fds); - rcvd = xsk_ring_cons__peek(&xsk->rx, BATCH_SIZE, &idx_rx); + rcvd = xsk_ring_cons__peek(&xsk->rx, opt_batch_size, &idx_rx); if (!rcvd) { if (xsk_ring_prod__needs_wakeup(&xsk->umem->fq)) ret = poll(fds, num_socks, opt_timeout); @@ -736,6 +1074,9 @@ static void l2fwd_all(void) for (i = 0; i < num_socks; i++) l2fwd(xsks[i], fds); + + if (benchmark_done) + break; } } @@ -831,9 +1172,12 @@ int main(int argc, char **argv) for (i = 0; i < opt_num_xsks; i++) xsks[num_socks++] = xsk_configure_socket(umem, rx, tx); - if (opt_bench == BENCH_TXONLY) + if (opt_bench == BENCH_TXONLY) { + gen_eth_hdr_data(); + for (i = 0; i < NUM_FRAMES; i++) gen_eth_frame(umem, i * opt_xsk_frame_size); + } if (opt_num_xsks > 1 && opt_bench != BENCH_TXONLY) enter_xsks_into_map(obj); @@ -849,6 +1193,7 @@ exit_with_error(ret); prev_time = get_nsecs(); + start_time = prev_time; if (opt_bench == BENCH_RXDROP) rx_drop_all(); @@ -857,5 +1202,11 @@ else l2fwd_all(); + benchmark_done = true; + + pthread_join(pt, NULL); + + xdpsock_cleanup(); + return 0; } diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst new file mode 100644 index 000000000000..86a87da97d0b --- /dev/null +++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst @@ -0,0 +1,305 @@ +================ +bpftool-gen +================ +------------------------------------------------------------------------------- +tool for BPF code-generation +------------------------------------------------------------------------------- + +:Manual section: 8 + +SYNOPSIS +======== + + **bpftool** [*OPTIONS*] **gen** *COMMAND* + + *OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] } + + *COMMAND* := { **skeleton** | **help** } + +GEN COMMANDS +============= + +| **bpftool** **gen skeleton** *FILE* +| **bpftool** **gen help** + +DESCRIPTION +=========== + **bpftool gen skeleton** *FILE* + Generate a BPF skeleton C header file for a given *FILE*. + + A BPF skeleton is an alternative interface to existing libbpf + APIs for working with BPF objects. Skeleton code is intended + to significantly shorten and simplify code to load and work + with BPF programs from the userspace side. Generated code is + tailored to the specific input BPF object *FILE*, reflecting + its structure by listing out available maps, programs, + variables, etc. The skeleton eliminates the need to look up + the mentioned components by name. Instead, if skeleton + instantiation succeeds, they are populated in the skeleton + structure as valid libbpf types (e.g., struct bpf_map + pointer) and can be passed to existing generic libbpf APIs. + + In addition to simple and reliable access to maps and + programs, the skeleton provides storage for BPF links (struct + bpf_link) for each BPF program within the BPF object. When + requested, supported BPF programs will be automatically + attached and the resulting BPF links stored for further use + by the user in pre-allocated fields in the skeleton struct. + For BPF programs that can't be automatically attached by + libbpf, the user can attach them manually and store the + resulting BPF link in the per-program link field. All links + set up this way will be automatically destroyed on BPF + skeleton destruction. This eliminates the need for users to + manage links manually and lets them rely on libbpf to detach + programs and free up resources.
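+
+		For illustration, a minimal sketch of that manual-attach
+		pattern, assuming a hypothetical object **example** with a
+		program **my_prog** that libbpf cannot auto-attach, and an
+		already opened perf event FD **pe_fd**::
+
+			struct example *skel = example__open_and_load();
+
+			if (!skel)
+				return 1;
+
+			/* attach by hand, but let the skeleton own the link */
+			skel->links.my_prog =
+				bpf_program__attach_perf_event(skel->progs.my_prog,
+							       pe_fd);
+
+			/* destroys the manually stored link as well */
+			example__destroy(skel);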
+ + Another facility provided by the BPF skeleton is an + interface to global variables of all supported kinds: + mutable, read-only, as well as extern ones. This interface + allows presetting initial values of variables before the BPF + object is loaded and verified by the kernel. For + non-read-only variables, the same interface can be used to + fetch values of global variables on the userspace side, even + as they are modified by BPF code. + + During skeleton generation, the contents of the source BPF + object *FILE* are embedded within the generated code, so the + object file does not need to be kept around. This ensures the + skeleton and the BPF object file match 1-to-1 and always stay + in sync. Generated code is dual-licensed under LGPL-2.1 and + BSD-2-Clause licenses. + + It is a design goal and guarantee that skeleton interfaces + are interoperable with generic libbpf APIs. The user should + always be able to use the skeleton API to create and load a + BPF object, and later use libbpf APIs to keep working with + specific maps, programs, etc. + + As part of the skeleton, a few custom functions are + generated. Each of them is prefixed with the object name, + derived from the object file name. For example, if the BPF + object file name is **example.o**, the BPF object name will + be **example**. The following custom functions are provided + in that case: + + - **example__open** and **example__open_opts**. + These functions are used to instantiate the skeleton. They + correspond to libbpf's **bpf_object__open()** API. The + **_opts** variant accepts extra **bpf_object_open_opts** + options. + + - **example__load**. + This function creates maps, loads and verifies BPF + programs, and initializes global data maps. It corresponds + to libbpf's **bpf_object__load()** API. + + - **example__open_and_load** combines **example__open** and + **example__load** invocations in one commonly used + operation. + + - **example__attach** and **example__detach** + This pair of functions attaches and detaches, respectively, + an already loaded BPF object. Only BPF programs of types + supported by libbpf for auto-attachment will be + auto-attached and their corresponding BPF links + instantiated. For other BPF programs, the user can manually + create a BPF link and assign it to the corresponding field + in the skeleton struct. **example__detach** will detach + both the links created automatically and those populated + manually by the user. + + - **example__destroy** + Detaches and unloads BPF programs, freeing up all the + resources used by the skeleton and the BPF object. + + If the BPF object has global variables, structs with a memory + layout matching the corresponding global data section layout + will be created. Currently supported are the *.data*, *.bss*, + *.rodata*, and *.kconfig* structs/data sections. These data + sections/structs can be used to set up initial values of + variables, if set before **example__load**. Afterwards, if + the target kernel supports memory-mapped BPF arrays, the same + structs can be used to fetch and update (non-read-only) data + from userspace, with the same simplicity as on the BPF side. + + **bpftool gen help** + Print short help message. + +OPTIONS +======= + -h, --help + Print short generic help message (similar to **bpftool help**). + + -V, --version + Print version number (similar to **bpftool version**). + + -j, --json + Generate JSON output. For commands that cannot produce JSON, + this option has no effect.
+ + -p, --pretty + Generate human-readable JSON output. Implies **-j**. + + -d, --debug + Print all logs available from libbpf, including debug-level + information. + +EXAMPLES +======== +**$ cat example.c** +:: + + #include <stdbool.h> + #include <linux/ptrace.h> + #include <linux/bpf.h> + #include "bpf_helpers.h" + + const volatile int param1 = 42; + bool global_flag = true; + struct { int x; } data = {}; + + struct { + __uint(type, BPF_MAP_TYPE_HASH); + __uint(max_entries, 128); + __type(key, int); + __type(value, long); + } my_map SEC(".maps"); + + SEC("raw_tp/sys_enter") + int handle_sys_enter(struct pt_regs *ctx) + { + static long my_static_var; + if (global_flag) + my_static_var++; + else + data.x += param1; + return 0; + } + + SEC("raw_tp/sys_exit") + int handle_sys_exit(struct pt_regs *ctx) + { + int zero = 0; + bpf_map_lookup_elem(&my_map, &zero); + return 0; + } + +This is an example BPF application with two BPF programs and a mix of BPF maps +and global variables. + +**$ bpftool gen skeleton example.o** +:: + + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + + /* THIS FILE IS AUTOGENERATED! */ + #ifndef __EXAMPLE_SKEL_H__ + #define __EXAMPLE_SKEL_H__ + + #include <stdlib.h> + #include <libbpf.h> + + struct example { + struct bpf_object_skeleton *skeleton; + struct bpf_object *obj; + struct { + struct bpf_map *rodata; + struct bpf_map *data; + struct bpf_map *bss; + struct bpf_map *my_map; + } maps; + struct { + struct bpf_program *handle_sys_enter; + struct bpf_program *handle_sys_exit; + } progs; + struct { + struct bpf_link *handle_sys_enter; + struct bpf_link *handle_sys_exit; + } links; + struct example__bss { + struct { + int x; + } data; + } *bss; + struct example__data { + _Bool global_flag; + long int handle_sys_enter_my_static_var; + } *data; + struct example__rodata { + int param1; + } *rodata; + }; + + static void example__destroy(struct example *obj); + static inline struct example *example__open_opts( + const struct bpf_object_open_opts *opts); + static inline struct example *example__open(); + static inline int example__load(struct example *obj); + static inline struct example *example__open_and_load(); + static inline int example__attach(struct example *obj); + static inline void example__detach(struct example *obj); + + #endif /* __EXAMPLE_SKEL_H__ */ + +**$ cat example_user.c** +:: + + #include "example.skel.h" + + int main() + { + struct example *skel; + int err = 0; + + skel = example__open(); + if (!skel) + goto cleanup; + + skel->rodata->param1 = 128; + + err = example__load(skel); + if (err) + goto cleanup; + + err = example__attach(skel); + if (err) + goto cleanup; + + /* all libbpf APIs are usable */ + printf("my_map name: %s\n", bpf_map__name(skel->maps.my_map)); + printf("sys_enter prog FD: %d\n", + bpf_program__fd(skel->progs.handle_sys_enter)); + + /* detach and re-attach sys_exit program */ + bpf_link__destroy(skel->links.handle_sys_exit); + skel->links.handle_sys_exit = + bpf_program__attach(skel->progs.handle_sys_exit); + + printf("my_static_var: %ld\n", + skel->bss->handle_sys_enter_my_static_var); + + cleanup: + example__destroy(skel); + return err; + } + +**# ./example_user** +:: + + my_map name: my_map + sys_enter prog FD: 8 + my_static_var: 7 + +This is a stripped-out version of the skeleton generated for the above example code.
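+
+For completeness, a sketch of the **_opts** open path for the same example
+object; the **example_custom** name here is an illustrative assumption, not
+something the generated code requires::
+
+	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
+		.object_name = "example_custom",
+	);
+	struct example *skel = example__open_opts(&opts);
+
+	if (!skel)
+		return 1;	/* open failed */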
+ +SEE ALSO +======== + **bpf**\ (2), + **bpf-helpers**\ (7), + **bpftool**\ (8), + **bpftool-map**\ (8), + **bpftool-prog**\ (8), + **bpftool-cgroup**\ (8), + **bpftool-feature**\ (8), + **bpftool-net**\ (8), + **bpftool-perf**\ (8), + **bpftool-btf**\ (8) diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst index 1c0f7146aab0..cdeae8ae90ba 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-map.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst @@ -39,9 +39,9 @@ MAP COMMANDS | **bpftool** **map freeze** *MAP* | **bpftool** **map help** | -| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } +| *MAP* := { **id** *MAP_ID* | **pinned** *FILE* | **name** *MAP_NAME* } | *DATA* := { [**hex**] *BYTES* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } | *VALUE* := { *DATA* | *MAP* | *PROG* } | *UPDATE_FLAGS* := { **any** | **exist** | **noexist** } | *TYPE* := { **hash** | **array** | **prog_array** | **perf_event_array** | **percpu_hash** @@ -55,8 +55,9 @@ DESCRIPTION =========== **bpftool map { show | list }** [*MAP*] Show information about loaded maps. If *MAP* is specified - show information only about given map, otherwise list all - maps currently loaded on the system. + show information only about given maps, otherwise list all + maps currently loaded on the system. In case of **name**, + *MAP* may match several maps which will all be shown. Output will start with map ID followed by map type and zero or more named attributes (depending on kernel version). @@ -66,7 +67,8 @@ DESCRIPTION as *FILE*. **bpftool map dump** *MAP* - Dump all entries in a given *MAP*. + Dump all entries in a given *MAP*. In case of **name**, + *MAP* may match several maps which will all be dumped. **bpftool map update** *MAP* [**key** *DATA*] [**value** *VALUE*] [*UPDATE_FLAGS*] Update map entry for a given *KEY*. diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 7a374b3c851d..64ddf8a4c518 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -33,7 +33,7 @@ PROG COMMANDS | **bpftool** **prog help** | | *MAP* := { **id** *MAP_ID* | **pinned** *FILE* } -| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* } +| *PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* | **name** *PROG_NAME* } | *TYPE* := { | **socket** | **kprobe** | **kretprobe** | **classifier** | **action** | | **tracepoint** | **raw_tracepoint** | **xdp** | **perf_event** | **cgroup/skb** | @@ -53,8 +53,10 @@ DESCRIPTION =========== **bpftool prog { show | list }** [*PROG*] Show information about loaded programs. If *PROG* is - specified show information only about given program, otherwise - list all programs currently loaded on the system. + specified show information only about given programs, + otherwise list all programs currently loaded on the system. + In case of **tag** or **name**, *PROG* may match several + programs which will all be shown. Output will start with program ID followed by program type and zero or more named attributes (depending on kernel version). @@ -68,11 +70,15 @@ DESCRIPTION performed via the **kernel.bpf_stats_enabled** sysctl knob. 
**bpftool prog dump xlated** *PROG* [{ **file** *FILE* | **opcodes** | **visual** | **linum** }] - Dump eBPF instructions of the program from the kernel. By + Dump eBPF instructions of the programs from the kernel. By default, eBPF will be disassembled and printed to standard output in human-readable format. In this case, **opcodes** controls if raw opcodes should be printed as well. + In case of **tag** or **name**, *PROG* may match several + programs which will all be dumped. However, if **file** or + **visual** is specified, *PROG* must match a single program. + If **file** is specified, the binary image will instead be written to *FILE*. @@ -80,15 +86,17 @@ DESCRIPTION built instead, and eBPF instructions will be presented with CFG in DOT format, on standard output. - If the prog has line_info available, the source line will + If the programs have line_info available, the source line will be displayed by default. If **linum** is specified, the filename, line number and line column will also be displayed on top of the source line. **bpftool prog dump jited** *PROG* [{ **file** *FILE* | **opcodes** | **linum** }] Dump jited image (host machine code) of the program. + If *FILE* is specified image will be written to a file, otherwise it will be disassembled and printed to stdout. + *PROG* must match a single program when **file** is specified. **opcodes** controls if raw opcodes will be printed. diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst index 6a9c52ef84a9..34239fda69ed 100644 --- a/tools/bpf/bpftool/Documentation/bpftool.rst +++ b/tools/bpf/bpftool/Documentation/bpftool.rst @@ -81,4 +81,5 @@ SEE ALSO **bpftool-feature**\ (8), **bpftool-net**\ (8), **bpftool-perf**\ (8), - **bpftool-btf**\ (8) + **bpftool-btf**\ (8), + **bpftool-gen**\ (8), diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool index 70493a6da206..754d8395e451 100644 --- a/tools/bpf/bpftool/bash-completion/bpftool +++ b/tools/bpf/bpftool/bash-completion/bpftool @@ -59,6 +59,21 @@ _bpftool_get_map_ids_for_type() command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_map_names() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ + command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + +# Takes map type and adds matching map names to the list of suggestions. +_bpftool_get_map_names_for_type() +{ + local type="$1" + COMPREPLY+=( $( compgen -W "$( bpftool -jp map 2>&1 | \ + command grep -C2 "$type" | \ + command sed -n 's/.*"name": \(.*\),$/\1/p' )" -- "$cur" ) ) +} + _bpftool_get_prog_ids() { COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ @@ -71,6 +86,12 @@ _bpftool_get_prog_tags() command sed -n 's/.*"tag": "\(.*\)",$/\1/p' )" -- "$cur" ) ) } +_bpftool_get_prog_names() +{ + COMPREPLY+=( $( compgen -W "$( bpftool -jp prog 2>&1 | \ + command sed -n 's/.*"name": "\(.*\)",$/\1/p' )" -- "$cur" ) ) +} + _bpftool_get_btf_ids() { COMPREPLY+=( $( compgen -W "$( bpftool -jp btf 2>&1 | \ @@ -180,6 +201,52 @@ _bpftool_map_update_get_id() esac } +_bpftool_map_update_get_name() +{ + local command="$1" + + # Is it the map to update, or a map to insert into the map to update? + # Search for "value" keyword. 
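+	# E.g. in "bpftool map update name some_map key hex 01 value name X",
+	# X names the value: it completes to program names if some_map is a
+	# prog_array, or to map names if it is a map-in-map, not to the name
+	# of the map being updated.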
+ local idx value + for (( idx=7; idx < ${#words[@]}-1; idx++ )); do + if [[ ${words[idx]} == "value" ]]; then + value=1 + break + fi + done + if [[ $value -eq 0 ]]; then + case "$command" in + push) + _bpftool_get_map_names_for_type stack + ;; + enqueue) + _bpftool_get_map_names_for_type queue + ;; + *) + _bpftool_get_map_names + ;; + esac + return 0 + fi + + # Name to complete is for a value. It can be either prog name or map name. This + # depends on the type of the map to update. + local type=$(_bpftool_map_guess_map_type) + case $type in + array_of_maps|hash_of_maps) + _bpftool_get_map_names + return 0 + ;; + prog_array) + _bpftool_get_prog_names + return 0 + ;; + *) + return 0 + ;; + esac +} + _bpftool() { local cur prev words objword @@ -251,7 +318,8 @@ _bpftool() # Completion depends on object and command in use case $object in prog) - # Complete id, only for subcommands that use prog (but no map) ids + # Complete id and name, only for subcommands that use prog (but no + # map) ids/names. case $command in show|list|dump|pin) case $prev in @@ -259,12 +327,16 @@ _bpftool() _bpftool_get_prog_ids return 0 ;; + name) + _bpftool_get_prog_names + return 0 + ;; esac ;; esac - local PROG_TYPE='id pinned tag' - local MAP_TYPE='id pinned' + local PROG_TYPE='id pinned tag name' + local MAP_TYPE='id pinned name' case $command in show|list) [[ $prev != "$command" ]] && return 0 @@ -315,6 +387,9 @@ _bpftool() id) _bpftool_get_prog_ids ;; + name) + _bpftool_get_map_names + ;; pinned) _filedir ;; @@ -335,6 +410,9 @@ _bpftool() id) _bpftool_get_map_ids ;; + name) + _bpftool_get_map_names + ;; pinned) _filedir ;; @@ -399,6 +477,10 @@ _bpftool() _bpftool_get_map_ids return 0 ;; + name) + _bpftool_get_map_names + return 0 + ;; pinned|pinmaps) _filedir return 0 @@ -447,7 +529,7 @@ _bpftool() esac ;; map) - local MAP_TYPE='id pinned' + local MAP_TYPE='id pinned name' case $command in show|list|dump|peek|pop|dequeue|freeze) case $prev in @@ -473,6 +555,24 @@ _bpftool() esac return 0 ;; + name) + case "$command" in + peek) + _bpftool_get_map_names_for_type stack + _bpftool_get_map_names_for_type queue + ;; + pop) + _bpftool_get_map_names_for_type stack + ;; + dequeue) + _bpftool_get_map_names_for_type queue + ;; + *) + _bpftool_get_map_names + ;; + esac + return 0 + ;; *) return 0 ;; @@ -520,6 +620,10 @@ _bpftool() _bpftool_get_map_ids return 0 ;; + name) + _bpftool_get_map_names + return 0 + ;; key) COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; @@ -545,6 +649,10 @@ _bpftool() _bpftool_map_update_get_id $command return 0 ;; + name) + _bpftool_map_update_get_name $command + return 0 + ;; key) COMPREPLY+=( $( compgen -W 'hex' -- "$cur" ) ) ;; @@ -553,13 +661,13 @@ _bpftool() # map, depending on the type of the map to update. 
case "$(_bpftool_map_guess_map_type)" in array_of_maps|hash_of_maps) - local MAP_TYPE='id pinned' + local MAP_TYPE='id pinned name' COMPREPLY+=( $( compgen -W "$MAP_TYPE" \ -- "$cur" ) ) return 0 ;; prog_array) - local PROG_TYPE='id pinned tag' + local PROG_TYPE='id pinned tag name' COMPREPLY+=( $( compgen -W "$PROG_TYPE" \ -- "$cur" ) ) return 0 @@ -621,6 +729,10 @@ _bpftool() _bpftool_get_map_ids_for_type perf_event_array return 0 ;; + name) + _bpftool_get_map_names_for_type perf_event_array + return 0 + ;; cpu) return 0 ;; @@ -644,8 +756,8 @@ _bpftool() esac ;; btf) - local PROG_TYPE='id pinned tag' - local MAP_TYPE='id pinned' + local PROG_TYPE='id pinned tag name' + local MAP_TYPE='id pinned name' case $command in dump) case $prev in @@ -676,6 +788,17 @@ _bpftool() esac return 0 ;; + name) + case $pprev in + prog) + _bpftool_get_prog_names + ;; + map) + _bpftool_get_map_names + ;; + esac + return 0 + ;; format) COMPREPLY=( $( compgen -W "c raw" -- "$cur" ) ) ;; @@ -716,6 +839,17 @@ _bpftool() ;; esac ;; + gen) + case $command in + skeleton) + _filedir + ;; + *) + [[ $prev == $object ]] && \ + COMPREPLY=( $( compgen -W 'skeleton help' -- "$cur" ) ) + ;; + esac + ;; cgroup) case $command in show|list|tree) @@ -735,7 +869,7 @@ _bpftool() connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \ getsockopt setsockopt' local ATTACH_FLAGS='multi override' - local PROG_TYPE='id pinned tag' + local PROG_TYPE='id pinned tag name' case $prev in $command) _filedir @@ -760,7 +894,7 @@ _bpftool() elif [[ "$command" == "attach" ]]; then # We have an attach type on the command line, # but it is not the previous word, or - # "id|pinned|tag" (we already checked for + # "id|pinned|tag|name" (we already checked for # that). This should only leave the case when # we need attach flags for "attach" commamnd. _bpftool_one_of_list "$ATTACH_FLAGS" @@ -786,7 +920,7 @@ _bpftool() esac ;; net) - local PROG_TYPE='id pinned tag' + local PROG_TYPE='id pinned tag name' local ATTACH_TYPES='xdp xdpgeneric xdpdrv xdpoffload' case $command in show|list) diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c index 1ef45e55039e..2f017caa678d 100644 --- a/tools/bpf/bpftool/cgroup.c +++ b/tools/bpf/bpftool/cgroup.c @@ -117,6 +117,25 @@ static int count_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type) return prog_cnt; } +static int cgroup_has_attached_progs(int cgroup_fd) +{ + enum bpf_attach_type type; + bool no_prog = true; + + for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { + int count = count_attached_bpf_progs(cgroup_fd, type); + + if (count < 0 && errno != EINVAL) + return -1; + + if (count > 0) { + no_prog = false; + break; + } + } + + return no_prog ? 
0 : 1; +} static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, int level) { @@ -161,6 +180,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type, static int do_show(int argc, char **argv) { enum bpf_attach_type type; + int has_attached_progs; const char *path; int cgroup_fd; int ret = -1; @@ -192,6 +212,16 @@ static int do_show(int argc, char **argv) goto exit; } + has_attached_progs = cgroup_has_attached_progs(cgroup_fd); + if (has_attached_progs < 0) { + p_err("can't query bpf programs attached to %s: %s", + path, strerror(errno)); + goto exit_cgroup; + } else if (!has_attached_progs) { + ret = 0; + goto exit_cgroup; + } + if (json_output) jsonw_start_array(json_wtr); else @@ -212,6 +242,7 @@ static int do_show(int argc, char **argv) if (json_output) jsonw_end_array(json_wtr); +exit_cgroup: close(cgroup_fd); exit: return ret; @@ -228,7 +259,7 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, int typeflag, struct FTW *ftw) { enum bpf_attach_type type; - bool skip = true; + int has_attached_progs; int cgroup_fd; if (typeflag != FTW_D) @@ -240,22 +271,13 @@ static int do_show_tree_fn(const char *fpath, const struct stat *sb, return SHOW_TREE_FN_ERR; } - for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) { - int count = count_attached_bpf_progs(cgroup_fd, type); - - if (count < 0 && errno != EINVAL) { - p_err("can't query bpf programs attached to %s: %s", - fpath, strerror(errno)); - close(cgroup_fd); - return SHOW_TREE_FN_ERR; - } - if (count > 0) { - skip = false; - break; - } - } - - if (skip) { + has_attached_progs = cgroup_has_attached_progs(cgroup_fd); + if (has_attached_progs < 0) { + p_err("can't query bpf programs attached to %s: %s", + fpath, strerror(errno)); + close(cgroup_fd); + return SHOW_TREE_FN_ERR; + } else if (!has_attached_progs) { close(cgroup_fd); return 0; } diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c new file mode 100644 index 000000000000..7ce09a9a6999 --- /dev/null +++ b/tools/bpf/bpftool/gen.c @@ -0,0 +1,609 @@ +// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) +/* Copyright (C) 2019 Facebook */ + +#ifndef _GNU_SOURCE +#define _GNU_SOURCE +#endif +#include <ctype.h> +#include <errno.h> +#include <fcntl.h> +#include <linux/err.h> +#include <stdbool.h> +#include <stdio.h> +#include <string.h> +#include <unistd.h> +#include <bpf.h> +#include <libbpf.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <unistd.h> + +#include "btf.h" +#include "libbpf_internal.h" +#include "json_writer.h" +#include "main.h" + + +#define MAX_OBJ_NAME_LEN 64 + +static void sanitize_identifier(char *name) +{ + int i; + + for (i = 0; name[i]; i++) + if (!isalnum(name[i]) && name[i] != '_') + name[i] = '_'; +} + +static bool str_has_suffix(const char *str, const char *suffix) +{ + size_t i, n1 = strlen(str), n2 = strlen(suffix); + + if (n1 < n2) + return false; + + for (i = 0; i < n2; i++) { + if (str[n1 - i - 1] != suffix[n2 - i - 1]) + return false; + } + + return true; +} + +static void get_obj_name(char *name, const char *file) +{ + /* Using basename() GNU version which doesn't modify arg. 
*/ + strncpy(name, basename(file), MAX_OBJ_NAME_LEN - 1); + name[MAX_OBJ_NAME_LEN - 1] = '\0'; + if (str_has_suffix(name, ".o")) + name[strlen(name) - 2] = '\0'; + sanitize_identifier(name); +} + +static void get_header_guard(char *guard, const char *obj_name) +{ + int i; + + sprintf(guard, "__%s_SKEL_H__", obj_name); + for (i = 0; guard[i]; i++) + guard[i] = toupper(guard[i]); +} + +static const char *get_map_ident(const struct bpf_map *map) +{ + const char *name = bpf_map__name(map); + + if (!bpf_map__is_internal(map)) + return name; + + if (str_has_suffix(name, ".data")) + return "data"; + else if (str_has_suffix(name, ".rodata")) + return "rodata"; + else if (str_has_suffix(name, ".bss")) + return "bss"; + else if (str_has_suffix(name, ".kconfig")) + return "kconfig"; + else + return NULL; +} + +static void codegen_btf_dump_printf(void *ct, const char *fmt, va_list args) +{ + vprintf(fmt, args); +} + +static int codegen_datasec_def(struct bpf_object *obj, + struct btf *btf, + struct btf_dump *d, + const struct btf_type *sec, + const char *obj_name) +{ + const char *sec_name = btf__name_by_offset(btf, sec->name_off); + const struct btf_var_secinfo *sec_var = btf_var_secinfos(sec); + int i, err, off = 0, pad_cnt = 0, vlen = btf_vlen(sec); + const char *sec_ident; + char var_ident[256]; + + if (strcmp(sec_name, ".data") == 0) + sec_ident = "data"; + else if (strcmp(sec_name, ".bss") == 0) + sec_ident = "bss"; + else if (strcmp(sec_name, ".rodata") == 0) + sec_ident = "rodata"; + else if (strcmp(sec_name, ".kconfig") == 0) + sec_ident = "kconfig"; + else + return 0; + + printf(" struct %s__%s {\n", obj_name, sec_ident); + for (i = 0; i < vlen; i++, sec_var++) { + const struct btf_type *var = btf__type_by_id(btf, sec_var->type); + const char *var_name = btf__name_by_offset(btf, var->name_off); + DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts, + .field_name = var_ident, + .indent_level = 2, + ); + int need_off = sec_var->offset, align_off, align; + __u32 var_type_id = var->type; + const struct btf_type *t; + + t = btf__type_by_id(btf, var_type_id); + while (btf_is_mod(t)) { + var_type_id = t->type; + t = btf__type_by_id(btf, var_type_id); + } + + if (off > need_off) { + p_err("Something is wrong for %s's variable #%d: need offset %d, already at %d.\n", + sec_name, i, need_off, off); + return -EINVAL; + } + + align = btf__align_of(btf, var->type); + if (align <= 0) { + p_err("Failed to determine alignment of variable '%s': %d", + var_name, align); + return -EINVAL; + } + + align_off = (off + align - 1) / align * align; + if (align_off != need_off) { + printf("\t\tchar __pad%d[%d];\n", + pad_cnt, need_off - off); + pad_cnt++; + } + + /* sanitize variable name, e.g., for static vars inside + * a function, it's name is '<function name>.<variable name>', + * which we'll turn into a '<function name>_<variable name>' + */ + var_ident[0] = '\0'; + strncat(var_ident, var_name, sizeof(var_ident) - 1); + sanitize_identifier(var_ident); + + printf("\t\t"); + err = btf_dump__emit_type_decl(d, var_type_id, &opts); + if (err) + return err; + printf(";\n"); + + off = sec_var->offset + sec_var->size; + } + printf(" } *%s;\n", sec_ident); + return 0; +} + +static int codegen_datasecs(struct bpf_object *obj, const char *obj_name) +{ + struct btf *btf = bpf_object__btf(obj); + int n = btf__get_nr_types(btf); + struct btf_dump *d; + int i, err = 0; + + d = btf_dump__new(btf, NULL, NULL, codegen_btf_dump_printf); + if (IS_ERR(d)) + return PTR_ERR(d); + + for (i = 1; i <= n; i++) { + const struct btf_type 
*t = btf__type_by_id(btf, i); + + if (!btf_is_datasec(t)) + continue; + + err = codegen_datasec_def(obj, btf, d, t, obj_name); + if (err) + goto out; + } +out: + btf_dump__free(d); + return err; +} + +static int codegen(const char *template, ...) +{ + const char *src, *end; + int skip_tabs = 0, n; + char *s, *dst; + va_list args; + char c; + + n = strlen(template); + s = malloc(n + 1); + if (!s) + return -ENOMEM; + src = template; + dst = s; + + /* find out "baseline" indentation to skip */ + while ((c = *src++)) { + if (c == '\t') { + skip_tabs++; + } else if (c == '\n') { + break; + } else { + p_err("unrecognized character at pos %td in template '%s'", + src - template - 1, template); + return -EINVAL; + } + } + + while (*src) { + /* skip baseline indentation tabs */ + for (n = skip_tabs; n > 0; n--, src++) { + if (*src != '\t') { + p_err("not enough tabs at pos %td in template '%s'", + src - template - 1, template); + return -EINVAL; + } + } + /* trim trailing whitespace */ + end = strchrnul(src, '\n'); + for (n = end - src; n > 0 && isspace(src[n - 1]); n--) + ; + memcpy(dst, src, n); + dst += n; + if (*end) + *dst++ = '\n'; + src = *end ? end + 1 : end; + } + *dst++ = '\0'; + + /* print out using adjusted template */ + va_start(args, template); + n = vprintf(s, args); + va_end(args); + + free(s); + return n; +} + +static int do_skeleton(int argc, char **argv) +{ + char header_guard[MAX_OBJ_NAME_LEN + sizeof("__SKEL_H__")]; + size_t i, map_cnt = 0, prog_cnt = 0, file_sz, mmap_sz; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + char obj_name[MAX_OBJ_NAME_LEN], *obj_data; + struct bpf_object *obj = NULL; + const char *file, *ident; + struct bpf_program *prog; + int fd, len, err = -1; + struct bpf_map *map; + struct btf *btf; + struct stat st; + + if (!REQ_ARGS(1)) { + usage(); + return -1; + } + file = GET_ARG(); + + if (argc) { + p_err("extra unknown arguments"); + return -1; + } + + if (stat(file, &st)) { + p_err("failed to stat() %s: %s", file, strerror(errno)); + return -1; + } + file_sz = st.st_size; + mmap_sz = roundup(file_sz, sysconf(_SC_PAGE_SIZE)); + fd = open(file, O_RDONLY); + if (fd < 0) { + p_err("failed to open() %s: %s", file, strerror(errno)); + return -1; + } + obj_data = mmap(NULL, mmap_sz, PROT_READ, MAP_PRIVATE, fd, 0); + if (obj_data == MAP_FAILED) { + obj_data = NULL; + p_err("failed to mmap() %s: %s", file, strerror(errno)); + goto out; + } + get_obj_name(obj_name, file); + opts.object_name = obj_name; + obj = bpf_object__open_mem(obj_data, file_sz, &opts); + if (IS_ERR(obj)) { + obj = NULL; + p_err("failed to open BPF object file: %ld", PTR_ERR(obj)); + goto out; + } + + bpf_object__for_each_map(map, obj) { + ident = get_map_ident(map); + if (!ident) { + p_err("ignoring unrecognized internal map '%s'...", + bpf_map__name(map)); + continue; + } + map_cnt++; + } + bpf_object__for_each_program(prog, obj) { + prog_cnt++; + } + + get_header_guard(header_guard, obj_name); + codegen("\ + \n\ + /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ \n\ + \n\ + /* THIS FILE IS AUTOGENERATED! 
*/ \n\ + #ifndef %2$s \n\ + #define %2$s \n\ + \n\ + #include <stdlib.h> \n\ + #include <libbpf.h> \n\ + \n\ + struct %1$s { \n\ + struct bpf_object_skeleton *skeleton; \n\ + struct bpf_object *obj; \n\ + ", + obj_name, header_guard + ); + + if (map_cnt) { + printf("\tstruct {\n"); + bpf_object__for_each_map(map, obj) { + ident = get_map_ident(map); + if (!ident) + continue; + printf("\t\tstruct bpf_map *%s;\n", ident); + } + printf("\t} maps;\n"); + } + + if (prog_cnt) { + printf("\tstruct {\n"); + bpf_object__for_each_program(prog, obj) { + printf("\t\tstruct bpf_program *%s;\n", + bpf_program__name(prog)); + } + printf("\t} progs;\n"); + printf("\tstruct {\n"); + bpf_object__for_each_program(prog, obj) { + printf("\t\tstruct bpf_link *%s;\n", + bpf_program__name(prog)); + } + printf("\t} links;\n"); + } + + btf = bpf_object__btf(obj); + if (btf) { + err = codegen_datasecs(obj, obj_name); + if (err) + goto out; + } + + codegen("\ + \n\ + }; \n\ + \n\ + static void \n\ + %1$s__destroy(struct %1$s *obj) \n\ + { \n\ + if (!obj) \n\ + return; \n\ + if (obj->skeleton) \n\ + bpf_object__destroy_skeleton(obj->skeleton);\n\ + free(obj); \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__create_skeleton(struct %1$s *obj); \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open_opts(const struct bpf_object_open_opts *opts) \n\ + { \n\ + struct %1$s *obj; \n\ + \n\ + obj = (typeof(obj))calloc(1, sizeof(*obj)); \n\ + if (!obj) \n\ + return NULL; \n\ + if (%1$s__create_skeleton(obj)) \n\ + goto err; \n\ + if (bpf_object__open_skeleton(obj->skeleton, opts)) \n\ + goto err; \n\ + \n\ + return obj; \n\ + err: \n\ + %1$s__destroy(obj); \n\ + return NULL; \n\ + } \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open(void) \n\ + { \n\ + return %1$s__open_opts(NULL); \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__load(struct %1$s *obj) \n\ + { \n\ + return bpf_object__load_skeleton(obj->skeleton); \n\ + } \n\ + \n\ + static inline struct %1$s * \n\ + %1$s__open_and_load(void) \n\ + { \n\ + struct %1$s *obj; \n\ + \n\ + obj = %1$s__open(); \n\ + if (!obj) \n\ + return NULL; \n\ + if (%1$s__load(obj)) { \n\ + %1$s__destroy(obj); \n\ + return NULL; \n\ + } \n\ + return obj; \n\ + } \n\ + \n\ + static inline int \n\ + %1$s__attach(struct %1$s *obj) \n\ + { \n\ + return bpf_object__attach_skeleton(obj->skeleton); \n\ + } \n\ + \n\ + static inline void \n\ + %1$s__detach(struct %1$s *obj) \n\ + { \n\ + return bpf_object__detach_skeleton(obj->skeleton); \n\ + } \n\ + ", + obj_name + ); + + codegen("\ + \n\ + \n\ + static inline int \n\ + %1$s__create_skeleton(struct %1$s *obj) \n\ + { \n\ + struct bpf_object_skeleton *s; \n\ + \n\ + s = (typeof(s))calloc(1, sizeof(*s)); \n\ + if (!s) \n\ + return -1; \n\ + obj->skeleton = s; \n\ + \n\ + s->sz = sizeof(*s); \n\ + s->name = \"%1$s\"; \n\ + s->obj = &obj->obj; \n\ + ", + obj_name + ); + if (map_cnt) { + codegen("\ + \n\ + \n\ + /* maps */ \n\ + s->map_cnt = %zu; \n\ + s->map_skel_sz = sizeof(*s->maps); \n\ + s->maps = (typeof(s->maps))calloc(s->map_cnt, s->map_skel_sz);\n\ + if (!s->maps) \n\ + goto err; \n\ + ", + map_cnt + ); + i = 0; + bpf_object__for_each_map(map, obj) { + ident = get_map_ident(map); + + if (!ident) + continue; + + codegen("\ + \n\ + \n\ + s->maps[%zu].name = \"%s\"; \n\ + s->maps[%zu].map = &obj->maps.%s; \n\ + ", + i, bpf_map__name(map), i, ident); + /* memory-mapped internal maps */ + if (bpf_map__is_internal(map) && + (bpf_map__def(map)->map_flags & BPF_F_MMAPABLE)) { + printf("\ts->maps[%zu].mmaped = (void **)&obj->%s;\n", + i, ident); 
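+			/*
+			 * For BPF_F_MMAPABLE maps, bpf_object__load_skeleton()
+			 * mmap()s the map and stores the address through this
+			 * mmaped pointer, so obj->data/obj->bss/obj->rodata
+			 * point directly at the kernel-backed memory after
+			 * load.
+			 */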
+ } + i++; + } + } + if (prog_cnt) { + codegen("\ + \n\ + \n\ + /* programs */ \n\ + s->prog_cnt = %zu; \n\ + s->prog_skel_sz = sizeof(*s->progs); \n\ + s->progs = (typeof(s->progs))calloc(s->prog_cnt, s->prog_skel_sz);\n\ + if (!s->progs) \n\ + goto err; \n\ + ", + prog_cnt + ); + i = 0; + bpf_object__for_each_program(prog, obj) { + codegen("\ + \n\ + \n\ + s->progs[%1$zu].name = \"%2$s\"; \n\ + s->progs[%1$zu].prog = &obj->progs.%2$s;\n\ + s->progs[%1$zu].link = &obj->links.%2$s;\n\ + ", + i, bpf_program__name(prog)); + i++; + } + } + codegen("\ + \n\ + \n\ + s->data_sz = %d; \n\ + s->data = (void *)\"\\ \n\ + ", + file_sz); + + /* embed contents of BPF object file */ + for (i = 0, len = 0; i < file_sz; i++) { + int w = obj_data[i] ? 4 : 2; + + len += w; + if (len > 78) { + printf("\\\n"); + len = w; + } + if (!obj_data[i]) + printf("\\0"); + else + printf("\\x%02x", (unsigned char)obj_data[i]); + } + + codegen("\ + \n\ + \"; \n\ + \n\ + return 0; \n\ + err: \n\ + bpf_object__destroy_skeleton(s); \n\ + return -1; \n\ + } \n\ + \n\ + #endif /* %s */ \n\ + ", + header_guard); + err = 0; +out: + bpf_object__close(obj); + if (obj_data) + munmap(obj_data, mmap_sz); + close(fd); + return err; +} + +static int do_help(int argc, char **argv) +{ + if (json_output) { + jsonw_null(json_wtr); + return 0; + } + + fprintf(stderr, + "Usage: %1$s gen skeleton FILE\n" + " %1$s gen help\n" + "\n" + " " HELP_SPEC_OPTIONS "\n" + "", + bin_name); + + return 0; +} + +static const struct cmd cmds[] = { + { "skeleton", do_skeleton }, + { "help", do_help }, + { 0 } +}; + +int do_gen(int argc, char **argv) +{ + return cmd_select(cmds, argc, argv, do_help); +} diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c index 4764581ff9ea..1fe91c558508 100644 --- a/tools/bpf/bpftool/main.c +++ b/tools/bpf/bpftool/main.c @@ -58,7 +58,7 @@ static int do_help(int argc, char **argv) " %s batch file FILE\n" " %s version\n" "\n" - " OBJECT := { prog | map | cgroup | perf | net | feature | btf }\n" + " OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen }\n" " " HELP_SPEC_OPTIONS "\n" "", bin_name, bin_name, bin_name); @@ -227,6 +227,7 @@ static const struct cmd cmds[] = { { "net", do_net }, { "feature", do_feature }, { "btf", do_btf }, + { "gen", do_gen }, { "version", do_version }, { 0 } }; diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h index 2899095f8254..4e75b58d3989 100644 --- a/tools/bpf/bpftool/main.h +++ b/tools/bpf/bpftool/main.h @@ -42,12 +42,12 @@ #define BPF_TAG_FMT "%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx%02hhx" #define HELP_SPEC_PROGRAM \ - "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG }" + "PROG := { id PROG_ID | pinned FILE | tag PROG_TAG | name PROG_NAME }" #define HELP_SPEC_OPTIONS \ "OPTIONS := { {-j|--json} [{-p|--pretty}] | {-f|--bpffs} |\n" \ "\t {-m|--mapcompat} | {-n|--nomount} }" #define HELP_SPEC_MAP \ - "MAP := { id MAP_ID | pinned FILE }" + "MAP := { id MAP_ID | pinned FILE | name MAP_NAME }" static const char * const prog_type_name[] = { [BPF_PROG_TYPE_UNSPEC] = "unspec", @@ -155,6 +155,7 @@ int do_net(int argc, char **arg); int do_tracelog(int argc, char **arg); int do_feature(int argc, char **argv); int do_btf(int argc, char **argv); +int do_gen(int argc, char **argv); int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what); int prog_parse_fd(int *argc, char ***argv); diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c index de61d73b9030..c01f76fa6876 100644 --- a/tools/bpf/bpftool/map.c +++ b/tools/bpf/bpftool/map.c 
@@ -91,10 +91,66 @@ static void *alloc_value(struct bpf_map_info *info) return malloc(info->value_size); } -int map_parse_fd(int *argc, char ***argv) +static int map_fd_by_name(char *name, int **fds) { - int fd; + unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; + int err; + + while (true) { + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + + err = bpf_map_get_next_id(id, &id); + if (err) { + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; + } + + fd = bpf_map_get_fd_by_id(id); + if (fd < 0) { + p_err("can't get map by id (%u): %s", + id, strerror(errno)); + goto err_close_fds; + } + + err = bpf_obj_get_info_by_fd(fd, &info, &len); + if (err) { + p_err("can't get map info (%u): %s", + id, strerror(errno)); + goto err_close_fd; + } + + if (strncmp(name, info.name, BPF_OBJ_NAME_LEN)) { + close(fd); + continue; + } + + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; + } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; +} +static int map_parse_fds(int *argc, char ***argv, int **fds) +{ if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -108,10 +164,25 @@ int map_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - fd = bpf_map_get_fd_by_id(id); - if (fd < 0) + (*fds)[0] = bpf_map_get_fd_by_id(id); + if ((*fds)[0] < 0) { p_err("get map by id (%u): %s", id, strerror(errno)); - return fd; + return -1; + } + return 1; + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return map_fd_by_name(name, fds); } else if (is_prefix(**argv, "pinned")) { char *path; @@ -120,13 +191,43 @@ int map_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_MAP); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_MAP); + if ((*fds)[0] < 0) + return -1; + return 1; } - p_err("expected 'id' or 'pinned', got: '%s'?", **argv); + p_err("expected 'id', 'name' or 'pinned', got: '%s'?", **argv); return -1; } +int map_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several maps match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + int map_parse_fd_and_info(int *argc, char ***argv, void *info, __u32 *info_len) { int err; @@ -479,6 +580,21 @@ static int parse_elem(char **argv, struct bpf_map_info *info, return -1; } +static void show_map_header_json(struct bpf_map_info *info, json_writer_t *wtr) +{ + jsonw_uint_field(wtr, "id", info->id); + if (info->type < ARRAY_SIZE(map_type_name)) + jsonw_string_field(wtr, "type", map_type_name[info->type]); + else + jsonw_uint_field(wtr, "type", info->type); + + if (*info->name) + jsonw_string_field(wtr, "name", info->name); + + jsonw_name(wtr, "flags"); + jsonw_printf(wtr, "%d", info->map_flags); +} + static int show_map_close_json(int fd, struct bpf_map_info *info) { char *memlock, *frozen_str; @@ -489,18 +605,7 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) jsonw_start_object(json_wtr); - 
jsonw_uint_field(json_wtr, "id", info->id); - if (info->type < ARRAY_SIZE(map_type_name)) - jsonw_string_field(json_wtr, "type", - map_type_name[info->type]); - else - jsonw_uint_field(json_wtr, "type", info->type); - - if (*info->name) - jsonw_string_field(json_wtr, "name", info->name); - - jsonw_name(json_wtr, "flags"); - jsonw_printf(json_wtr, "%d", info->map_flags); + show_map_header_json(info, json_wtr); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); @@ -561,14 +666,8 @@ static int show_map_close_json(int fd, struct bpf_map_info *info) return 0; } -static int show_map_close_plain(int fd, struct bpf_map_info *info) +static void show_map_header_plain(struct bpf_map_info *info) { - char *memlock, *frozen_str; - int frozen = 0; - - memlock = get_fdinfo(fd, "memlock"); - frozen_str = get_fdinfo(fd, "frozen"); - printf("%u: ", info->id); if (info->type < ARRAY_SIZE(map_type_name)) printf("%s ", map_type_name[info->type]); @@ -581,6 +680,17 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) printf("flags 0x%x", info->map_flags); print_dev_plain(info->ifindex, info->netns_dev, info->netns_ino); printf("\n"); +} + +static int show_map_close_plain(int fd, struct bpf_map_info *info) +{ + char *memlock, *frozen_str; + int frozen = 0; + + memlock = get_fdinfo(fd, "memlock"); + frozen_str = get_fdinfo(fd, "frozen"); + + show_map_header_plain(info); printf("\tkey %uB value %uB max_entries %u", info->key_size, info->value_size, info->max_entries); @@ -642,6 +752,50 @@ static int show_map_close_plain(int fd, struct bpf_map_info *info) return 0; } +static int do_show_subset(int argc, char **argv) +{ + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int *fds = NULL; + int nb_fds, i; + int err = -1; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + err = bpf_obj_get_info_by_fd(fds[i], &info, &len); + if (err) { + p_err("can't get map info: %s", + strerror(errno)); + for (; i < nb_fds; i++) + close(fds[i]); + break; + } + + if (json_output) + show_map_close_json(fds[i], &info); + else + show_map_close_plain(fds[i], &info); + + close(fds[i]); + } + if (json_output && nb_fds > 1) + jsonw_end_array(json_wtr); /* root array */ + +exit_free: + free(fds); + return err; +} + static int do_show(int argc, char **argv) { struct bpf_map_info info = {}; @@ -653,16 +807,8 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&map_table, BPF_OBJ_MAP); - if (argc == 2) { - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); - if (fd < 0) - return -1; - - if (json_output) - return show_map_close_json(fd, &info); - else - return show_map_close_plain(fd, &info); - } + if (argc == 2) + return do_show_subset(argc, argv); if (argc) return BAD_ARG(); @@ -765,26 +911,49 @@ static int dump_map_elem(int fd, void *key, void *value, return 0; } -static int do_dump(int argc, char **argv) +static int maps_have_btf(int *fds, int nb_fds) { struct bpf_map_info info = {}; - void *key, *value, *prev_key; - unsigned int num_elems = 0; __u32 len = sizeof(info); - json_writer_t *btf_wtr; struct btf *btf = NULL; - int err; - int fd; + int err, i; - if (argc != 2) - usage(); + for (i = 0; i < nb_fds; i++) { + err = bpf_obj_get_info_by_fd(fds[i], &info, &len); + if (err) { + p_err("can't get map info: %s", strerror(errno)); + 
goto err_close; + } - fd = map_parse_fd_and_info(&argc, &argv, &info, &len); - if (fd < 0) - return -1; + err = btf__get_from_id(info.btf_id, &btf); + if (err) { + p_err("failed to get btf"); + goto err_close; + } - key = malloc(info.key_size); - value = alloc_value(&info); + if (!btf) + return 0; + } + + return 1; + +err_close: + for (; i < nb_fds; i++) + close(fds[i]); + return -1; +} + +static int +map_dump(int fd, struct bpf_map_info *info, json_writer_t *wtr, + bool enable_btf, bool show_header) +{ + void *key, *value, *prev_key; + unsigned int num_elems = 0; + struct btf *btf = NULL; + int err; + + key = malloc(info->key_size); + value = alloc_value(info); if (!key || !value) { p_err("mem alloc failed"); err = -1; @@ -793,30 +962,32 @@ static int do_dump(int argc, char **argv) prev_key = NULL; - err = btf__get_from_id(info.btf_id, &btf); - if (err) { - p_err("failed to get btf"); - goto exit_free; + if (enable_btf) { + err = btf__get_from_id(info->btf_id, &btf); + if (err || !btf) { + /* enable_btf is true only if we've already checked + * that all maps have BTF information. + */ + p_err("failed to get btf"); + goto exit_free; + } } - if (json_output) - jsonw_start_array(json_wtr); - else - if (btf) { - btf_wtr = get_btf_writer(); - if (!btf_wtr) { - p_info("failed to create json writer for btf. falling back to plain output"); - btf__free(btf); - btf = NULL; - } else { - jsonw_start_array(btf_wtr); - } + if (wtr) { + if (show_header) { + jsonw_start_object(wtr); /* map object */ + show_map_header_json(info, wtr); + jsonw_name(wtr, "elements"); } + jsonw_start_array(wtr); /* elements */ + } else if (show_header) { + show_map_header_plain(info); + } - if (info.type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && - info.value_size != 8) + if (info->type == BPF_MAP_TYPE_REUSEPORT_SOCKARRAY && + info->value_size != 8) p_info("Warning: cannot read values from %s map with value_size != 8", - map_type_name[info.type]); + map_type_name[info->type]); while (true) { err = bpf_map_get_next_key(fd, prev_key, key); if (err) { @@ -824,15 +995,14 @@ static int do_dump(int argc, char **argv) err = 0; break; } - num_elems += dump_map_elem(fd, key, value, &info, btf, btf_wtr); + num_elems += dump_map_elem(fd, key, value, info, btf, wtr); prev_key = key; } - if (json_output) - jsonw_end_array(json_wtr); - else if (btf) { - jsonw_end_array(btf_wtr); - jsonw_destroy(&btf_wtr); + if (wtr) { + jsonw_end_array(wtr); /* elements */ + if (show_header) + jsonw_end_object(wtr); /* map object */ } else { printf("Found %u element%s\n", num_elems, num_elems != 1 ? "s" : ""); @@ -847,6 +1017,72 @@ exit_free: return err; } +static int do_dump(int argc, char **argv) +{ + json_writer_t *wtr = NULL, *btf_wtr = NULL; + struct bpf_map_info info = {}; + int nb_fds, i = 0, btf = 0; + __u32 len = sizeof(info); + int *fds = NULL; + int err = -1; + + if (argc != 2) + usage(); + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = map_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (json_output) { + wtr = json_wtr; + } else { + btf = maps_have_btf(fds, nb_fds); + if (btf < 0) + goto exit_close; + if (btf) { + btf_wtr = get_btf_writer(); + if (btf_wtr) { + wtr = btf_wtr; + } else { + p_info("failed to create json writer for btf. 
falling back to plain output"); + btf = 0; + } + } + } + + if (wtr && nb_fds > 1) + jsonw_start_array(wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + if (bpf_obj_get_info_by_fd(fds[i], &info, &len)) { + p_err("can't get map info: %s", strerror(errno)); + break; + } + err = map_dump(fds[i], &info, wtr, btf, nb_fds > 1); + if (!wtr && i != nb_fds - 1) + printf("\n"); + + if (err) + break; + close(fds[i]); + } + if (wtr && nb_fds > 1) + jsonw_end_array(wtr); /* root array */ + + if (btf) + jsonw_destroy(&btf_wtr); +exit_close: + for (; i < nb_fds; i++) + close(fds[i]); +exit_free: + free(fds); + return err; +} + static int alloc_key_value(struct bpf_map_info *info, void **key, void **value) { *key = NULL; diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c index 4f52d3151616..d93bee298e54 100644 --- a/tools/bpf/bpftool/net.c +++ b/tools/bpf/bpftool/net.c @@ -18,6 +18,7 @@ #include <bpf.h> #include <nlattr.h> +#include "libbpf_internal.h" #include "main.h" #include "netlink_dumper.h" diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index 2ce9c5ba1934..47a61ac42dc0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -25,6 +25,11 @@ #include "main.h" #include "xlated_dumper.h" +enum dump_mode { + DUMP_JITED, + DUMP_XLATED, +}; + static const char * const attach_type_strings[] = { [BPF_SK_SKB_STREAM_PARSER] = "stream_parser", [BPF_SK_SKB_STREAM_VERDICT] = "stream_verdict", @@ -77,11 +82,12 @@ static void print_boot_time(__u64 nsecs, char *buf, unsigned int size) strftime(buf, size, "%FT%T%z", &load_tm); } -static int prog_fd_by_tag(unsigned char *tag) +static int prog_fd_by_nametag(void *nametag, int **fds, bool tag) { unsigned int id = 0; + int fd, nb_fds = 0; + void *tmp; int err; - int fd; while (true) { struct bpf_prog_info info = {}; @@ -89,36 +95,54 @@ static int prog_fd_by_tag(unsigned char *tag) err = bpf_prog_get_next_id(id, &id); if (err) { - p_err("%s", strerror(errno)); - return -1; + if (errno != ENOENT) { + p_err("%s", strerror(errno)); + goto err_close_fds; + } + return nb_fds; } fd = bpf_prog_get_fd_by_id(id); if (fd < 0) { p_err("can't get prog by id (%u): %s", id, strerror(errno)); - return -1; + goto err_close_fds; } err = bpf_obj_get_info_by_fd(fd, &info, &len); if (err) { p_err("can't get prog info (%u): %s", id, strerror(errno)); - close(fd); - return -1; + goto err_close_fd; } - if (!memcmp(tag, info.tag, BPF_TAG_SIZE)) - return fd; + if ((tag && memcmp(nametag, info.tag, BPF_TAG_SIZE)) || + (!tag && strncmp(nametag, info.name, BPF_OBJ_NAME_LEN))) { + close(fd); + continue; + } - close(fd); + if (nb_fds > 0) { + tmp = realloc(*fds, (nb_fds + 1) * sizeof(int)); + if (!tmp) { + p_err("failed to realloc"); + goto err_close_fd; + } + *fds = tmp; + } + (*fds)[nb_fds++] = fd; } + +err_close_fd: + close(fd); +err_close_fds: + while (--nb_fds >= 0) + close((*fds)[nb_fds]); + return -1; } -int prog_parse_fd(int *argc, char ***argv) +static int prog_parse_fds(int *argc, char ***argv, int **fds) { - int fd; - if (is_prefix(**argv, "id")) { unsigned int id; char *endptr; @@ -132,10 +156,12 @@ int prog_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - fd = bpf_prog_get_fd_by_id(id); - if (fd < 0) + (*fds)[0] = bpf_prog_get_fd_by_id(id); + if ((*fds)[0] < 0) { p_err("get by id (%u): %s", id, strerror(errno)); - return fd; + return -1; + } + return 1; } else if (is_prefix(**argv, "tag")) { unsigned char tag[BPF_TAG_SIZE]; @@ -149,7 +175,20 @@ int prog_parse_fd(int *argc, char ***argv) } NEXT_ARGP(); - return prog_fd_by_tag(tag); + 
return prog_fd_by_nametag(tag, fds, true); + } else if (is_prefix(**argv, "name")) { + char *name; + + NEXT_ARGP(); + + name = **argv; + if (strlen(name) > BPF_OBJ_NAME_LEN - 1) { + p_err("can't parse name"); + return -1; + } + NEXT_ARGP(); + + return prog_fd_by_nametag(name, fds, false); } else if (is_prefix(**argv, "pinned")) { char *path; @@ -158,13 +197,43 @@ int prog_parse_fd(int *argc, char ***argv) path = **argv; NEXT_ARGP(); - return open_obj_pinned_any(path, BPF_OBJ_PROG); + (*fds)[0] = open_obj_pinned_any(path, BPF_OBJ_PROG); + if ((*fds)[0] < 0) + return -1; + return 1; } - p_err("expected 'id', 'tag' or 'pinned', got: '%s'?", **argv); + p_err("expected 'id', 'tag', 'name' or 'pinned', got: '%s'?", **argv); return -1; } +int prog_parse_fd(int *argc, char ***argv) +{ + int *fds = NULL; + int nb_fds, fd; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(argc, argv, &fds); + if (nb_fds != 1) { + if (nb_fds > 1) { + p_err("several programs match this handle"); + while (nb_fds--) + close(fds[nb_fds]); + } + fd = -1; + goto exit_free; + } + + fd = fds[0]; +exit_free: + free(fds); + return fd; +} + static void show_prog_maps(int fd, u32 num_maps) { struct bpf_prog_info info = {}; @@ -194,11 +263,8 @@ static void show_prog_maps(int fd, u32 num_maps) } } -static void print_prog_json(struct bpf_prog_info *info, int fd) +static void print_prog_header_json(struct bpf_prog_info *info) { - char *memlock; - - jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "id", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) jsonw_string_field(json_wtr, "type", @@ -219,7 +285,14 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_uint_field(json_wtr, "run_time_ns", info->run_time_ns); jsonw_uint_field(json_wtr, "run_cnt", info->run_cnt); } +} +static void print_prog_json(struct bpf_prog_info *info, int fd) +{ + char *memlock; + + jsonw_start_object(json_wtr); + print_prog_header_json(info); print_dev_json(info->ifindex, info->netns_dev, info->netns_ino); if (info->load_time) { @@ -268,10 +341,8 @@ static void print_prog_json(struct bpf_prog_info *info, int fd) jsonw_end_object(json_wtr); } -static void print_prog_plain(struct bpf_prog_info *info, int fd) +static void print_prog_header_plain(struct bpf_prog_info *info) { - char *memlock; - printf("%u: ", info->id); if (info->type < ARRAY_SIZE(prog_type_name)) printf("%s ", prog_type_name[info->type]); @@ -289,6 +360,13 @@ static void print_prog_plain(struct bpf_prog_info *info, int fd) printf(" run_time_ns %lld run_cnt %lld", info->run_time_ns, info->run_cnt); printf("\n"); +} + +static void print_prog_plain(struct bpf_prog_info *info, int fd) +{ + char *memlock; + + print_prog_header_plain(info); if (info->load_time) { char buf[32]; @@ -349,6 +427,40 @@ static int show_prog(int fd) return 0; } +static int do_show_subset(int argc, char **argv) +{ + int *fds = NULL; + int nb_fds, i; + int err = -1; + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + err = show_prog(fds[i]); + if (err) { + for (; i < nb_fds; i++) + close(fds[i]); + break; + } + close(fds[i]); + } + if (json_output && nb_fds > 1) + jsonw_end_array(json_wtr); /* root array */ + +exit_free: + free(fds); + return err; +} + static int do_show(int argc, char 
**argv) { __u32 id = 0; @@ -358,15 +470,8 @@ static int do_show(int argc, char **argv) if (show_pinned) build_pinned_obj_table(&prog_table, BPF_OBJ_PROG); - if (argc == 2) { - fd = prog_parse_fd(&argc, &argv); - if (fd < 0) - return -1; - - err = show_prog(fd); - close(fd); - return err; - } + if (argc == 2) + return do_show_subset(argc, argv); if (argc) return BAD_ARG(); @@ -408,101 +513,32 @@ static int do_show(int argc, char **argv) return err; } -static int do_dump(int argc, char **argv) +static int +prog_dump(struct bpf_prog_info *info, enum dump_mode mode, + char *filepath, bool opcodes, bool visual, bool linum) { - struct bpf_prog_info_linear *info_linear; struct bpf_prog_linfo *prog_linfo = NULL; - enum {DUMP_JITED, DUMP_XLATED} mode; const char *disasm_opt = NULL; - struct bpf_prog_info *info; struct dump_data dd = {}; void *func_info = NULL; struct btf *btf = NULL; - char *filepath = NULL; - bool opcodes = false; - bool visual = false; char func_sig[1024]; unsigned char *buf; - bool linum = false; __u32 member_len; - __u64 arrays; ssize_t n; int fd; - if (is_prefix(*argv, "jited")) { - if (disasm_init()) - return -1; - mode = DUMP_JITED; - } else if (is_prefix(*argv, "xlated")) { - mode = DUMP_XLATED; - } else { - p_err("expected 'xlated' or 'jited', got: %s", *argv); - return -1; - } - NEXT_ARG(); - - if (argc < 2) - usage(); - - fd = prog_parse_fd(&argc, &argv); - if (fd < 0) - return -1; - - if (is_prefix(*argv, "file")) { - NEXT_ARG(); - if (!argc) { - p_err("expected file path"); - return -1; - } - - filepath = *argv; - NEXT_ARG(); - } else if (is_prefix(*argv, "opcodes")) { - opcodes = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "visual")) { - visual = true; - NEXT_ARG(); - } else if (is_prefix(*argv, "linum")) { - linum = true; - NEXT_ARG(); - } - - if (argc) { - usage(); - return -1; - } - - if (mode == DUMP_JITED) - arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; - else - arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; - - arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; - arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; - arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; - arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; - arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; - - info_linear = bpf_program__get_prog_info_linear(fd, arrays); - close(fd); - if (IS_ERR_OR_NULL(info_linear)) { - p_err("can't get prog info: %s", strerror(errno)); - return -1; - } - - info = &info_linear->info; if (mode == DUMP_JITED) { if (info->jited_prog_len == 0 || !info->jited_prog_insns) { p_info("no instructions returned"); - goto err_free; + return -1; } buf = (unsigned char *)(info->jited_prog_insns); member_len = info->jited_prog_len; } else { /* DUMP_XLATED */ if (info->xlated_prog_len == 0) { p_err("error retrieving insn dump: kernel.kptr_restrict set?"); - goto err_free; + return -1; } buf = (unsigned char *)info->xlated_prog_insns; member_len = info->xlated_prog_len; @@ -510,7 +546,7 @@ static int do_dump(int argc, char **argv) if (info->btf_id && btf__get_from_id(info->btf_id, &btf)) { p_err("failed to get btf"); - goto err_free; + return -1; } func_info = (void *)info->func_info; @@ -526,7 +562,7 @@ static int do_dump(int argc, char **argv) if (fd < 0) { p_err("can't open file %s: %s", filepath, strerror(errno)); - goto err_free; + return -1; } n = write(fd, buf, member_len); @@ -534,7 +570,7 @@ static int do_dump(int argc, char **argv) if (n != member_len) { p_err("error writing output file: %s", n < 0 ? 
strerror(errno) : "short write"); - goto err_free; + return -1; } if (json_output) @@ -548,7 +584,7 @@ static int do_dump(int argc, char **argv) info->netns_ino, &disasm_opt); if (!name) - goto err_free; + return -1; } if (info->nr_jited_func_lens && info->jited_func_lens) { @@ -643,12 +679,130 @@ static int do_dump(int argc, char **argv) kernel_syms_destroy(&dd); } - free(info_linear); return 0; +} -err_free: - free(info_linear); - return -1; +static int do_dump(int argc, char **argv) +{ + struct bpf_prog_info_linear *info_linear; + char *filepath = NULL; + bool opcodes = false; + bool visual = false; + enum dump_mode mode; + bool linum = false; + int *fds = NULL; + int nb_fds, i = 0; + int err = -1; + __u64 arrays; + + if (is_prefix(*argv, "jited")) { + if (disasm_init()) + return -1; + mode = DUMP_JITED; + } else if (is_prefix(*argv, "xlated")) { + mode = DUMP_XLATED; + } else { + p_err("expected 'xlated' or 'jited', got: %s", *argv); + return -1; + } + NEXT_ARG(); + + if (argc < 2) + usage(); + + fds = malloc(sizeof(int)); + if (!fds) { + p_err("mem alloc failed"); + return -1; + } + nb_fds = prog_parse_fds(&argc, &argv, &fds); + if (nb_fds < 1) + goto exit_free; + + if (is_prefix(*argv, "file")) { + NEXT_ARG(); + if (!argc) { + p_err("expected file path"); + goto exit_close; + } + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } + + filepath = *argv; + NEXT_ARG(); + } else if (is_prefix(*argv, "opcodes")) { + opcodes = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "visual")) { + if (nb_fds > 1) { + p_err("several programs matched"); + goto exit_close; + } + + visual = true; + NEXT_ARG(); + } else if (is_prefix(*argv, "linum")) { + linum = true; + NEXT_ARG(); + } + + if (argc) { + usage(); + goto exit_close; + } + + if (mode == DUMP_JITED) + arrays = 1UL << BPF_PROG_INFO_JITED_INSNS; + else + arrays = 1UL << BPF_PROG_INFO_XLATED_INSNS; + + arrays |= 1UL << BPF_PROG_INFO_JITED_KSYMS; + arrays |= 1UL << BPF_PROG_INFO_JITED_FUNC_LENS; + arrays |= 1UL << BPF_PROG_INFO_FUNC_INFO; + arrays |= 1UL << BPF_PROG_INFO_LINE_INFO; + arrays |= 1UL << BPF_PROG_INFO_JITED_LINE_INFO; + + if (json_output && nb_fds > 1) + jsonw_start_array(json_wtr); /* root array */ + for (i = 0; i < nb_fds; i++) { + info_linear = bpf_program__get_prog_info_linear(fds[i], arrays); + if (IS_ERR_OR_NULL(info_linear)) { + p_err("can't get prog info: %s", strerror(errno)); + break; + } + + if (json_output && nb_fds > 1) { + jsonw_start_object(json_wtr); /* prog object */ + print_prog_header_json(&info_linear->info); + jsonw_name(json_wtr, "insns"); + } else if (nb_fds > 1) { + print_prog_header_plain(&info_linear->info); + } + + err = prog_dump(&info_linear->info, mode, filepath, opcodes, + visual, linum); + + if (json_output && nb_fds > 1) + jsonw_end_object(json_wtr); /* prog object */ + else if (i != nb_fds - 1 && nb_fds > 1) + printf("\n"); + + free(info_linear); + if (err) + break; + close(fds[i]); + } + if (json_output && nb_fds > 1) + jsonw_end_array(json_wtr); /* root array */ + +exit_close: + for (; i < nb_fds; i++) + close(fds[i]); +exit_free: + free(fds); + return err; } static int do_pin(int argc, char **argv) diff --git a/tools/include/uapi/asm/bpf_perf_event.h b/tools/include/uapi/asm/bpf_perf_event.h index 13a58531e6fa..39acc149d843 100644 --- a/tools/include/uapi/asm/bpf_perf_event.h +++ b/tools/include/uapi/asm/bpf_perf_event.h @@ -2,6 +2,8 @@ #include "../../arch/arm64/include/uapi/asm/bpf_perf_event.h" #elif defined(__s390__) #include 
"../../arch/s390/include/uapi/asm/bpf_perf_event.h" +#elif defined(__riscv) +#include "../../arch/riscv/include/uapi/asm/bpf_perf_event.h" #else #include <uapi/asm-generic/bpf_perf_event.h> #endif diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index dbbcf0b02970..7df436da542d 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -231,6 +231,11 @@ enum bpf_attach_type { * When children program makes decision (like picking TCP CA or sock bind) * parent program has a chance to override it. * + * With BPF_F_ALLOW_MULTI a new program is added to the end of the list of + * programs for a cgroup. Though it's possible to replace an old program at + * any position by also specifying BPF_F_REPLACE flag and position itself in + * replace_bpf_fd attribute. Old program at this position will be released. + * * A cgroup with MULTI or OVERRIDE flag allows any attach flags in sub-cgroups. * A cgroup with NONE doesn't allow any programs in sub-cgroups. * Ex1: @@ -249,6 +254,7 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) #define BPF_F_ALLOW_MULTI (1U << 1) +#define BPF_F_REPLACE (1U << 2) /* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the * verifier will perform strict alignment checking as if the kernel @@ -442,6 +448,10 @@ union bpf_attr { __u32 attach_bpf_fd; /* eBPF program to attach */ __u32 attach_type; __u32 attach_flags; + __u32 replace_bpf_fd; /* previously attached eBPF + * program to replace if + * BPF_F_REPLACE is used + */ }; struct { /* anonymous struct used by BPF_PROG_TEST_RUN command */ diff --git a/tools/include/uapi/linux/btf.h b/tools/include/uapi/linux/btf.h index 63ae4a39e58b..1a2898c482ee 100644 --- a/tools/include/uapi/linux/btf.h +++ b/tools/include/uapi/linux/btf.h @@ -22,9 +22,9 @@ struct btf_header { }; /* Max # of type identifier */ -#define BTF_MAX_TYPE 0x0000ffff +#define BTF_MAX_TYPE 0x000fffff /* Max offset into the string section */ -#define BTF_MAX_NAME_OFFSET 0x0000ffff +#define BTF_MAX_NAME_OFFSET 0x00ffffff /* Max # of struct/union/enum members or func args */ #define BTF_MAX_VLEN 0xffff @@ -142,7 +142,8 @@ struct btf_param { enum { BTF_VAR_STATIC = 0, - BTF_VAR_GLOBAL_ALLOCATED, + BTF_VAR_GLOBAL_ALLOCATED = 1, + BTF_VAR_GLOBAL_EXTERN = 2, }; /* BTF_KIND_VAR is followed by a single "struct btf_var" to describe diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index defae23a0169..d4790121adf4 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,8 +56,8 @@ ifndef VERBOSE endif FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray -FEATURE_DISPLAY = libelf bpf +FEATURE_TESTS = libelf libelf-mmap zlib bpf reallocarray +FEATURE_DISPLAY = libelf zlib bpf INCLUDES = -I. 
-I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi FEATURE_CHECK_CFLAGS-bpf = $(INCLUDES) @@ -147,6 +147,7 @@ TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \ sort -u | wc -l) VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -159,7 +160,7 @@ all: fixdep all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h +$(BPF_IN_SHARED): force elfdep zdep bpfdep bpf_helper_defs.h @(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \ (diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -177,7 +178,7 @@ $(BPF_IN_SHARED): force elfdep bpfdep bpf_helper_defs.h echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep bpfdep bpf_helper_defs.h +$(BPF_IN_STATIC): force elfdep zdep bpfdep bpf_helper_defs.h $(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) bpf_helper_defs.h: $(srctree)/tools/include/uapi/linux/bpf.h @@ -189,7 +190,7 @@ $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(QUIET_LINK)$(CC) $(LDFLAGS) \ --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ - -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -lz -o $@ @ln -sf $(@F) $(OUTPUT)libbpf.so @ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION) @@ -213,6 +214,7 @@ check_abi: $(OUTPUT)libbpf.so "versioned in $(VERSION_SCRIPT)." 
>&2; \ readelf -s --wide $(BPF_IN_SHARED) | \ cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ + sed 's/\[.*\]//' | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'| \ sort -u > $(OUTPUT)libbpf_global_syms.tmp; \ readelf -s --wide $(OUTPUT)libbpf.so | \ @@ -249,6 +251,7 @@ install_headers: bpf_helper_defs.h $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ $(call do_install,btf.h,$(prefix)/include/bpf,644); \ $(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ + $(call do_install,libbpf_common.h,$(prefix)/include/bpf,644); \ $(call do_install,xsk.h,$(prefix)/include/bpf,644); \ $(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ $(call do_install,bpf_helper_defs.h,$(prefix)/include/bpf,644); \ @@ -277,12 +280,15 @@ clean: -PHONY += force elfdep bpfdep cscope tags +PHONY += force elfdep zdep bpfdep cscope tags force: elfdep: @if [ "$(feature-libelf)" != "1" ]; then echo "No libelf found"; exit 1 ; fi +zdep: + @if [ "$(feature-zlib)" != "1" ]; then echo "No zlib found"; exit 1 ; fi + bpfdep: @if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 98596e15390f..a787d53699c8 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -467,13 +467,28 @@ int bpf_obj_get(const char *pathname) int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, unsigned int flags) { + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts, + .flags = flags, + ); + + return bpf_prog_attach_xattr(prog_fd, target_fd, type, &opts); +} + +int bpf_prog_attach_xattr(int prog_fd, int target_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts) +{ union bpf_attr attr; + if (!OPTS_VALID(opts, bpf_prog_attach_opts)) + return -EINVAL; + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; - attr.attach_flags = flags; + attr.attach_flags = OPTS_GET(opts, flags, 0); + attr.replace_bpf_fd = OPTS_GET(opts, replace_prog_fd, 0); return sys_bpf(BPF_PROG_ATTACH, &attr, sizeof(attr)); } diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 3c791fa8e68e..f0ab8519986e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -28,14 +28,12 @@ #include <stddef.h> #include <stdint.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - struct bpf_create_map_attr { const char *name; enum bpf_map_type map_type; @@ -128,8 +126,19 @@ LIBBPF_API int bpf_map_get_next_key(int fd, const void *key, void *next_key); LIBBPF_API int bpf_map_freeze(int fd); LIBBPF_API int bpf_obj_pin(int fd, const char *pathname); LIBBPF_API int bpf_obj_get(const char *pathname); + +struct bpf_prog_attach_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + unsigned int flags; + int replace_prog_fd; +}; +#define bpf_prog_attach_opts__last_field replace_prog_fd + LIBBPF_API int bpf_prog_attach(int prog_fd, int attachable_fd, enum bpf_attach_type type, unsigned int flags); +LIBBPF_API int bpf_prog_attach_xattr(int prog_fd, int attachable_fd, + enum bpf_attach_type type, + const struct bpf_prog_attach_opts *opts); LIBBPF_API int bpf_prog_detach(int attachable_fd, enum bpf_attach_type type); LIBBPF_API int bpf_prog_detach2(int prog_fd, int attachable_fd, enum bpf_attach_type type);
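Together with the BPF_F_REPLACE UAPI flag introduced earlier in this series, the opts-based attach entry point above lets a caller atomically swap one program in a cgroup's multi-program list. A minimal sketch, under the assumption that cgroup_fd, old_prog_fd and new_prog_fd are valid descriptors obtained elsewhere:

/* Sketch: replace old_prog_fd with new_prog_fd in the cgroup's
 * egress program list, keeping all other attached programs intact.
 */
DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, opts,
	.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE,
	.replace_prog_fd = old_prog_fd,
);

int err = bpf_prog_attach_xattr(new_prog_fd, cgroup_fd,
				BPF_CGROUP_INET_EGRESS, &opts);
if (err)
	/* -EINVAL for malformed opts, otherwise the attach syscall failed */
	fprintf(stderr, "prog replace failed: %d\n", err);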
diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 0c7d28292898..f69cc208778a 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -25,6 +25,9 @@ #ifndef __always_inline #define __always_inline __attribute__((always_inline)) #endif +#ifndef __weak +#define __weak __attribute__((weak)) +#endif /* * Helper structure used by eBPF C program */ @@ -44,4 +47,12 @@ enum libbpf_pin_type { LIBBPF_PIN_BY_NAME, }; +enum libbpf_tristate { + TRI_NO = 0, + TRI_YES = 1, + TRI_MODULE = 2, +}; + +#define __kconfig __attribute__((section(".kconfig"))) + #endif
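On the BPF-program side, the __kconfig section attribute and libbpf_tristate enum added above declare externs that libbpf resolves against the running kernel's configuration (read from /proc/config.gz or /boot/config-$(uname -r), as implemented further down in libbpf.c). A hedged sketch; the CONFIG_* names are only examples, __weak marks values that may be absent, and SEC() comes from this same bpf_helpers.h:

/* Sketch of BPF-side usage; resolved values land in the read-only
 * .kconfig map when the object is loaded.
 */
extern int CONFIG_HZ __kconfig;                            /* plain integer */
extern enum libbpf_tristate CONFIG_VETH __kconfig __weak;  /* y/m/n, may be unset */

SEC("kprobe/do_sys_open")
int cfg_example(void *ctx)
{
	if (CONFIG_VETH == TRI_MODULE) {
		/* veth built as a module; adjust behaviour accordingly */
	}
	return CONFIG_HZ > 250;  /* constant from the moment of load */
}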
diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 88efa2bb7137..5f04f56e1eb6 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -278,6 +278,45 @@ done: return nelems * size; } +int btf__align_of(const struct btf *btf, __u32 id) +{ + const struct btf_type *t = btf__type_by_id(btf, id); + __u16 kind = btf_kind(t); + + switch (kind) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + return min(sizeof(void *), t->size); + case BTF_KIND_PTR: + return sizeof(void *); + case BTF_KIND_TYPEDEF: + case BTF_KIND_VOLATILE: + case BTF_KIND_CONST: + case BTF_KIND_RESTRICT: + return btf__align_of(btf, t->type); + case BTF_KIND_ARRAY: + return btf__align_of(btf, btf_array(t)->type); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + const struct btf_member *m = btf_members(t); + __u16 vlen = btf_vlen(t); + int i, max_align = 1, align; + + for (i = 0; i < vlen; i++, m++) { + align = btf__align_of(btf, m->type); + if (align <= 0) + return align; + max_align = max(max_align, align); + } + + return max_align; + } + default: + pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); + return 0; + } +} + int btf__resolve_type(const struct btf *btf, __u32 type_id) { const struct btf_type *t; @@ -539,6 +578,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, return -ENOENT; } + /* .extern datasec size and var offsets were set correctly during + * extern collection step, so just skip straight to sorting variables + */ + if (t->size) + goto sort_vars; + ret = bpf_object__section_size(obj, name, &size); if (ret || !size || (t->size && t->size != size)) { pr_debug("Invalid size for section %s: %u bytes\n", name, size); @@ -575,7 +620,8 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf, vsi->offset = off; } - qsort(t + 1, vars, sizeof(*vsi), compare_vsi_off); +sort_vars: + qsort(btf_var_secinfos(t), vars, sizeof(*vsi), compare_vsi_off); return 0; } diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index d9ac73a02cde..8d73f7f5551f 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -8,14 +8,12 @@ #include <linux/btf.h> #include <linux/types.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - #define BTF_ELF_SEC ".BTF" #define BTF_EXT_ELF_SEC ".BTF.ext" #define MAPS_ELF_SEC ".maps" @@ -79,6 +77,7 @@ LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); +LIBBPF_API int btf__align_of(const struct btf *btf, __u32 id); LIBBPF_API int btf__fd(const struct btf *btf); LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); @@ -127,6 +126,28 @@ LIBBPF_API void btf_dump__free(struct btf_dump *d); LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); +struct btf_dump_emit_type_decl_opts { + /* size of this struct, for forward/backward compatibility */ + size_t sz; + /* optional field name for type declaration, e.g.: + * - struct my_struct <FNAME> + * - void (*<FNAME>)(int) + * - char (*<FNAME>)[123] + */ + const char *field_name; + /* extra indentation level (in number of tabs) to emit for multi-line + * type declarations (e.g., anonymous struct); applies for lines + * starting from the second one (first line is assumed to have + * necessary indentation already) + */ + int indent_level; +}; +#define btf_dump_emit_type_decl_opts__last_field indent_level + +LIBBPF_API int +btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, + const struct btf_dump_emit_type_decl_opts *opts); + /* * A set of helpers for easier BTF types handling */
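The opts-based declaration emitter added to btf.h above would be driven roughly like this (a sketch: assumes an existing struct btf_dump *d created with btf_dump__new() and a valid type id; output goes to the printf callback the dump was constructed with):

/* Sketch: emit a C declaration for type `id`, naming the declared
 * field "my_field" and indenting continuation lines by one tab.
 */
DECLARE_LIBBPF_OPTS(btf_dump_emit_type_decl_opts, opts,
	.field_name = "my_field",  /* optional; may be omitted */
	.indent_level = 1,
);

if (btf_dump__emit_type_decl(d, id, &opts))
	/* only fails with -EINVAL when opts itself is malformed */
	fprintf(stderr, "emit_type_decl failed\n");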
diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index cb126d8fcf75..e95f7710f210 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -116,6 +116,8 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...) va_end(args); } +static int btf_dump_mark_referenced(struct btf_dump *d); + struct btf_dump *btf_dump__new(const struct btf *btf, const struct btf_ext *btf_ext, const struct btf_dump_opts *opts, @@ -137,18 +139,39 @@ struct btf_dump *btf_dump__new(const struct btf *btf, if (IS_ERR(d->type_names)) { err = PTR_ERR(d->type_names); d->type_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); } d->ident_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); if (IS_ERR(d->ident_names)) { err = PTR_ERR(d->ident_names); d->ident_names = NULL; - btf_dump__free(d); - return ERR_PTR(err); + goto err; + } + d->type_states = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->type_states[0])); + if (!d->type_states) { + err = -ENOMEM; + goto err; + } + d->cached_names = calloc(1 + btf__get_nr_types(d->btf), + sizeof(d->cached_names[0])); + if (!d->cached_names) { + err = -ENOMEM; + goto err; } + /* VOID is special */ + d->type_states[0].order_state = ORDERED; + d->type_states[0].emit_state = EMITTED; + + /* eagerly determine referenced types for anon enums */ + err = btf_dump_mark_referenced(d); + if (err) + goto err; + return d; +err: + btf_dump__free(d); + return ERR_PTR(err); } void btf_dump__free(struct btf_dump *d) @@ -175,7 +198,6 @@ void btf_dump__free(struct btf_dump *d) free(d); } -static int btf_dump_mark_referenced(struct btf_dump *d); static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr); static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id); @@ -202,27 +224,6 @@ int btf_dump__dump_type(struct btf_dump *d, __u32 id) if (id > btf__get_nr_types(d->btf)) return -EINVAL; - /* type states are lazily allocated, as they might not be needed */ - if (!d->type_states) { - d->type_states = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->type_states[0])); - if (!d->type_states) - return -ENOMEM; - d->cached_names = calloc(1 + btf__get_nr_types(d->btf), - sizeof(d->cached_names[0])); - if (!d->cached_names) - return -ENOMEM; - - /* VOID is special */ - d->type_states[0].order_state = ORDERED; - d->type_states[0].emit_state = EMITTED; - - /* eagerly determine referenced types for anon enums */ - err = btf_dump_mark_referenced(d); - if (err) - return err; - } - d->emit_queue_cnt = 0; err = btf_dump_order_type(d, id, false); if (err < 0) @@ -752,41 +753,6 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id) } } -static int btf_align_of(const struct btf *btf, __u32 id) -{ - const struct btf_type *t = btf__type_by_id(btf, id); - __u16 kind = btf_kind(t); - - switch (kind) { - case BTF_KIND_INT: - case BTF_KIND_ENUM: - return min(sizeof(void *), t->size); - case BTF_KIND_PTR: - return sizeof(void *); - case BTF_KIND_TYPEDEF: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - return btf_align_of(btf, t->type); - case BTF_KIND_ARRAY: - return btf_align_of(btf, btf_array(t)->type); - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: { - const struct btf_member *m = btf_members(t); - __u16 vlen = btf_vlen(t); - int i, align = 1; - - for (i = 0; i < vlen; i++, m++) - align = max(align, btf_align_of(btf, m->type)); - - return align; - } - default: - pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t)); - return 1; - } -} - static bool btf_is_struct_packed(const struct btf *btf, __u32 id, const struct btf_type *t) { @@ -794,18 +760,18 @@ static bool btf_is_struct_packed(const struct btf *btf, __u32 id, int align, i, bit_sz; __u16 vlen; - align = btf_align_of(btf, id); + align = btf__align_of(btf, id); /* size of a non-packed struct has to be a multiple of its alignment*/ - if (t->size % align) + if (align && t->size % align) return true; m = btf_members(t); vlen = btf_vlen(t); /* all non-bitfield fields have to be naturally aligned */ for (i = 0; i < vlen; i++, m++) { - align = btf_align_of(btf, m->type); + align = btf__align_of(btf, m->type); bit_sz = btf_member_bitfield_size(t, i); - if (bit_sz == 0 && m->offset % (8 * align) != 0) + if (align && bit_sz == 0 && m->offset % (8 * align) != 0) return true; } @@ -889,7 +855,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, fname = btf_name_of(d, m->name_off); m_sz = btf_member_bitfield_size(t, i); m_off = btf_member_bit_offset(t, i); - align = packed ? 1 : btf_align_of(d->btf, m->type); + align = packed ? 1 : btf__align_of(d->btf, m->type); btf_dump_emit_bit_padding(d, off, m_off, m_sz, align, lvl + 1); btf_dump_printf(d, "\n%s", pfx(lvl + 1)); @@ -907,7 +873,7 @@ static void btf_dump_emit_struct_def(struct btf_dump *d, /* pad at the end, if necessary */ if (is_struct) { - align = packed ? 1 : btf_align_of(d->btf, id); + align = packed ? 1 : btf__align_of(d->btf, id); btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, lvl + 1); } @@ -1051,6 +1017,21 @@ static int btf_dump_push_decl_stack_id(struct btf_dump *d, __u32 id) * of a stack frame. Some care is required to "pop" stack frames after * processing type declaration chain. */ +int btf_dump__emit_type_decl(struct btf_dump *d, __u32 id, + const struct btf_dump_emit_type_decl_opts *opts) +{ + const char *fname; + int lvl; + + if (!OPTS_VALID(opts, btf_dump_emit_type_decl_opts)) + return -EINVAL; + + fname = OPTS_GET(opts, field_name, NULL); + lvl = OPTS_GET(opts, indent_level, 0); + btf_dump_emit_type_decl(d, id, fname, lvl); + return 0; +} + static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id, const char *fname, int lvl) { diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 3f09772192f1..7513165b104f 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -18,6 +18,7 @@ #include <stdarg.h> #include <libgen.h> #include <inttypes.h> +#include <limits.h> #include <string.h> #include <unistd.h> #include <endian.h> @@ -41,9 +42,11 @@ #include <sys/types.h> #include <sys/vfs.h> #include <sys/utsname.h> +#include <sys/resource.h> #include <tools/libc_compat.h> #include <libelf.h> #include <gelf.h> +#include <zlib.h> #include "libbpf.h" #include "bpf.h" @@ -99,14 +102,33 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...) 
va_end(args); } -#define STRERR_BUFSIZE 128 +static void pr_perm_msg(int err) +{ + struct rlimit limit; + char buf[100]; + + if (err != -EPERM || geteuid() != 0) + return; + + err = getrlimit(RLIMIT_MEMLOCK, &limit); + if (err) + return; + + if (limit.rlim_cur == RLIM_INFINITY) + return; + + if (limit.rlim_cur < 1024) + snprintf(buf, sizeof(buf), "%zu bytes", (size_t)limit.rlim_cur); + else if (limit.rlim_cur < 1024*1024) + snprintf(buf, sizeof(buf), "%.1f KiB", (double)limit.rlim_cur / 1024); + else + snprintf(buf, sizeof(buf), "%.1f MiB", (double)limit.rlim_cur / (1024*1024)); -#define CHECK_ERR(action, err, out) do { \ - err = action; \ - if (err) \ - goto out; \ -} while (0) + pr_warn("permission error while running as root; try raising 'ulimit -l'? current value: %s\n", + buf); +} +#define STRERR_BUFSIZE 128 /* Copied from tools/perf/util/util.h */ #ifndef zfree @@ -146,6 +168,20 @@ struct bpf_capabilities { __u32 array_mmap:1; }; +enum reloc_type { + RELO_LD64, + RELO_CALL, + RELO_DATA, + RELO_EXTERN, +}; + +struct reloc_desc { + enum reloc_type type; + int insn_idx; + int map_idx; + int sym_off; +}; + /* * bpf_prog should be a better name but it has been used in * linux/filter.h. @@ -164,16 +200,7 @@ struct bpf_program { size_t insns_cnt, main_prog_cnt; enum bpf_prog_type type; - struct reloc_desc { - enum { - RELO_LD64, - RELO_CALL, - RELO_DATA, - } type; - int insn_idx; - int map_idx; - int sym_off; - } *reloc_desc; + struct reloc_desc *reloc_desc; int nr_reloc; int log_level; @@ -202,22 +229,29 @@ struct bpf_program { __u32 prog_flags; }; +#define DATA_SEC ".data" +#define BSS_SEC ".bss" +#define RODATA_SEC ".rodata" +#define KCONFIG_SEC ".kconfig" + enum libbpf_map_type { LIBBPF_MAP_UNSPEC, LIBBPF_MAP_DATA, LIBBPF_MAP_BSS, LIBBPF_MAP_RODATA, + LIBBPF_MAP_KCONFIG, }; static const char * const libbpf_type_to_btf_name[] = { - [LIBBPF_MAP_DATA] = ".data", - [LIBBPF_MAP_BSS] = ".bss", - [LIBBPF_MAP_RODATA] = ".rodata", + [LIBBPF_MAP_DATA] = DATA_SEC, + [LIBBPF_MAP_BSS] = BSS_SEC, + [LIBBPF_MAP_RODATA] = RODATA_SEC, + [LIBBPF_MAP_KCONFIG] = KCONFIG_SEC, }; struct bpf_map { - int fd; char *name; + int fd; int sec_idx; size_t sec_offset; int map_ifindex; @@ -228,14 +262,32 @@ struct bpf_map { void *priv; bpf_map_clear_priv_t clear_priv; enum libbpf_map_type libbpf_type; + void *mmaped; char *pin_path; bool pinned; bool reused; }; -struct bpf_secdata { - void *rodata; - void *data; +enum extern_type { + EXT_UNKNOWN, + EXT_CHAR, + EXT_BOOL, + EXT_INT, + EXT_TRISTATE, + EXT_CHAR_ARR, +}; + +struct extern_desc { + const char *name; + int sym_idx; + int btf_id; + enum extern_type type; + int sz; + int align; + int data_off; + bool is_signed; + bool is_weak; + bool is_set; }; static LIST_HEAD(bpf_objects_list); @@ -250,7 +302,11 @@ struct bpf_object { struct bpf_map *maps; size_t nr_maps; size_t maps_cap; - struct bpf_secdata sections; + + char *kconfig; + struct extern_desc *externs; + int nr_extern; + int kconfig_map_idx; bool loaded; bool has_pseudo_calls; @@ -279,6 +335,7 @@ struct bpf_object { int maps_shndx; int btf_maps_shndx; int text_shndx; + int symbols_shndx; int data_shndx; int rodata_shndx; int bss_shndx; @@ -550,6 +607,7 @@ static struct bpf_object *bpf_object__new(const char *path, obj->efile.data_shndx = -1; obj->efile.rodata_shndx = -1; obj->efile.bss_shndx = -1; + obj->kconfig_map_idx = -1; obj->kern_version = get_kernel_version(); obj->loaded = false; @@ -748,13 +806,13 @@ int bpf_object__section_size(const struct bpf_object *obj, const char *name, *size = 0; if (!name) { 
return -EINVAL; - } else if (!strcmp(name, ".data")) { + } else if (!strcmp(name, DATA_SEC)) { if (obj->efile.data) *size = obj->efile.data->d_size; - } else if (!strcmp(name, ".bss")) { + } else if (!strcmp(name, BSS_SEC)) { if (obj->efile.bss) *size = obj->efile.bss->d_size; - } else if (!strcmp(name, ".rodata")) { + } else if (!strcmp(name, RODATA_SEC)) { if (obj->efile.rodata) *size = obj->efile.rodata->d_size; } else { @@ -835,13 +893,38 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj) return &obj->maps[obj->nr_maps++]; } +static size_t bpf_map_mmap_sz(const struct bpf_map *map) +{ + long page_sz = sysconf(_SC_PAGE_SIZE); + size_t map_sz; + + map_sz = roundup(map->def.value_size, 8) * map->def.max_entries; + map_sz = roundup(map_sz, page_sz); + return map_sz; +} + +static char *internal_map_name(struct bpf_object *obj, + enum libbpf_map_type type) +{ + char map_name[BPF_OBJ_NAME_LEN]; + const char *sfx = libbpf_type_to_btf_name[type]; + int sfx_len = max((size_t)7, strlen(sfx)); + int pfx_len = min((size_t)BPF_OBJ_NAME_LEN - sfx_len - 1, + strlen(obj->name)); + + snprintf(map_name, sizeof(map_name), "%.*s%.*s", pfx_len, obj->name, + sfx_len, libbpf_type_to_btf_name[type]); + + return strdup(map_name); +} + static int bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, - int sec_idx, Elf_Data *data, void **data_buff) + int sec_idx, void *data, size_t data_sz) { - char map_name[BPF_OBJ_NAME_LEN]; struct bpf_map_def *def; struct bpf_map *map; + int err; map = bpf_object__add_map(obj); if (IS_ERR(map)) @@ -850,9 +933,7 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, map->libbpf_type = type; map->sec_idx = sec_idx; map->sec_offset = 0; - snprintf(map_name, sizeof(map_name), "%.8s%.7s", obj->name, - libbpf_type_to_btf_name[type]); - map->name = strdup(map_name); + map->name = internal_map_name(obj, type); if (!map->name) { pr_warn("failed to alloc map name\n"); return -ENOMEM; @@ -861,25 +942,29 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type, def = &map->def; def->type = BPF_MAP_TYPE_ARRAY; def->key_size = sizeof(int); - def->value_size = data->d_size; + def->value_size = data_sz; def->max_entries = 1; - def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; - if (obj->caps.array_mmap) - def->map_flags |= BPF_F_MMAPABLE; + def->map_flags = type == LIBBPF_MAP_RODATA || type == LIBBPF_MAP_KCONFIG + ? 
BPF_F_RDONLY_PROG : 0; + def->map_flags |= BPF_F_MMAPABLE; pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", - map_name, map->sec_idx, map->sec_offset, def->map_flags); + map->name, map->sec_idx, map->sec_offset, def->map_flags); - if (data_buff) { - *data_buff = malloc(data->d_size); - if (!*data_buff) { - zfree(&map->name); - pr_warn("failed to alloc map content buffer\n"); - return -ENOMEM; - } - memcpy(*data_buff, data->d_buf, data->d_size); + map->mmaped = mmap(NULL, bpf_map_mmap_sz(map), PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_ANONYMOUS, -1, 0); + if (map->mmaped == MAP_FAILED) { + err = -errno; + map->mmaped = NULL; + pr_warn("failed to alloc map '%s' content buffer: %d\n", + map->name, err); + zfree(&map->name); + return err; } + if (data) + memcpy(map->mmaped, data, data_sz); + pr_debug("map %td is \"%s\"\n", map - obj->maps, map->name); return 0; } @@ -888,37 +973,332 @@ static int bpf_object__init_global_data_maps(struct bpf_object *obj) { int err; - if (!obj->caps.global_data) - return 0; /* * Populate obj->maps with libbpf internal maps. */ if (obj->efile.data_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_DATA, obj->efile.data_shndx, - obj->efile.data, - &obj->sections.data); + obj->efile.data->d_buf, + obj->efile.data->d_size); if (err) return err; } if (obj->efile.rodata_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_RODATA, obj->efile.rodata_shndx, - obj->efile.rodata, - &obj->sections.rodata); + obj->efile.rodata->d_buf, + obj->efile.rodata->d_size); if (err) return err; } if (obj->efile.bss_shndx >= 0) { err = bpf_object__init_internal_map(obj, LIBBPF_MAP_BSS, obj->efile.bss_shndx, - obj->efile.bss, NULL); + NULL, + obj->efile.bss->d_size); if (err) return err; } return 0; } + +static struct extern_desc *find_extern_by_name(const struct bpf_object *obj, + const void *name) +{ + int i; + + for (i = 0; i < obj->nr_extern; i++) { + if (strcmp(obj->externs[i].name, name) == 0) + return &obj->externs[i]; + } + return NULL; +} + +static int set_ext_value_tri(struct extern_desc *ext, void *ext_val, + char value) +{ + switch (ext->type) { + case EXT_BOOL: + if (value == 'm') { + pr_warn("extern %s=%c should be tristate or char\n", + ext->name, value); + return -EINVAL; + } + *(bool *)ext_val = value == 'y' ? 
true : false; + break; + case EXT_TRISTATE: + if (value == 'y') + *(enum libbpf_tristate *)ext_val = TRI_YES; + else if (value == 'm') + *(enum libbpf_tristate *)ext_val = TRI_MODULE; + else /* value == 'n' */ + *(enum libbpf_tristate *)ext_val = TRI_NO; + break; + case EXT_CHAR: + *(char *)ext_val = value; + break; + case EXT_UNKNOWN: + case EXT_INT: + case EXT_CHAR_ARR: + default: + pr_warn("extern %s=%c should be bool, tristate, or char\n", + ext->name, value); + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int set_ext_value_str(struct extern_desc *ext, char *ext_val, + const char *value) +{ + size_t len; + + if (ext->type != EXT_CHAR_ARR) { + pr_warn("extern %s=%s should be char array\n", ext->name, value); + return -EINVAL; + } + + len = strlen(value); + if (value[len - 1] != '"') { + pr_warn("extern '%s': invalid string config '%s'\n", + ext->name, value); + return -EINVAL; + } + + /* strip quotes */ + len -= 2; + if (len >= ext->sz) { + pr_warn("extern '%s': string config %s (%zu bytes) truncated to %d bytes\n", + ext->name, value, len, ext->sz - 1); + len = ext->sz - 1; + } + memcpy(ext_val, value + 1, len); + ext_val[len] = '\0'; + ext->is_set = true; + return 0; +} + +static int parse_u64(const char *value, __u64 *res) +{ + char *value_end; + int err; + + errno = 0; + *res = strtoull(value, &value_end, 0); + if (errno) { + err = -errno; + pr_warn("failed to parse '%s' as integer: %d\n", value, err); + return err; + } + if (*value_end) { + pr_warn("failed to parse '%s' as integer completely\n", value); + return -EINVAL; + } + return 0; +} + +static bool is_ext_value_in_range(const struct extern_desc *ext, __u64 v) +{ + int bit_sz = ext->sz * 8; + + if (ext->sz == 8) + return true; + + /* Validate that value stored in u64 fits in integer of `ext->sz` + * bytes size without any loss of information. If the target integer + * is signed, we rely on the following limits of integer type of + * Y bits and subsequent transformation: + * + * -2^(Y-1) <= X <= 2^(Y-1) - 1 + * 0 <= X + 2^(Y-1) <= 2^Y - 1 + * 0 <= X + 2^(Y-1) < 2^Y + * + * For unsigned target integer, check that all the (64 - Y) bits are + * zero. + */ + if (ext->is_signed) + return v + (1ULL << (bit_sz - 1)) < (1ULL << bit_sz); + else + return (v >> bit_sz) == 0; +} + +static int set_ext_value_num(struct extern_desc *ext, void *ext_val, + __u64 value) +{ + if (ext->type != EXT_INT && ext->type != EXT_CHAR) { + pr_warn("extern %s=%llu should be integer\n", + ext->name, (unsigned long long)value); + return -EINVAL; + } + if (!is_ext_value_in_range(ext, value)) { + pr_warn("extern %s=%llu value doesn't fit in %d bytes\n", + ext->name, (unsigned long long)value, ext->sz); + return -ERANGE; + } + switch (ext->sz) { + case 1: *(__u8 *)ext_val = value; break; + case 2: *(__u16 *)ext_val = value; break; + case 4: *(__u32 *)ext_val = value; break; + case 8: *(__u64 *)ext_val = value; break; + default: + return -EINVAL; + } + ext->is_set = true; + return 0; +} + +static int bpf_object__process_kconfig_line(struct bpf_object *obj, + char *buf, void *data) +{ + struct extern_desc *ext; + char *sep, *value; + int len, err = 0; + void *ext_val; + __u64 num; + + if (strncmp(buf, "CONFIG_", 7)) + return 0; + + sep = strchr(buf, '='); + if (!sep) { + pr_warn("failed to parse '%s': no separator\n", buf); + return -EINVAL; + } + + /* Trim ending '\n' */ + len = strlen(buf); + if (buf[len - 1] == '\n') + buf[len - 1] = '\0'; + /* Split on '=' and ensure that a value is present. 
*/ + *sep = '\0'; + if (!sep[1]) { + *sep = '='; + pr_warn("failed to parse '%s': no value\n", buf); + return -EINVAL; + } + + ext = find_extern_by_name(obj, buf); + if (!ext || ext->is_set) + return 0; + + ext_val = data + ext->data_off; + value = sep + 1; + + switch (*value) { + case 'y': case 'n': case 'm': + err = set_ext_value_tri(ext, ext_val, *value); + break; + case '"': + err = set_ext_value_str(ext, ext_val, value); + break; + default: + /* assume integer */ + err = parse_u64(value, &num); + if (err) { + pr_warn("extern %s=%s should be integer\n", + ext->name, value); + return err; + } + err = set_ext_value_num(ext, ext_val, num); + break; + } + if (err) + return err; + pr_debug("extern %s=%s\n", ext->name, value); + return 0; +} + +static int bpf_object__read_kconfig_file(struct bpf_object *obj, void *data) +{ + char buf[PATH_MAX]; + struct utsname uts; + int len, err = 0; + gzFile file; + + uname(&uts); + len = snprintf(buf, PATH_MAX, "/boot/config-%s", uts.release); + if (len < 0) + return -EINVAL; + else if (len >= PATH_MAX) + return -ENAMETOOLONG; + + /* gzopen also accepts uncompressed files. */ + file = gzopen(buf, "r"); + if (!file) + file = gzopen("/proc/config.gz", "r"); + + if (!file) { + pr_warn("failed to open system Kconfig\n"); + return -ENOENT; + } + + while (gzgets(file, buf, sizeof(buf))) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing system Kconfig line '%s': %d\n", + buf, err); + goto out; + } + } + +out: + gzclose(file); + return err; +} + +static int bpf_object__read_kconfig_mem(struct bpf_object *obj, + const char *config, void *data) +{ + char buf[PATH_MAX]; + int err = 0; + FILE *file; + + file = fmemopen((void *)config, strlen(config), "r"); + if (!file) { + err = -errno; + pr_warn("failed to open in-memory Kconfig: %d\n", err); + return err; + } + + while (fgets(buf, sizeof(buf), file)) { + err = bpf_object__process_kconfig_line(obj, buf, data); + if (err) { + pr_warn("error parsing in-memory Kconfig line '%s': %d\n", + buf, err); + break; + } + } + + fclose(file); + return err; +} + +static int bpf_object__init_kconfig_map(struct bpf_object *obj) +{ + struct extern_desc *last_ext; + size_t map_sz; + int err; + + if (obj->nr_extern == 0) + return 0; + + last_ext = &obj->externs[obj->nr_extern - 1]; + map_sz = last_ext->data_off + last_ext->sz; + + err = bpf_object__init_internal_map(obj, LIBBPF_MAP_KCONFIG, + obj->efile.symbols_shndx, + NULL, map_sz); + if (err) + return err; + + obj->kconfig_map_idx = obj->nr_maps - 1; + + return 0; +} + static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict) { Elf_Data *symbols = obj->efile.symbols; @@ -1242,15 +1622,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found key [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found key [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.key_size && map->def.key_size != sz) { - pr_warn("map '%s': conflicting key size %u != %lld.\n", - map_name, map->def.key_size, sz); + pr_warn("map '%s': conflicting key size %u != %zd.\n", + map_name, map->def.key_size, (ssize_t)sz); return -EINVAL; } map->def.key_size = sz; @@ -1285,15 +1665,15 @@ static int 
bpf_object__init_user_btf_map(struct bpf_object *obj, } sz = btf__resolve_size(obj->btf, t->type); if (sz < 0) { - pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", - map_name, t->type, sz); + pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n", + map_name, t->type, (ssize_t)sz); return sz; } - pr_debug("map '%s': found value [%u], sz = %lld.\n", - map_name, t->type, sz); + pr_debug("map '%s': found value [%u], sz = %zd.\n", + map_name, t->type, (ssize_t)sz); if (map->def.value_size && map->def.value_size != sz) { - pr_warn("map '%s': conflicting value size %u != %lld.\n", - map_name, map->def.value_size, sz); + pr_warn("map '%s': conflicting value size %u != %zd.\n", + map_name, map->def.value_size, (ssize_t)sz); return -EINVAL; } map->def.value_size = sz; @@ -1393,21 +1773,20 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, return 0; } -static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__init_maps(struct bpf_object *obj, + const struct bpf_object_open_opts *opts) { - bool strict = !relaxed_maps; + const char *pin_root_path; + bool strict; int err; - err = bpf_object__init_user_maps(obj, strict); - if (err) - return err; - - err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path); - if (err) - return err; + strict = !OPTS_GET(opts, relaxed_maps, false); + pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - err = bpf_object__init_global_data_maps(obj); + err = bpf_object__init_user_maps(obj, strict); + err = err ?: bpf_object__init_user_btf_maps(obj, strict, pin_root_path); + err = err ?: bpf_object__init_global_data_maps(obj); + err = err ?: bpf_object__init_kconfig_map(obj); if (err) return err; @@ -1509,7 +1888,8 @@ static void bpf_object__sanitize_btf_ext(struct bpf_object *obj) static bool bpf_object__is_btf_mandatory(const struct bpf_object *obj) { - return obj->efile.btf_maps_shndx >= 0; + return obj->efile.btf_maps_shndx >= 0 || + obj->nr_extern > 0; } static int bpf_object__init_btf(struct bpf_object *obj, @@ -1526,11 +1906,6 @@ static int bpf_object__init_btf(struct bpf_object *obj, BTF_ELF_SEC, err); goto out; } - err = btf__finalize_data(obj, obj->btf); - if (err) { - pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); - goto out; - } } if (btf_ext_data) { if (!obj->btf) { @@ -1564,6 +1939,30 @@ out: return 0; } +static int bpf_object__finalize_btf(struct bpf_object *obj) +{ + int err; + + if (!obj->btf) + return 0; + + err = btf__finalize_data(obj, obj->btf); + if (!err) + return 0; + + pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + btf_ext__free(obj->btf_ext); + obj->btf_ext = NULL; + + if (bpf_object__is_btf_mandatory(obj)) { + pr_warn("BTF is required, but is missing or corrupted.\n"); + return -ENOENT; + } + return 0; +} + static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) { int err = 0; @@ -1592,8 +1991,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) return 0; } -static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, - const char *pin_root_path) +static int bpf_object__elf_collect(struct bpf_object *obj) { Elf *elf = obj->efile.elf; GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1665,6 +2063,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, return -LIBBPF_ERRNO__FORMAT; } obj->efile.symbols = data; + obj->efile.symbols_shndx = idx; obj->efile.strtabidx = sh.sh_link; } 
else if (sh.sh_type == SHT_PROGBITS && data->d_size > 0) { if (sh.sh_flags & SHF_EXECINSTR) { @@ -1683,10 +2082,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, name, obj->path, cp); return err; } - } else if (strcmp(name, ".data") == 0) { + } else if (strcmp(name, DATA_SEC) == 0) { obj->efile.data = data; obj->efile.data_shndx = idx; - } else if (strcmp(name, ".rodata") == 0) { + } else if (strcmp(name, RODATA_SEC) == 0) { obj->efile.rodata = data; obj->efile.rodata_shndx = idx; } else { @@ -1716,7 +2115,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, obj->efile.reloc_sects[nr_sects].shdr = sh; obj->efile.reloc_sects[nr_sects].data = data; - } else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) { + } else if (sh.sh_type == SHT_NOBITS && + strcmp(name, BSS_SEC) == 0) { obj->efile.bss = data; obj->efile.bss_shndx = idx; } else { @@ -1728,14 +2128,217 @@ static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, pr_warn("Corrupted ELF file: index of strtab invalid\n"); return -LIBBPF_ERRNO__FORMAT; } - err = bpf_object__init_btf(obj, btf_data, btf_ext_data); - if (!err) - err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path); - if (!err) - err = bpf_object__sanitize_and_load_btf(obj); - if (!err) - err = bpf_object__init_prog_names(obj); - return err; + return bpf_object__init_btf(obj, btf_data, btf_ext_data); +} + +static bool sym_is_extern(const GElf_Sym *sym) +{ + int bind = GELF_ST_BIND(sym->st_info); + /* externs are symbols w/ type=NOTYPE, bind=GLOBAL|WEAK, section=UND */ + return sym->st_shndx == SHN_UNDEF && + (bind == STB_GLOBAL || bind == STB_WEAK) && + GELF_ST_TYPE(sym->st_info) == STT_NOTYPE; +} + +static int find_extern_btf_id(const struct btf *btf, const char *ext_name) +{ + const struct btf_type *t; + const char *var_name; + int i, n; + + if (!btf) + return -ESRCH; + + n = btf__get_nr_types(btf); + for (i = 1; i <= n; i++) { + t = btf__type_by_id(btf, i); + + if (!btf_is_var(t)) + continue; + + var_name = btf__name_by_offset(btf, t->name_off); + if (strcmp(var_name, ext_name)) + continue; + + if (btf_var(t)->linkage != BTF_VAR_GLOBAL_EXTERN) + return -EINVAL; + + return i; + } + + return -ENOENT; +} + +static enum extern_type find_extern_type(const struct btf *btf, int id, + bool *is_signed) +{ + const struct btf_type *t; + const char *name; + + t = skip_mods_and_typedefs(btf, id, NULL); + name = btf__name_by_offset(btf, t->name_off); + + if (is_signed) + *is_signed = false; + switch (btf_kind(t)) { + case BTF_KIND_INT: { + int enc = btf_int_encoding(t); + + if (enc & BTF_INT_BOOL) + return t->size == 1 ? EXT_BOOL : EXT_UNKNOWN; + if (is_signed) + *is_signed = enc & BTF_INT_SIGNED; + if (t->size == 1) + return EXT_CHAR; + if (t->size < 1 || t->size > 8 || (t->size & (t->size - 1))) + return EXT_UNKNOWN; + return EXT_INT; + } + case BTF_KIND_ENUM: + if (t->size != 4) + return EXT_UNKNOWN; + if (strcmp(name, "libbpf_tristate")) + return EXT_UNKNOWN; + return EXT_TRISTATE; + case BTF_KIND_ARRAY: + if (btf_array(t)->nelems == 0) + return EXT_UNKNOWN; + if (find_extern_type(btf, btf_array(t)->type, NULL) != EXT_CHAR) + return EXT_UNKNOWN; + return EXT_CHAR_ARR; + default: + return EXT_UNKNOWN; + } +} + +static int cmp_externs(const void *_a, const void *_b) +{ + const struct extern_desc *a = _a; + const struct extern_desc *b = _b; + + /* descending order by alignment requirements */ + if (a->align != b->align) + return a->align > b->align ? 
-1 : 1; + /* ascending order by size, within same alignment class */ + if (a->sz != b->sz) + return a->sz < b->sz ? -1 : 1; + /* resolve ties by name */ + return strcmp(a->name, b->name); +} + +static int bpf_object__collect_externs(struct bpf_object *obj) +{ + const struct btf_type *t; + struct extern_desc *ext; + int i, n, off, btf_id; + struct btf_type *sec; + const char *ext_name; + Elf_Scn *scn; + GElf_Shdr sh; + + if (!obj->efile.symbols) + return 0; + + scn = elf_getscn(obj->efile.elf, obj->efile.symbols_shndx); + if (!scn) + return -LIBBPF_ERRNO__FORMAT; + if (gelf_getshdr(scn, &sh) != &sh) + return -LIBBPF_ERRNO__FORMAT; + n = sh.sh_size / sh.sh_entsize; + + pr_debug("looking for externs among %d symbols...\n", n); + for (i = 0; i < n; i++) { + GElf_Sym sym; + + if (!gelf_getsym(obj->efile.symbols, i, &sym)) + return -LIBBPF_ERRNO__FORMAT; + if (!sym_is_extern(&sym)) + continue; + ext_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, + sym.st_name); + if (!ext_name || !ext_name[0]) + continue; + + ext = obj->externs; + ext = reallocarray(ext, obj->nr_extern + 1, sizeof(*ext)); + if (!ext) + return -ENOMEM; + obj->externs = ext; + ext = &ext[obj->nr_extern]; + memset(ext, 0, sizeof(*ext)); + obj->nr_extern++; + + ext->btf_id = find_extern_btf_id(obj->btf, ext_name); + if (ext->btf_id <= 0) { + pr_warn("failed to find BTF for extern '%s': %d\n", + ext_name, ext->btf_id); + return ext->btf_id; + } + t = btf__type_by_id(obj->btf, ext->btf_id); + ext->name = btf__name_by_offset(obj->btf, t->name_off); + ext->sym_idx = i; + ext->is_weak = GELF_ST_BIND(sym.st_info) == STB_WEAK; + ext->sz = btf__resolve_size(obj->btf, t->type); + if (ext->sz <= 0) { + pr_warn("failed to resolve size of extern '%s': %d\n", + ext_name, ext->sz); + return ext->sz; + } + ext->align = btf__align_of(obj->btf, t->type); + if (ext->align <= 0) { + pr_warn("failed to determine alignment of extern '%s': %d\n", + ext_name, ext->align); + return -EINVAL; + } + ext->type = find_extern_type(obj->btf, t->type, + &ext->is_signed); + if (ext->type == EXT_UNKNOWN) { + pr_warn("extern '%s' type is unsupported\n", ext_name); + return -ENOTSUP; + } + } + pr_debug("collected %d externs total\n", obj->nr_extern); + + if (!obj->nr_extern) + return 0; + + /* sort externs by (alignment, size, name) and calculate their offsets + * within a map */ + qsort(obj->externs, obj->nr_extern, sizeof(*ext), cmp_externs); + off = 0; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + ext->data_off = roundup(off, ext->align); + off = ext->data_off + ext->sz; + pr_debug("extern #%d: symbol %d, off %u, name %s\n", + i, ext->sym_idx, ext->data_off, ext->name); + } + + btf_id = btf__find_by_name(obj->btf, KCONFIG_SEC); + if (btf_id <= 0) { + pr_warn("no BTF info found for '%s' datasec\n", KCONFIG_SEC); + return -ESRCH; + } + + sec = (struct btf_type *)btf__type_by_id(obj->btf, btf_id); + sec->size = off; + n = btf_vlen(sec); + for (i = 0; i < n; i++) { + struct btf_var_secinfo *vs = btf_var_secinfos(sec) + i; + + t = btf__type_by_id(obj->btf, vs->type); + ext_name = btf__name_by_offset(obj->btf, t->name_off); + ext = find_extern_by_name(obj, ext_name); + if (!ext) { + pr_warn("failed to find extern definition for BTF var '%s'\n", + ext_name); + return -ESRCH; + } + vs->offset = ext->data_off; + btf_var(t)->linkage = BTF_VAR_GLOBAL_ALLOCATED; + } + + return 0; } static struct bpf_program * @@ -1765,6 +2368,19 @@ bpf_object__find_program_by_title(const struct bpf_object *obj, return NULL; } +struct bpf_program * 
+bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name) +{ + struct bpf_program *prog; + + bpf_object__for_each_program(prog, obj) { + if (!strcmp(prog->name, name)) + return prog; + } + return NULL; +} + static bool bpf_object__shndx_is_data(const struct bpf_object *obj, int shndx) { @@ -1789,6 +2405,8 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx) return LIBBPF_MAP_BSS; else if (shndx == obj->efile.rodata_shndx) return LIBBPF_MAP_RODATA; + else if (shndx == obj->efile.symbols_shndx) + return LIBBPF_MAP_KCONFIG; else return LIBBPF_MAP_UNSPEC; } @@ -1817,7 +2435,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, return -LIBBPF_ERRNO__RELOC; } if (sym->st_value % 8) { - pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); + pr_warn("bad call relo offset: %zu\n", + (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_CALL; @@ -1832,6 +2451,30 @@ static int bpf_program__record_reloc(struct bpf_program *prog, insn_idx, insn->code); return -LIBBPF_ERRNO__RELOC; } + + if (sym_is_extern(sym)) { + int sym_idx = GELF_R_SYM(rel->r_info); + int i, n = obj->nr_extern; + struct extern_desc *ext; + + for (i = 0; i < n; i++) { + ext = &obj->externs[i]; + if (ext->sym_idx == sym_idx) + break; + } + if (i >= n) { + pr_warn("extern relo failed to find extern for sym %d\n", + sym_idx); + return -LIBBPF_ERRNO__RELOC; + } + pr_debug("found extern #%d '%s' (sym %d, off %u) for insn %u\n", + i, ext->name, ext->sym_idx, ext->data_off, insn_idx); + reloc_desc->type = RELO_EXTERN; + reloc_desc->insn_idx = insn_idx; + reloc_desc->sym_off = ext->data_off; + return 0; + } + if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", name, shdr_idx); @@ -1859,8 +2502,8 @@ static int bpf_program__record_reloc(struct bpf_program *prog, break; } if (map_idx >= nr_maps) { - pr_warn("map relo failed to find map for sec %u, off %llu\n", - shdr_idx, (__u64)sym->st_value); + pr_warn("map relo failed to find map for sec %u, off %zu\n", + shdr_idx, (size_t)sym->st_value); return -LIBBPF_ERRNO__RELOC; } reloc_desc->type = RELO_LD64; @@ -1875,11 +2518,6 @@ static int bpf_program__record_reloc(struct bpf_program *prog, pr_warn("bad data relo against section %u\n", shdr_idx); return -LIBBPF_ERRNO__RELOC; } - if (!obj->caps.global_data) { - pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", - name, insn_idx); - return -LIBBPF_ERRNO__RELOC; - } for (map_idx = 0; map_idx < nr_maps; map_idx++) { map = &obj->maps[map_idx]; if (map->libbpf_type != type) @@ -1941,9 +2579,9 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, name = elf_strptr(obj->efile.elf, obj->efile.strtabidx, sym.st_name) ? 
: "<?>"; - pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", - (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), - (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), + pr_debug("relo for shdr %u, symb %zu, value %zu, type %d, bind %d, name %d (\'%s\'), insn %u\n", + (__u32)sym.st_shndx, (size_t)GELF_R_SYM(rel.r_info), + (size_t)sym.st_value, GELF_ST_TYPE(sym.st_info), GELF_ST_BIND(sym.st_info), sym.st_name, name, insn_idx); @@ -2298,29 +2936,35 @@ bpf_object__reuse_map(struct bpf_map *map) static int bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map) { + enum libbpf_map_type map_type = map->libbpf_type; char *cp, errmsg[STRERR_BUFSIZE]; int err, zero = 0; - __u8 *data; - /* Nothing to do here since kernel already zero-initializes .bss map. */ - if (map->libbpf_type == LIBBPF_MAP_BSS) + /* kernel already zero-initializes .bss map. */ + if (map_type == LIBBPF_MAP_BSS) return 0; - data = map->libbpf_type == LIBBPF_MAP_DATA ? - obj->sections.data : obj->sections.rodata; + err = bpf_map_update_elem(map->fd, &zero, map->mmaped, 0); + if (err) { + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); + pr_warn("Error setting initial map(%s) contents: %s\n", + map->name, cp); + return err; + } - err = bpf_map_update_elem(map->fd, &zero, data, 0); - /* Freeze .rodata map as read-only from syscall side. */ - if (!err && map->libbpf_type == LIBBPF_MAP_RODATA) { + /* Freeze .rodata and .kconfig map as read-only from syscall side. */ + if (map_type == LIBBPF_MAP_RODATA || map_type == LIBBPF_MAP_KCONFIG) { err = bpf_map_freeze(map->fd); if (err) { - cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); + err = -errno; + cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("Error freezing map(%s) as read-only: %s\n", map->name, cp); - err = 0; + return err; } } - return err; + return 0; } static int @@ -2411,6 +3055,7 @@ err_out: cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); pr_warn("failed to create map (name: '%s'): %s(%d)\n", map->name, cp, err); + pr_perm_msg(err); for (j = 0; j < i; j++) zclose(obj->maps[j].fd); return err; @@ -2536,6 +3181,21 @@ static bool str_is_empty(const char *s) return !s || !s[0]; } +static bool is_flex_arr(const struct btf *btf, + const struct bpf_core_accessor *acc, + const struct btf_array *arr) +{ + const struct btf_type *t; + + /* not a flexible array, if not inside a struct or has non-zero size */ + if (!acc->name || arr->nelems > 0) + return false; + + /* has to be the last member of enclosing struct */ + t = btf__type_by_id(btf, acc->type_id); + return acc->idx == btf_vlen(t) - 1; +} + /* * Turn bpf_field_reloc into a low- and high-level spec representation, * validating correctness along the way, as well as calculating resulting @@ -2573,6 +3233,7 @@ static int bpf_core_spec_parse(const struct btf *btf, struct bpf_core_spec *spec) { int access_idx, parsed_len, i; + struct bpf_core_accessor *acc; const struct btf_type *t; const char *name; __u32 id; @@ -2620,6 +3281,7 @@ static int bpf_core_spec_parse(const struct btf *btf, return -EINVAL; access_idx = spec->raw_spec[i]; + acc = &spec->spec[spec->len]; if (btf_is_composite(t)) { const struct btf_member *m; @@ -2637,18 +3299,23 @@ static int bpf_core_spec_parse(const struct btf *btf, if (str_is_empty(name)) return -EINVAL; - spec->spec[spec->len].type_id = id; - spec->spec[spec->len].idx = access_idx; - spec->spec[spec->len].name = name; + acc->type_id = id; + acc->idx = access_idx; + acc->name = name; 
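Since is_flex_arr() relaxes the array bound check applied below, a hedged sketch (not from the patch) of the one layout it accepts:

struct event {
    int seq;
    int cnt;
    char payload[]; /* zero-sized (nelems == 0), named, last member of the struct */
};

/* An access spec ending in payload[idx] may use any idx, because a
 * flexible array's real extent is only known at runtime; the same index
 * into a fixed-size array placed mid-struct still fails the nelems check.
 */
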
spec->len++; } id = m->type; } else if (btf_is_array(t)) { const struct btf_array *a = btf_array(t); + bool flex; t = skip_mods_and_typedefs(btf, a->type, &id); - if (!t || access_idx >= a->nelems) + if (!t) + return -EINVAL; + + flex = is_flex_arr(btf, acc - 1, a); + if (!flex && access_idx >= a->nelems) return -EINVAL; spec->spec[spec->len].type_id = id; @@ -2953,12 +3620,14 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec, */ if (i > 0) { const struct btf_array *a; + bool flex; if (!btf_is_array(targ_type)) return 0; a = btf_array(targ_type); - if (local_acc->idx >= a->nelems) + flex = is_flex_arr(targ_btf, targ_acc - 1, a); + if (!flex && local_acc->idx >= a->nelems) return 0; if (!skip_mods_and_typedefs(targ_btf, a->type, &targ_id)) @@ -3142,11 +3811,13 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, insn = &prog->insns[insn_idx]; class = BPF_CLASS(insn->code); - if (class == BPF_ALU || class == BPF_ALU64) { + switch (class) { + case BPF_ALU: + case BPF_ALU64: if (BPF_SRC(insn->code) != BPF_K) return -EINVAL; if (!failed && validate && insn->imm != orig_val) { - pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", + pr_warn("prog '%s': unexpected insn #%d (ALU/ALU64) value: got %u, exp %u -> %u\n", bpf_program__title(prog, false), insn_idx, insn->imm, orig_val, new_val); return -EINVAL; @@ -3156,7 +3827,29 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", bpf_program__title(prog, false), insn_idx, failed ? " w/ failed reloc" : "", orig_val, new_val); - } else { + break; + case BPF_LDX: + case BPF_ST: + case BPF_STX: + if (!failed && validate && insn->off != orig_val) { + pr_warn("prog '%s': unexpected insn #%d (LD/LDX/ST/STX) value: got %u, exp %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + insn->off, orig_val, new_val); + return -EINVAL; + } + if (new_val > SHRT_MAX) { + pr_warn("prog '%s': insn #%d (LD/LDX/ST/STX) value too big: %u\n", + bpf_program__title(prog, false), insn_idx, + new_val); + return -ERANGE; + } + orig_val = insn->off; + insn->off = new_val; + pr_debug("prog '%s': patched insn #%d (LD/LDX/ST/STX)%s off %u -> %u\n", + bpf_program__title(prog, false), insn_idx, + failed ? 
" w/ failed reloc" : "", orig_val, new_val); + break; + default: pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", bpf_program__title(prog, false), insn_idx, insn->code, insn->src_reg, insn->dst_reg, @@ -3559,9 +4252,6 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj, size_t new_cnt; int err; - if (relo->type != RELO_CALL) - return -LIBBPF_ERRNO__RELOC; - if (prog->idx == obj->efile.text_shndx) { pr_warn("relo in .text insn %d into off %d (insn #%d)\n", relo->insn_idx, relo->sym_off, relo->sym_off / 8); @@ -3623,27 +4313,37 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj) for (i = 0; i < prog->nr_reloc; i++) { struct reloc_desc *relo = &prog->reloc_desc[i]; + struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { - struct bpf_insn *insn = &prog->insns[relo->insn_idx]; - - if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { - pr_warn("relocation out of range: '%s'\n", - prog->section_name); - return -LIBBPF_ERRNO__RELOC; - } + if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { + pr_warn("relocation out of range: '%s'\n", + prog->section_name); + return -LIBBPF_ERRNO__RELOC; + } - if (relo->type != RELO_DATA) { - insn[0].src_reg = BPF_PSEUDO_MAP_FD; - } else { - insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; - insn[1].imm = insn[0].imm + relo->sym_off; - } + switch (relo->type) { + case RELO_LD64: + insn[0].src_reg = BPF_PSEUDO_MAP_FD; + insn[0].imm = obj->maps[relo->map_idx].fd; + break; + case RELO_DATA: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[1].imm = insn[0].imm + relo->sym_off; insn[0].imm = obj->maps[relo->map_idx].fd; - } else if (relo->type == RELO_CALL) { + break; + case RELO_EXTERN: + insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; + insn[0].imm = obj->maps[obj->kconfig_map_idx].fd; + insn[1].imm = relo->sym_off; + break; + case RELO_CALL: err = bpf_program__reloc_text(prog, obj, relo); if (err) return err; + break; + default: + pr_warn("relo #%d: bad relo type %d\n", i, relo->type); + return -EINVAL; } } @@ -3778,6 +4478,7 @@ retry_load: ret = -errno; cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); pr_warn("load bpf program failed: %s\n", cp); + pr_perm_msg(ret); if (log_buf && log_buf[0] != '\0') { ret = -LIBBPF_ERRNO__VERIFY; @@ -3807,11 +4508,22 @@ out: return ret; } -int -bpf_program__load(struct bpf_program *prog, - char *license, __u32 kern_version) +static int libbpf_find_attach_btf_id(const char *name, + enum bpf_attach_type attach_type, + __u32 attach_prog_fd); + +int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) { - int err = 0, fd, i; + int err = 0, fd, i, btf_id; + + if (prog->type == BPF_PROG_TYPE_TRACING) { + btf_id = libbpf_find_attach_btf_id(prog->section_name, + prog->expected_attach_type, + prog->attach_prog_fd); + if (btf_id <= 0) + return btf_id; + prog->attach_btf_id = btf_id; + } if (prog->instances.nr < 0 || !prog->instances.fds) { if (prog->preprocessor) { @@ -3835,7 +4547,7 @@ bpf_program__load(struct bpf_program *prog, prog->section_name, prog->instances.nr); } err = load_program(prog, prog->insns, prog->insns_cnt, - license, kern_version, &fd); + license, kern_ver, &fd); if (!err) prog->instances.fds[0] = fd; goto out; @@ -3864,9 +4576,7 @@ bpf_program__load(struct bpf_program *prog, } err = load_program(prog, result.new_insn_ptr, - result.new_insn_cnt, - license, kern_version, &fd); - + result.new_insn_cnt, license, kern_ver, &fd); if (err) { pr_warn("Loading 
the %dth instance of program '%s' failed\n", i, prog->section_name); @@ -3910,20 +4620,14 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level) return 0; } -static int libbpf_find_attach_btf_id(const char *name, - enum bpf_attach_type attach_type, - __u32 attach_prog_fd); static struct bpf_object * __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { - const char *pin_root_path; + const char *obj_name, *kconfig; struct bpf_program *prog; struct bpf_object *obj; - const char *obj_name; char tmp_name[64]; - bool relaxed_maps; - __u32 attach_prog_fd; int err; if (elf_version(EV_CURRENT) == EV_NONE) { @@ -3952,16 +4656,23 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, return obj; obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); - relaxed_maps = OPTS_GET(opts, relaxed_maps, false); - pin_root_path = OPTS_GET(opts, pin_root_path, NULL); - attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); - - CHECK_ERR(bpf_object__elf_init(obj), err, out); - CHECK_ERR(bpf_object__check_endianness(obj), err, out); - CHECK_ERR(bpf_object__probe_caps(obj), err, out); - CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), - err, out); - CHECK_ERR(bpf_object__collect_reloc(obj), err, out); + kconfig = OPTS_GET(opts, kconfig, NULL); + if (kconfig) { + obj->kconfig = strdup(kconfig); + if (!obj->kconfig) + return ERR_PTR(-ENOMEM); + } + + err = bpf_object__elf_init(obj); + err = err ? : bpf_object__check_endianness(obj); + err = err ? : bpf_object__elf_collect(obj); + err = err ? : bpf_object__collect_externs(obj); + err = err ? : bpf_object__finalize_btf(obj); + err = err ? : bpf_object__init_maps(obj, opts); + err = err ? : bpf_object__init_prog_names(obj); + err = err ? 
: bpf_object__collect_reloc(obj); + if (err) + goto out; bpf_object__elf_finish(obj); bpf_object__for_each_program(prog, obj) { @@ -3978,15 +4689,8 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, bpf_program__set_type(prog, prog_type); bpf_program__set_expected_attach_type(prog, attach_type); - if (prog_type == BPF_PROG_TYPE_TRACING) { - err = libbpf_find_attach_btf_id(prog->section_name, - attach_type, - attach_prog_fd); - if (err <= 0) - goto out; - prog->attach_btf_id = err; - prog->attach_prog_fd = attach_prog_fd; - } + if (prog_type == BPF_PROG_TYPE_TRACING) + prog->attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); } return obj; @@ -4026,7 +4730,7 @@ struct bpf_object *bpf_object__open(const char *path) } struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts) { if (!path) return ERR_PTR(-EINVAL); @@ -4038,7 +4742,7 @@ bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts) struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts) + const struct bpf_object_open_opts *opts) { if (!obj_buf || obj_buf_sz == 0) return ERR_PTR(-EINVAL); @@ -4079,6 +4783,92 @@ int bpf_object__unload(struct bpf_object *obj) return 0; } +static int bpf_object__sanitize_maps(struct bpf_object *obj) +{ + struct bpf_map *m; + + bpf_object__for_each_map(m, obj) { + if (!bpf_map__is_internal(m)) + continue; + if (!obj->caps.global_data) { + pr_warn("kernel doesn't support global data\n"); + return -ENOTSUP; + } + if (!obj->caps.array_mmap) + m->def.map_flags ^= BPF_F_MMAPABLE; + } + + return 0; +} + +static int bpf_object__resolve_externs(struct bpf_object *obj, + const char *extra_kconfig) +{ + bool need_config = false; + struct extern_desc *ext; + int err, i; + void *data; + + if (obj->nr_extern == 0) + return 0; + + data = obj->maps[obj->kconfig_map_idx].mmaped; + + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (strcmp(ext->name, "LINUX_KERNEL_VERSION") == 0) { + void *ext_val = data + ext->data_off; + __u32 kver = get_kernel_version(); + + if (!kver) { + pr_warn("failed to get kernel version\n"); + return -EINVAL; + } + err = set_ext_value_num(ext, ext_val, kver); + if (err) + return err; + pr_debug("extern %s=0x%x\n", ext->name, kver); + } else if (strncmp(ext->name, "CONFIG_", 7) == 0) { + need_config = true; + } else { + pr_warn("unrecognized extern '%s'\n", ext->name); + return -EINVAL; + } + } + if (need_config && extra_kconfig) { + err = bpf_object__read_kconfig_mem(obj, extra_kconfig, data); + if (err) + return -EINVAL; + need_config = false; + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + if (!ext->is_set) { + need_config = true; + break; + } + } + } + if (need_config) { + err = bpf_object__read_kconfig_file(obj, data); + if (err) + return -EINVAL; + } + for (i = 0; i < obj->nr_extern; i++) { + ext = &obj->externs[i]; + + if (!ext->is_set && !ext->is_weak) { + pr_warn("extern %s (strong) not resolved\n", ext->name); + return -ESRCH; + } else if (!ext->is_set) { + pr_debug("extern %s (weak) not resolved, defaulting to zero\n", + ext->name); + } + } + + return 0; +} + int bpf_object__load_xattr(struct bpf_object_load_attr *attr) { struct bpf_object *obj; @@ -4097,9 +4887,15 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr) obj->loaded = true; - CHECK_ERR(bpf_object__create_maps(obj), err, out); - 
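For orientation, a hedged sketch (not part of the patch) of the BPF-program side that bpf_object__resolve_externs() serves; the two extern names follow the only cases the function recognizes, and the enum mirrors the libbpf_tristate definition added to libbpf.h later in this diff:

#include <linux/version.h>

enum libbpf_tristate { TRI_NO = 0, TRI_YES = 1, TRI_MODULE = 2 };

extern unsigned int LINUX_KERNEL_VERSION;       /* resolved via get_kernel_version() */
extern enum libbpf_tristate CONFIG_BPF_SYSCALL; /* resolved from system/extra Kconfig */

int prog(void *ctx)
{
    /* both externs are laid out in the read-only kconfig map at load time */
    if (LINUX_KERNEL_VERSION < KERNEL_VERSION(5, 2, 0))
        return 0;
    return CONFIG_BPF_SYSCALL == TRI_YES;
}
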
CHECK_ERR(bpf_object__relocate(obj, attr->target_btf_path), err, out); - CHECK_ERR(bpf_object__load_progs(obj, attr->log_level), err, out); + err = bpf_object__probe_caps(obj); + err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); + err = err ? : bpf_object__sanitize_and_load_btf(obj); + err = err ? : bpf_object__sanitize_maps(obj); + err = err ? : bpf_object__create_maps(obj); + err = err ? : bpf_object__relocate(obj, attr->target_btf_path); + err = err ? : bpf_object__load_progs(obj, attr->log_level); + if (err) + goto out; return 0; out: @@ -4670,17 +5466,26 @@ void bpf_object__close(struct bpf_object *obj) btf_ext__free(obj->btf_ext); for (i = 0; i < obj->nr_maps; i++) { - zfree(&obj->maps[i].name); - zfree(&obj->maps[i].pin_path); - if (obj->maps[i].clear_priv) - obj->maps[i].clear_priv(&obj->maps[i], - obj->maps[i].priv); - obj->maps[i].priv = NULL; - obj->maps[i].clear_priv = NULL; + struct bpf_map *map = &obj->maps[i]; + + if (map->clear_priv) + map->clear_priv(map, map->priv); + map->priv = NULL; + map->clear_priv = NULL; + + if (map->mmaped) { + munmap(map->mmaped, bpf_map_mmap_sz(map)); + map->mmaped = NULL; + } + + zfree(&map->name); + zfree(&map->pin_path); } - zfree(&obj->sections.rodata); - zfree(&obj->sections.data); + zfree(&obj->kconfig); + zfree(&obj->externs); + obj->nr_extern = 0; + zfree(&obj->maps); obj->nr_maps = 0; @@ -4820,6 +5625,11 @@ void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex) prog->prog_ifindex = ifindex; } +const char *bpf_program__name(const struct bpf_program *prog) +{ + return prog->name; +} + const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy) { const char *title; @@ -4972,7 +5782,28 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog, */ #define BPF_APROG_COMPAT(string, ptype) BPF_PROG_SEC(string, ptype) -static const struct { +#define SEC_DEF(sec_pfx, ptype, ...) 
{ \ + .sec = sec_pfx, \ + .len = sizeof(sec_pfx) - 1, \ + .prog_type = BPF_PROG_TYPE_##ptype, \ + __VA_ARGS__ \ +} + +struct bpf_sec_def; + +typedef struct bpf_link *(*attach_fn_t)(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog); +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog); + +struct bpf_sec_def { const char *sec; size_t len; enum bpf_prog_type prog_type; @@ -4980,24 +5811,40 @@ static const struct { bool is_attachable; bool is_attach_btf; enum bpf_attach_type attach_type; -} section_names[] = { + attach_fn_t attach_fn; +}; + +static const struct bpf_sec_def section_defs[] = { BPF_PROG_SEC("socket", BPF_PROG_TYPE_SOCKET_FILTER), - BPF_PROG_SEC("kprobe/", BPF_PROG_TYPE_KPROBE), + BPF_PROG_SEC("sk_reuseport", BPF_PROG_TYPE_SK_REUSEPORT), + SEC_DEF("kprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uprobe/", BPF_PROG_TYPE_KPROBE), - BPF_PROG_SEC("kretprobe/", BPF_PROG_TYPE_KPROBE), + SEC_DEF("kretprobe/", KPROBE, + .attach_fn = attach_kprobe), BPF_PROG_SEC("uretprobe/", BPF_PROG_TYPE_KPROBE), BPF_PROG_SEC("classifier", BPF_PROG_TYPE_SCHED_CLS), BPF_PROG_SEC("action", BPF_PROG_TYPE_SCHED_ACT), - BPF_PROG_SEC("tracepoint/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("tp/", BPF_PROG_TYPE_TRACEPOINT), - BPF_PROG_SEC("raw_tracepoint/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_SEC("raw_tp/", BPF_PROG_TYPE_RAW_TRACEPOINT), - BPF_PROG_BTF("tp_btf/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_RAW_TP), - BPF_PROG_BTF("fentry/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FENTRY), - BPF_PROG_BTF("fexit/", BPF_PROG_TYPE_TRACING, - BPF_TRACE_FEXIT), + SEC_DEF("tracepoint/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("tp/", TRACEPOINT, + .attach_fn = attach_tp), + SEC_DEF("raw_tracepoint/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("raw_tp/", RAW_TRACEPOINT, + .attach_fn = attach_raw_tp), + SEC_DEF("tp_btf/", TRACING, + .expected_attach_type = BPF_TRACE_RAW_TP, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fentry/", TRACING, + .expected_attach_type = BPF_TRACE_FENTRY, + .is_attach_btf = true, + .attach_fn = attach_trace), + SEC_DEF("fexit/", TRACING, + .expected_attach_type = BPF_TRACE_FEXIT, + .is_attach_btf = true, + .attach_fn = attach_trace), BPF_PROG_SEC("xdp", BPF_PROG_TYPE_XDP), BPF_PROG_SEC("perf_event", BPF_PROG_TYPE_PERF_EVENT), BPF_PROG_SEC("lwt_in", BPF_PROG_TYPE_LWT_IN), @@ -5059,12 +5906,26 @@ static const struct { #undef BPF_APROG_SEC #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#undef SEC_DEF #define MAX_TYPE_NAME_SIZE 32 +static const struct bpf_sec_def *find_sec_def(const char *sec_name) +{ + int i, n = ARRAY_SIZE(section_defs); + + for (i = 0; i < n; i++) { + if (strncmp(sec_name, + section_defs[i].sec, section_defs[i].len)) + continue; + return &section_defs[i]; + } + return NULL; +} + static char *libbpf_get_type_names(bool attach_type) { - int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + int i, len = ARRAY_SIZE(section_defs) * MAX_TYPE_NAME_SIZE; char *buf; buf = malloc(len); @@ -5073,16 +5934,16 @@ static char *libbpf_get_type_names(bool attach_type) buf[0] = '\0'; /* Forge string buf with all available names */ - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (attach_type &&
!section_names[i].is_attachable) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (attach_type && !section_defs[i].is_attachable) continue; - if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + if (strlen(buf) + strlen(section_defs[i].sec) + 2 > len) { free(buf); return NULL; } strcat(buf, " "); - strcat(buf, section_names[i].sec); + strcat(buf, section_defs[i].sec); } return buf; @@ -5091,23 +5952,23 @@ static char *libbpf_get_type_names(bool attach_type) int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + const struct bpf_sec_def *sec_def; char *type_names; - int i; if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) - continue; - *prog_type = section_names[i].prog_type; - *expected_attach_type = section_names[i].expected_attach_type; + sec_def = find_sec_def(name); + if (sec_def) { + *prog_type = sec_def->prog_type; + *expected_attach_type = sec_def->expected_attach_type; return 0; } - pr_warn("failed to guess program type from ELF section '%s'\n", name); + + pr_debug("failed to guess program type from ELF section '%s'\n", name); type_names = libbpf_get_type_names(false); if (type_names != NULL) { - pr_info("supported section(type) names are:%s\n", type_names); + pr_debug("supported section(type) names are:%s\n", type_names); free(type_names); } @@ -5186,16 +6047,16 @@ static int libbpf_find_attach_btf_id(const char *name, if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (!section_names[i].is_attach_btf) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (!section_defs[i].is_attach_btf) continue; - if (strncmp(name, section_names[i].sec, section_names[i].len)) + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; if (attach_prog_fd) - err = libbpf_find_prog_btf_id(name + section_names[i].len, + err = libbpf_find_prog_btf_id(name + section_defs[i].len, attach_prog_fd); else - err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, + err = libbpf_find_vmlinux_btf_id(name + section_defs[i].len, attach_type); if (err <= 0) pr_warn("%s is not found in vmlinux BTF\n", name); @@ -5214,18 +6075,18 @@ int libbpf_attach_type_by_name(const char *name, if (!name) return -EINVAL; - for (i = 0; i < ARRAY_SIZE(section_names); i++) { - if (strncmp(name, section_names[i].sec, section_names[i].len)) + for (i = 0; i < ARRAY_SIZE(section_defs); i++) { + if (strncmp(name, section_defs[i].sec, section_defs[i].len)) continue; - if (!section_names[i].is_attachable) + if (!section_defs[i].is_attachable) return -EINVAL; - *attach_type = section_names[i].attach_type; + *attach_type = section_defs[i].attach_type; return 0; } - pr_warn("failed to guess attach type based on ELF section name '%s'\n", name); + pr_debug("failed to guess attach type based on ELF section name '%s'\n", name); type_names = libbpf_get_type_names(true); if (type_names != NULL) { - pr_info("attachable section(type) names are:%s\n", type_names); + pr_debug("attachable section(type) names are:%s\n", type_names); free(type_names); } @@ -5466,17 +6327,37 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, } struct bpf_link { + int (*detach)(struct bpf_link *link); int (*destroy)(struct bpf_link *link); + bool disconnected; }; +/* Release "ownership" of underlying BPF resource (typically, BPF program + * attached to some BPF hook, e.g., tracepoint, kprobe, etc). 
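A hedged usage sketch of the disconnect semantics being introduced here (the helper is hypothetical, not from the patch):

/* keep the BPF program attached even after this process exits */
static int detach_or_persist(struct bpf_link *link, bool persist)
{
    if (persist)
        bpf_link__disconnect(link); /* destroy() will then skip detach */
    return bpf_link__destroy(link); /* frees the link memory either way */
}
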
A disconnected + * link, when destroyed through the bpf_link__destroy() call, won't attempt to + * detach/unregister that BPF resource. This is useful in situations where, + * say, attached BPF program has to outlive userspace program that attached it + * in the system. Depending on type of BPF program, though, there might be + * additional steps (like pinning BPF program in BPF FS) necessary to ensure + * exit of userspace program doesn't trigger automatic detachment and clean up + * inside the kernel. + */ +void bpf_link__disconnect(struct bpf_link *link) +{ + link->disconnected = true; +} + int bpf_link__destroy(struct bpf_link *link) { - int err; + int err = 0; if (!link) return 0; - err = link->destroy(link); + if (!link->disconnected && link->detach) + err = link->detach(link); + if (link->destroy) + link->destroy(link); free(link); return err; @@ -5487,7 +6368,7 @@ struct bpf_link_fd { int fd; /* hook FD */ }; -static int bpf_link__destroy_perf_event(struct bpf_link *link) +static int bpf_link__detach_perf_event(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; int err; @@ -5519,10 +6400,10 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_perf_event; + link->link.detach = &bpf_link__detach_perf_event; link->fd = pfd; if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) { @@ -5679,6 +6560,18 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog, return link; } +static struct bpf_link *attach_kprobe(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *func_name; + bool retprobe; + + func_name = bpf_program__title(prog, false) + sec->len; + retprobe = strcmp(sec->sec, "kretprobe/") == 0; + + return bpf_program__attach_kprobe(prog, retprobe, func_name); +} + struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog, bool retprobe, pid_t pid, const char *binary_path, @@ -5791,7 +6684,33 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog, return link; } -static int bpf_link__destroy_fd(struct bpf_link *link) +static struct bpf_link *attach_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + char *sec_name, *tp_cat, *tp_name; + struct bpf_link *link; + + sec_name = strdup(bpf_program__title(prog, false)); + if (!sec_name) + return ERR_PTR(-ENOMEM); + + /* extract "tp/<category>/<name>" */ + tp_cat = sec_name + sec->len; + tp_name = strchr(tp_cat, '/'); + if (!tp_name) { + link = ERR_PTR(-EINVAL); + goto out; + } + *tp_name = '\0'; + tp_name++; + + link = bpf_program__attach_tracepoint(prog, tp_cat, tp_name); +out: + free(sec_name); + return link; +} + +static int bpf_link__detach_fd(struct bpf_link *link) { struct bpf_link_fd *l = (void *)link; @@ -5812,10 +6731,10 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(tp_name, prog_fd); if (pfd < 0) { @@ -5830,6 +6749,14 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog, return (struct bpf_link *)link; } +static struct bpf_link *attach_raw_tp(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + const char *tp_name =
bpf_program__title(prog, false) + sec->len; + + return bpf_program__attach_raw_tracepoint(prog, tp_name); +} + struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) { char errmsg[STRERR_BUFSIZE]; @@ -5843,10 +6770,10 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return ERR_PTR(-EINVAL); } - link = malloc(sizeof(*link)); + link = calloc(1, sizeof(*link)); if (!link) return ERR_PTR(-ENOMEM); - link->link.destroy = &bpf_link__destroy_fd; + link->link.detach = &bpf_link__detach_fd; pfd = bpf_raw_tracepoint_open(NULL, prog_fd); if (pfd < 0) { @@ -5861,6 +6788,23 @@ struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) return (struct bpf_link *)link; } +static struct bpf_link *attach_trace(const struct bpf_sec_def *sec, + struct bpf_program *prog) +{ + return bpf_program__attach_trace(prog); +} + +struct bpf_link *bpf_program__attach(struct bpf_program *prog) +{ + const struct bpf_sec_def *sec_def; + + sec_def = find_sec_def(bpf_program__title(prog, false)); + if (!sec_def || !sec_def->attach_fn) + return ERR_PTR(-ESRCH); + + return sec_def->attach_fn(sec_def, prog); +} + enum bpf_perf_event_ret bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, @@ -5944,7 +6888,7 @@ struct perf_buffer { size_t mmap_size; struct perf_cpu_buf **cpu_bufs; struct epoll_event *events; - int cpu_cnt; + int cpu_cnt; /* number of allocated CPU buffers */ int epoll_fd; /* perf event FD */ int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */ }; @@ -6078,11 +7022,13 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt, static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, struct perf_buffer_params *p) { + const char *online_cpus_file = "/sys/devices/system/cpu/online"; struct bpf_map_info map = {}; char msg[STRERR_BUFSIZE]; struct perf_buffer *pb; + bool *online = NULL; __u32 map_info_len; - int err, i; + int err, i, j, n; if (page_cnt & (page_cnt - 1)) { pr_warn("page count should be power of two, but is %zu\n", @@ -6151,20 +7097,32 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - for (i = 0; i < pb->cpu_cnt; i++) { + err = parse_cpu_mask_file(online_cpus_file, &online, &n); + if (err) { + pr_warn("failed to get online CPU mask: %d\n", err); + goto error; + } + + for (i = 0, j = 0; i < pb->cpu_cnt; i++) { struct perf_cpu_buf *cpu_buf; int cpu, map_key; cpu = p->cpu_cnt > 0 ? p->cpus[i] : i; map_key = p->cpu_cnt > 0 ? 
p->map_keys[i] : i; + /* in case the user didn't explicitly request particular CPUs to + * be attached to, skip offline/not present CPUs + */ + if (p->cpu_cnt <= 0 && (cpu >= n || !online[cpu])) + continue; + cpu_buf = perf_buffer__open_cpu_buf(pb, p->attr, cpu, map_key); if (IS_ERR(cpu_buf)) { err = PTR_ERR(cpu_buf); goto error; } - pb->cpu_bufs[i] = cpu_buf; + pb->cpu_bufs[j] = cpu_buf; err = bpf_map_update_elem(pb->map_fd, &map_key, &cpu_buf->fd, 0); @@ -6176,21 +7134,25 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt, goto error; } - pb->events[i].events = EPOLLIN; - pb->events[i].data.ptr = cpu_buf; + pb->events[j].events = EPOLLIN; + pb->events[j].data.ptr = cpu_buf; if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd, - &pb->events[i]) < 0) { + &pb->events[j]) < 0) { err = -errno; pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", cpu, cpu_buf->fd, libbpf_strerror_r(err, msg, sizeof(msg))); goto error; } + j++; } + pb->cpu_cnt = j; + free(online); return pb; error: + free(online); if (pb) perf_buffer__free(pb); return ERR_PTR(err); @@ -6521,62 +7483,267 @@ void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) } } -int libbpf_num_possible_cpus(void) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz) { - static const char *fcpu = "/sys/devices/system/cpu/possible"; - int len = 0, n = 0, il = 0, ir = 0; - unsigned int start = 0, end = 0; - int tmp_cpus = 0; - static int cpus; - char buf[128]; - int error = 0; - int fd = -1; + int err = 0, n, len, start, end = -1; + bool *tmp; - tmp_cpus = READ_ONCE(cpus); - if (tmp_cpus > 0) - return tmp_cpus; + *mask = NULL; + *mask_sz = 0; + + /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ + while (*s) { + if (*s == ',' || *s == '\n') { + s++; + continue; + } + n = sscanf(s, "%d%n-%d%n", &start, &len, &end, &len); + if (n <= 0 || n > 2) { + pr_warn("Failed to get CPU range %s: %d\n", s, n); + err = -EINVAL; + goto cleanup; + } else if (n == 1) { + end = start; + } + if (start < 0 || start > end) { + pr_warn("Invalid CPU range [%d,%d] in %s\n", + start, end, s); + err = -EINVAL; + goto cleanup; + } + tmp = realloc(*mask, end + 1); + if (!tmp) { + err = -ENOMEM; + goto cleanup; + } + *mask = tmp; + memset(tmp + *mask_sz, 0, start - *mask_sz); + memset(tmp + start, 1, end - start + 1); + *mask_sz = end + 1; + s += len; + } + if (!*mask_sz) { + pr_warn("Empty CPU range\n"); + return -EINVAL; + } + return 0; +cleanup: + free(*mask); + *mask = NULL; + return err; +} + +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz) +{ + int fd, err = 0, len; + char buf[128]; fd = open(fcpu, O_RDONLY); if (fd < 0) { - error = errno; - pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error)); - return -error; + err = -errno; + pr_warn("Failed to open cpu mask file %s: %d\n", fcpu, err); + return err; } len = read(fd, buf, sizeof(buf)); close(fd); if (len <= 0) { - error = len ? errno : EINVAL; - pr_warn("Failed to read # of possible cpus from %s: %s\n", - fcpu, strerror(error)); - return -error; + err = len ?
-errno : -EINVAL; + pr_warn("Failed to read cpu mask from %s: %d\n", fcpu, err); + return err; } - if (len == sizeof(buf)) { - pr_warn("File %s size overflow\n", fcpu); - return -EOVERFLOW; + if (len >= sizeof(buf)) { + pr_warn("CPU mask is too big in file %s\n", fcpu); + return -E2BIG; } buf[len] = '\0'; - for (ir = 0, tmp_cpus = 0; ir <= len; ir++) { - /* Each sub string separated by ',' has format \d+-\d+ or \d+ */ - if (buf[ir] == ',' || buf[ir] == '\0') { - buf[ir] = '\0'; - n = sscanf(&buf[il], "%u-%u", &start, &end); - if (n <= 0) { - pr_warn("Failed to get # CPUs from %s\n", - &buf[il]); - return -EINVAL; - } else if (n == 1) { - end = start; - } - tmp_cpus += end - start + 1; - il = ir + 1; - } - } - if (tmp_cpus <= 0) { - pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); - return -EINVAL; + return parse_cpu_mask_str(buf, mask, mask_sz); +} + +int libbpf_num_possible_cpus(void) +{ + static const char *fcpu = "/sys/devices/system/cpu/possible"; + static int cpus; + int err, n, i, tmp_cpus; + bool *mask; + + tmp_cpus = READ_ONCE(cpus); + if (tmp_cpus > 0) + return tmp_cpus; + + err = parse_cpu_mask_file(fcpu, &mask, &n); + if (err) + return err; + + tmp_cpus = 0; + for (i = 0; i < n; i++) { + if (mask[i]) + tmp_cpus++; } + free(mask); WRITE_ONCE(cpus, tmp_cpus); return tmp_cpus; } + +int bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, skel_opts, + .object_name = s->name, + ); + struct bpf_object *obj; + int i; + + /* Attempt to preserve opts->object_name, unless overridden by user + * explicitly. Overwriting object name for skeletons is discouraged, + * as it breaks global data maps, because they contain object name + * prefix as their own map name prefix. When skeleton is generated, + * bpftool is making an assumption that this name will stay the same.
+ */ + if (opts) { + memcpy(&skel_opts, opts, sizeof(*opts)); + if (!opts->object_name) + skel_opts.object_name = s->name; + } + + obj = bpf_object__open_mem(s->data, s->data_sz, &skel_opts); + if (IS_ERR(obj)) { + pr_warn("failed to initialize skeleton BPF object '%s': %ld\n", + s->name, PTR_ERR(obj)); + return PTR_ERR(obj); + } + + *s->obj = obj; + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map **map = s->maps[i].map; + const char *name = s->maps[i].name; + void **mmaped = s->maps[i].mmaped; + + *map = bpf_object__find_map_by_name(obj, name); + if (!*map) { + pr_warn("failed to find skeleton map '%s'\n", name); + return -ESRCH; + } + + /* externs shouldn't be pre-setup from user code */ + if (mmaped && (*map)->libbpf_type != LIBBPF_MAP_KCONFIG) + *mmaped = (*map)->mmaped; + } + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program **prog = s->progs[i].prog; + const char *name = s->progs[i].name; + + *prog = bpf_object__find_program_by_name(obj, name); + if (!*prog) { + pr_warn("failed to find skeleton program '%s'\n", name); + return -ESRCH; + } + } + + return 0; +} + +int bpf_object__load_skeleton(struct bpf_object_skeleton *s) +{ + int i, err; + + err = bpf_object__load(*s->obj); + if (err) { + pr_warn("failed to load BPF skeleton '%s': %d\n", s->name, err); + return err; + } + + for (i = 0; i < s->map_cnt; i++) { + struct bpf_map *map = *s->maps[i].map; + size_t mmap_sz = bpf_map_mmap_sz(map); + int prot, map_fd = bpf_map__fd(map); + void **mmaped = s->maps[i].mmaped; + + if (!mmaped) + continue; + + if (!(map->def.map_flags & BPF_F_MMAPABLE)) { + *mmaped = NULL; + continue; + } + + if (map->def.map_flags & BPF_F_RDONLY_PROG) + prot = PROT_READ; + else + prot = PROT_READ | PROT_WRITE; + + /* Remap anonymous mmap()-ed "map initialization image" as + * a BPF map-backed mmap()-ed memory, but preserving the same + * memory address. This will cause kernel to change process' + * page table to point to a different piece of kernel memory, + * but from userspace point of view memory address (and its + * contents, being identical at this point) will stay the + * same. This mapping will be released by bpf_object__close() + * as per normal clean up procedure, so we don't need to worry + * about it from skeleton's clean up perspective. 
+ */ + *mmaped = mmap(map->mmaped, mmap_sz, prot, + MAP_SHARED | MAP_FIXED, map_fd, 0); + if (*mmaped == MAP_FAILED) { + err = -errno; + *mmaped = NULL; + pr_warn("failed to re-mmap() map '%s': %d\n", + bpf_map__name(map), err); + return err; + } + } + + return 0; +} + +int bpf_object__attach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_program *prog = *s->progs[i].prog; + struct bpf_link **link = s->progs[i].link; + const struct bpf_sec_def *sec_def; + const char *sec_name = bpf_program__title(prog, false); + + sec_def = find_sec_def(sec_name); + if (!sec_def || !sec_def->attach_fn) + continue; + + *link = sec_def->attach_fn(sec_def, prog); + if (IS_ERR(*link)) { + pr_warn("failed to auto-attach program '%s': %ld\n", + bpf_program__name(prog), PTR_ERR(*link)); + return PTR_ERR(*link); + } + } + + return 0; +} + +void bpf_object__detach_skeleton(struct bpf_object_skeleton *s) +{ + int i; + + for (i = 0; i < s->prog_cnt; i++) { + struct bpf_link **link = s->progs[i].link; + + if (!IS_ERR_OR_NULL(*link)) + bpf_link__destroy(*link); + *link = NULL; + } +} + +void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s) +{ + if (s->progs) + bpf_object__detach_skeleton(s); + if (s->obj) + bpf_object__close(*s->obj); + free(s->maps); + free(s->progs); + free(s); +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 0dbf4bfba0c4..fe592ef48f1b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -17,14 +17,12 @@ #include <sys/types.h> // for size_t #include <linux/bpf.h> +#include "libbpf_common.h" + #ifdef __cplusplus extern "C" { #endif -#ifndef LIBBPF_API -#define LIBBPF_API __attribute__((visibility("default"))) -#endif - enum libbpf_errno { __LIBBPF_ERRNO__START = 4000, @@ -67,28 +65,6 @@ struct bpf_object_open_attr { enum bpf_prog_type prog_type; }; -/* Helper macro to declare and initialize libbpf options struct - * - * This dance with uninitialized declaration, followed by memset to zero, - * followed by assignment using compound literal syntax is done to preserve - * ability to use a nice struct field initialization syntax and **hopefully** - * have all the padding bytes initialized to zero. It's not guaranteed though, - * when copying literal, that compiler won't copy garbage in literal's padding - * bytes, but that's the best way I've found and it seems to work in practice. - * - * Macro declares opts struct of given type and name, zero-initializes, - * including any extra padding, it with memset() and then assigns initial - * values provided by users in struct initializer-syntax as varargs. - */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ - struct TYPE NAME = ({ \ - memset(&NAME, 0, sizeof(struct TYPE)); \ - (struct TYPE) { \ - .sz = sizeof(struct TYPE), \ - __VA_ARGS__ \ - }; \ - }) - struct bpf_object_open_opts { /* size of this struct, for forward/backward compatiblity */ size_t sz; @@ -109,15 +85,19 @@ struct bpf_object_open_opts { */ const char *pin_root_path; __u32 attach_prog_fd; + /* Additional kernel config content that augments and overrides + * system Kconfig for CONFIG_xxx externs. 
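Tying the skeleton entry points above and the new kconfig open option together, a hedged end-to-end sketch; my_prog__create_skeleton() stands in for a bpftool-generated constructor and CONFIG_MY_FEATURE for a real Kconfig symbol, both hypothetical:

#include <errno.h>
#include "libbpf.h"

int run_my_prog(void)
{
    DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts,
        .kconfig = "CONFIG_MY_FEATURE=y", /* augments/overrides system Kconfig */
    );
    struct bpf_object_skeleton *s;
    int err;

    s = my_prog__create_skeleton(); /* hypothetical generated helper */
    if (!s)
        return -ENOMEM;

    err = bpf_object__open_skeleton(s, &opts);
    err = err ?: bpf_object__load_skeleton(s);
    err = err ?: bpf_object__attach_skeleton(s);

    /* on any failure, destroy detaches the links and closes the object */
    if (err)
        bpf_object__destroy_skeleton(s);
    return err;
}
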
+ */ + const char *kconfig; }; -#define bpf_object_open_opts__last_field attach_prog_fd +#define bpf_object_open_opts__last_field kconfig LIBBPF_API struct bpf_object *bpf_object__open(const char *path); LIBBPF_API struct bpf_object * -bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); LIBBPF_API struct bpf_object * bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, - struct bpf_object_open_opts *opts); + const struct bpf_object_open_opts *opts); /* deprecated bpf_object__open variants */ LIBBPF_API struct bpf_object * @@ -126,11 +106,6 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, LIBBPF_API struct bpf_object * bpf_object__open_xattr(struct bpf_object_open_attr *attr); -int bpf_object__section_size(const struct bpf_object *obj, const char *name, - __u32 *size); -int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, - __u32 *off); - enum libbpf_pin_type { LIBBPF_PIN_NONE, /* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ @@ -161,6 +136,7 @@ struct bpf_object_load_attr { LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); + LIBBPF_API const char *bpf_object__name(const struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(const struct bpf_object *obj); @@ -171,6 +147,9 @@ LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * bpf_object__find_program_by_title(const struct bpf_object *obj, const char *title); +LIBBPF_API struct bpf_program * +bpf_object__find_program_by_name(const struct bpf_object *obj, + const char *name); LIBBPF_API struct bpf_object *bpf_object__next(struct bpf_object *prev); #define bpf_object__for_each_safe(pos, tmp) \ @@ -214,6 +193,7 @@ LIBBPF_API void *bpf_program__priv(const struct bpf_program *prog); LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog, __u32 ifindex); +LIBBPF_API const char *bpf_program__name(const struct bpf_program *prog); LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy); @@ -235,9 +215,12 @@ LIBBPF_API void bpf_program__unload(struct bpf_program *prog); struct bpf_link; +LIBBPF_API void bpf_link__disconnect(struct bpf_link *link); LIBBPF_API int bpf_link__destroy(struct bpf_link *link); LIBBPF_API struct bpf_link * +bpf_program__attach(struct bpf_program *prog); +LIBBPF_API struct bpf_link * bpf_program__attach_perf_event(struct bpf_program *prog, int pfd); LIBBPF_API struct bpf_link * bpf_program__attach_kprobe(struct bpf_program *prog, bool retprobe, @@ -512,18 +495,6 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, void **copy_mem, size_t *copy_size, bpf_perf_event_print_t fn, void *private_data); -struct nlattr; -typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); -int libbpf_netlink_open(unsigned int *nl_pid); -int libbpf_nl_get_link(int sock, unsigned int nl_pid, - libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); -int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); -int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, - libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); -int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, - 
libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); - struct bpf_prog_linfo; struct bpf_prog_info; @@ -630,6 +601,50 @@ bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); */ LIBBPF_API int libbpf_num_possible_cpus(void); +struct bpf_map_skeleton { + const char *name; + struct bpf_map **map; + void **mmaped; +}; + +struct bpf_prog_skeleton { + const char *name; + struct bpf_program **prog; + struct bpf_link **link; +}; + +struct bpf_object_skeleton { + size_t sz; /* size of this struct, for forward/backward compatibility */ + + const char *name; + void *data; + size_t data_sz; + + struct bpf_object **obj; + + int map_cnt; + int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ + struct bpf_map_skeleton *maps; + + int prog_cnt; + int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ + struct bpf_prog_skeleton *progs; +}; + +LIBBPF_API int +bpf_object__open_skeleton(struct bpf_object_skeleton *s, + const struct bpf_object_open_opts *opts); +LIBBPF_API int bpf_object__load_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API int bpf_object__attach_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API void bpf_object__detach_skeleton(struct bpf_object_skeleton *s); +LIBBPF_API void bpf_object__destroy_skeleton(struct bpf_object_skeleton *s); + +enum libbpf_tristate { + TRI_NO = 0, + TRI_YES = 1, + TRI_MODULE = 2, +}; + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 8ddc2c40e482..e9713a574243 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -208,3 +208,19 @@ LIBBPF_0.0.6 { btf__find_by_name_kind; libbpf_find_vmlinux_btf_id; } LIBBPF_0.0.5; + +LIBBPF_0.0.7 { + global: + btf_dump__emit_type_decl; + bpf_link__disconnect; + bpf_object__find_program_by_name; + bpf_object__attach_skeleton; + bpf_object__destroy_skeleton; + bpf_object__detach_skeleton; + bpf_object__load_skeleton; + bpf_object__open_skeleton; + bpf_prog_attach_xattr; + bpf_program__attach; + bpf_program__name; + btf__align_of; +} LIBBPF_0.0.6; diff --git a/tools/lib/bpf/libbpf.pc.template b/tools/lib/bpf/libbpf.pc.template index ac17fcef2108..b45ed534bdfb 100644 --- a/tools/lib/bpf/libbpf.pc.template +++ b/tools/lib/bpf/libbpf.pc.template @@ -8,5 +8,5 @@ Name: libbpf Description: BPF library Version: @VERSION@ Libs: -L${libdir} -lbpf -Requires.private: libelf +Requires.private: libelf zlib Cflags: -I${includedir} diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h new file mode 100644 index 000000000000..a23ae1ac27eb --- /dev/null +++ b/tools/lib/bpf/libbpf_common.h @@ -0,0 +1,40 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * Common user-facing libbpf helpers. + * + * Copyright (c) 2019 Facebook + */ + +#ifndef __LIBBPF_LIBBPF_COMMON_H +#define __LIBBPF_LIBBPF_COMMON_H + +#include <string.h> + +#ifndef LIBBPF_API +#define LIBBPF_API __attribute__((visibility("default"))) +#endif + +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. 
+ * + * The macro declares an opts struct of the given type and name, zero-initializes + * it (including any extra padding) with memset(), and then assigns initial + * values provided by users in struct initializer syntax as varargs. + */ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...) \ + struct TYPE NAME = ({ \ + memset(&NAME, 0, sizeof(struct TYPE)); \ + (struct TYPE) { \ + .sz = sizeof(struct TYPE), \ + __VA_ARGS__ \ + }; \ + }) + +#endif /* __LIBBPF_LIBBPF_COMMON_H */ diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 97ac17a64a58..8c3afbd97747 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -76,7 +76,7 @@ static inline bool libbpf_validate_opts(const char *opts, for (i = opts_sz; i < user_sz; i++) { if (opts[i]) { - pr_warn("%s has non-zero extra bytes", + pr_warn("%s has non-zero extra bytes\n", type_name); return false; } @@ -95,9 +95,28 @@ static inline bool libbpf_validate_opts(const char *opts, #define OPTS_GET(opts, field, fallback_value) \ (OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); +int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, const char *str_sec, size_t str_len); +int bpf_object__section_size(const struct bpf_object *obj, const char *name, + __u32 *size); +int bpf_object__variable_offset(const struct bpf_object *obj, const char *name, + __u32 *off); + +struct nlattr; +typedef int (*libbpf_dump_nlmsg_t)(void *cookie, void *msg, struct nlattr **tb); +int libbpf_netlink_open(unsigned int *nl_pid); +int libbpf_nl_get_link(int sock, unsigned int nl_pid, + libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie); +int libbpf_nl_get_class(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_class_nlmsg, void *cookie); +int libbpf_nl_get_qdisc(int sock, unsigned int nl_pid, int ifindex, + libbpf_dump_nlmsg_t dump_qdisc_nlmsg, void *cookie); +int libbpf_nl_get_filter(int sock, unsigned int nl_pid, int ifindex, int handle, + libbpf_dump_nlmsg_t dump_filter_nlmsg, void *cookie); + struct btf_ext_info { /* * info points to the individual info section (e.g. func_info and diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore index 419652458da4..b139b3d75ebb 100644 --- a/tools/testing/selftests/bpf/.gitignore +++ b/tools/testing/selftests/bpf/.gitignore @@ -21,7 +21,6 @@ test_lirc_mode2_user get_cgroup_id_user test_skb_cgroup_id_user test_socket_cookie -test_cgroup_attach test_cgroup_storage test_select_reuseport test_flow_dissector @@ -38,5 +37,7 @@ test_hashmap test_btf_dump xdping test_cpp +*.skel.h /no_alu32 /bpf_gcc +/tools diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index e0fe01d9ec33..41691fb067da 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -3,10 +3,12 @@ include ../../../../scripts/Kbuild.include include ../../../scripts/Makefile.arch CURDIR := $(abspath .) -LIBDIR := $(abspath ../../../lib) +TOOLSDIR := $(abspath ../../..)
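+# (For reference: relative to tools/testing/selftests/bpf, TOOLSDIR is
+# tools/, so the directories derived below resolve to tools/lib,
+# tools/lib/bpf, tools/include and tools/bpf/bpftool.)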
+LIBDIR := $(TOOLSDIR)/lib BPFDIR := $(LIBDIR)/bpf -TOOLSDIR := $(abspath ../../../include) -APIDIR := $(TOOLSDIR)/uapi +TOOLSINCDIR := $(TOOLSDIR)/include +BPFTOOLDIR := $(TOOLSDIR)/bpf/bpftool +APIDIR := $(TOOLSINCDIR)/uapi GENDIR := $(abspath ../../../../include/generated) GENHDR := $(GENDIR)/autoconf.h @@ -19,18 +21,18 @@ LLC ?= llc LLVM_OBJCOPY ?= llvm-objcopy BPF_GCC ?= $(shell command -v bpf-gcc;) CFLAGS += -g -Wall -O2 $(GENFLAGS) -I$(APIDIR) -I$(LIBDIR) -I$(BPFDIR) \ - -I$(GENDIR) -I$(TOOLSDIR) -I$(CURDIR) \ + -I$(GENDIR) -I$(TOOLSINCDIR) -I$(CURDIR) \ -Dbpf_prog_load=bpf_prog_test_load \ -Dbpf_load_program=bpf_test_load_program -LDLIBS += -lcap -lelf -lrt -lpthread +LDLIBS += -lcap -lelf -lz -lrt -lpthread # Order correspond to 'make run_tests' order TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test_progs \ test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \ test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \ - test_cgroup_storage test_select_reuseport \ + test_cgroup_storage \ test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \ - test_cgroup_attach test_progs-no_alu32 + test_progs-no_alu32 # Also test bpf-gcc, if present ifneq ($(BPF_GCC),) @@ -75,6 +77,24 @@ TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \ TEST_CUSTOM_PROGS = urandom_read +# Emit succinct information message describing current building step +# $1 - generic step name (e.g., CC, LINK, etc); +# $2 - optional "flavor" specifier; if provided, will be emitted as [flavor]; +# $3 - target (assumed to be file); only file name will be emitted; +# $4 - optional extra arg, emitted as-is, if provided. +ifeq ($(V),1) +msg = +else +msg = @$(info $(1)$(if $(2), [$(2)]) $(notdir $(3)))$(if $(4), $(4)) +endif + +# override lib.mk's default rules +OVERRIDE_TARGETS := 1 +override define CLEAN + $(call msg, CLEAN) + $(RM) -r $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED) $(TEST_GEN_FILES) $(EXTRA_CLEAN) +endef + include ../lib.mk # Define simple and short `make test_progs`, `make test_sysctl`, etc targets @@ -87,10 +107,16 @@ $(notdir $(TEST_GEN_PROGS) \ $(TEST_GEN_PROGS_EXTENDED) \ $(TEST_CUSTOM_PROGS)): %: $(OUTPUT)/% ; +$(OUTPUT)/%:%.c + $(call msg, BINARY,,$@) + $(LINK.c) $^ $(LDLIBS) -o $@ + $(OUTPUT)/urandom_read: urandom_read.c + $(call msg, BINARY,,$@) $(CC) -o $@ $< -Wl,--build-id $(OUTPUT)/test_stub.o: test_stub.c + $(call msg, CC,,$@) $(CC) -c $(CFLAGS) -o $@ $< BPFOBJ := $(OUTPUT)/libbpf.a @@ -110,13 +136,18 @@ $(OUTPUT)/test_cgroup_storage: cgroup_helpers.c $(OUTPUT)/test_netcnt: cgroup_helpers.c $(OUTPUT)/test_sock_fields: cgroup_helpers.c $(OUTPUT)/test_sysctl: cgroup_helpers.c -$(OUTPUT)/test_cgroup_attach: cgroup_helpers.c .PHONY: force # force a rebuild of BPFOBJ when its dependencies are updated force: +DEFAULT_BPFTOOL := $(OUTPUT)/tools/usr/local/sbin/bpftool +BPFTOOL ?= $(DEFAULT_BPFTOOL) + +$(DEFAULT_BPFTOOL): force + $(MAKE) -C $(BPFTOOLDIR) DESTDIR=$(OUTPUT)/tools install + $(BPFOBJ): force $(MAKE) -C $(BPFDIR) OUTPUT=$(OUTPUT)/ @@ -159,27 +190,33 @@ $(OUTPUT)/flow_dissector_load.o: flow_dissector_load.h # $3 - CFLAGS # $4 - LDFLAGS define CLANG_BPF_BUILD_RULE + $(call msg, CLANG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -mattr=dwarfris -march=bpf -mcpu=probe $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but with disabled alu32 define CLANG_NOALU32_BPF_BUILD_RULE + $(call msg, 
CLANG-LLC,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -target bpf -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=v2 $4 -filetype=obj -o $2 endef # Similar to CLANG_BPF_BUILD_RULE, but using native Clang and bpf LLC define CLANG_NATIVE_BPF_BUILD_RULE + $(call msg, CLANG-BPF,$(TRUNNER_BINARY),$2) ($(CLANG) $3 -O2 -emit-llvm \ -c $1 -o - || echo "BPF obj compilation failed") | \ $(LLC) -march=bpf -mcpu=probe $4 -filetype=obj -o $2 endef # Build BPF object using GCC define GCC_BPF_BUILD_RULE + $(call msg, GCC-BPF,$(TRUNNER_BINARY),$2) $(BPF_GCC) $3 $4 -O2 -c $1 -o $2 endef +SKEL_BLACKLIST := btf__% test_pinning_invalid.c + # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES. # Parameters: @@ -195,8 +232,11 @@ TRUNNER_EXTRA_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ $$(filter %.c,$(TRUNNER_EXTRA_SOURCES))) TRUNNER_EXTRA_HDRS := $$(filter %.h,$(TRUNNER_EXTRA_SOURCES)) TRUNNER_TESTS_HDR := $(TRUNNER_TESTS_DIR)/tests.h -TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, \ - $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c))) +TRUNNER_BPF_SRCS := $$(notdir $$(wildcard $(TRUNNER_BPF_PROGS_DIR)/*.c)) +TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)) +TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h, \ + $$(filter-out $(SKEL_BLACKLIST), \ + $$(TRUNNER_BPF_SRCS))) # Evaluate rules now with extra TRUNNER_XXX variables above already defined $$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2)) @@ -226,12 +266,19 @@ $(TRUNNER_BPF_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $$(call $(TRUNNER_BPF_BUILD_RULE),$$<,$$@, \ $(TRUNNER_BPF_CFLAGS), \ $(TRUNNER_BPF_LDFLAGS)) + +$(TRUNNER_BPF_SKELS): $(TRUNNER_OUTPUT)/%.skel.h: \ + $(TRUNNER_OUTPUT)/%.o \ + | $(BPFTOOL) $(TRUNNER_OUTPUT) + $$(call msg, GEN-SKEL,$(TRUNNER_BINARY),$$@) + $$(BPFTOOL) gen skeleton $$< > $$@ endif # ensure we set up tests.h header generation rule just once ifeq ($($(TRUNNER_TESTS_DIR)-tests-hdr),) $(TRUNNER_TESTS_DIR)-tests-hdr := y $(TRUNNER_TESTS_HDR): $(TRUNNER_TESTS_DIR)/*.c + $$(call msg, TEST-HDR,$(TRUNNER_BINARY),$$@) $$(shell ( cd $(TRUNNER_TESTS_DIR); \ echo '/* Generated header, do not edit */'; \ ls *.c 2> /dev/null | \ @@ -245,7 +292,9 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o: \ $(TRUNNER_TESTS_DIR)/%.c \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_BPF_OBJS) \ + $(TRUNNER_BPF_SKELS) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $$(call msg, TEST-OBJ,$(TRUNNER_BINARY),$$@) cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F) $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ @@ -253,17 +302,20 @@ $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o: \ $(TRUNNER_EXTRA_HDRS) \ $(TRUNNER_TESTS_HDR) \ $$(BPFOBJ) | $(TRUNNER_OUTPUT) + $$(call msg, EXTRA-OBJ,$(TRUNNER_BINARY),$$@) $$(CC) $$(CFLAGS) -c $$< $$(LDLIBS) -o $$@ +# only copy extra resources if in flavored build $(TRUNNER_BINARY)-extras: $(TRUNNER_EXTRA_FILES) | $(TRUNNER_OUTPUT) ifneq ($2,) - # only copy extra resources if in flavored build + $$(call msg, EXTRAS-CP,$(TRUNNER_BINARY),$(TRUNNER_EXTRA_FILES)) cp -a $$^ $(TRUNNER_OUTPUT)/ endif $(OUTPUT)/$(TRUNNER_BINARY): $(TRUNNER_TEST_OBJS) \ $(TRUNNER_EXTRA_OBJS) $$(BPFOBJ) \ | $(TRUNNER_BINARY)-extras + $$(call msg, BINARY,,$$@) $$(CC) $$(CFLAGS) $$(filter %.a %.o,$$^) $$(LDLIBS) -o $$@ endef @@ -315,12 +367,15 @@ verifier/tests.h: verifier/*.c echo '#endif' \ ) > verifier/tests.h) $(OUTPUT)/test_verifier: test_verifier.c verifier/tests.h $(BPFOBJ) | 
$(OUTPUT) + $(call msg, BINARY,,$@) $(CC) $(CFLAGS) $(filter %.a %.o %.c,$^) $(LDLIBS) -o $@ # Make sure we are able to include and link libbpf against c++. -$(OUTPUT)/test_cpp: test_cpp.cpp $(BPFOBJ) +$(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ) + $(call msg, CXX,,$@) $(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@ EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) \ prog_tests/tests.h map_tests/tests.h verifier/tests.h \ - feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc + feature $(OUTPUT)/*.o $(OUTPUT)/no_alu32 $(OUTPUT)/bpf_gcc \ + tools *.skel.h diff --git a/tools/testing/selftests/bpf/prog_tests/attach_probe.c b/tools/testing/selftests/bpf/prog_tests/attach_probe.c index a83111a32d4a..a0ee87c8e1ea 100644 --- a/tools/testing/selftests/bpf/prog_tests/attach_probe.c +++ b/tools/testing/selftests/bpf/prog_tests/attach_probe.c @@ -1,26 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> - -#define EMBED_FILE(NAME, PATH) \ -asm ( \ -" .pushsection \".rodata\", \"a\", @progbits \n" \ -" .global "#NAME"_data \n" \ -#NAME"_data: \n" \ -" .incbin \"" PATH "\" \n" \ -#NAME"_data_end: \n" \ -" .global "#NAME"_size \n" \ -" .type "#NAME"_size, @object \n" \ -" .size "#NAME"_size, 4 \n" \ -" .align 4, \n" \ -#NAME"_size: \n" \ -" .int "#NAME"_data_end - "#NAME"_data \n" \ -" .popsection \n" \ -); \ -extern char NAME##_data[]; \ -extern int NAME##_size; +#include "test_attach_probe.skel.h" ssize_t get_base_addr() { - size_t start; + size_t start, offset; char buf[256]; FILE *f; @@ -28,10 +11,11 @@ ssize_t get_base_addr() { if (!f) return -errno; - while (fscanf(f, "%zx-%*x %s %*s\n", &start, buf) == 2) { + while (fscanf(f, "%zx-%*x %s %zx %*[^\n]\n", + &start, buf, &offset) == 3) { if (strcmp(buf, "r-xp") == 0) { fclose(f); - return start; + return start - offset; } } @@ -39,30 +23,12 @@ ssize_t get_base_addr() { return -EINVAL; } -EMBED_FILE(probe, "test_attach_probe.o"); - void test_attach_probe(void) { - const char *kprobe_name = "kprobe/sys_nanosleep"; - const char *kretprobe_name = "kretprobe/sys_nanosleep"; - const char *uprobe_name = "uprobe/trigger_func"; - const char *uretprobe_name = "uretprobe/trigger_func"; - const int kprobe_idx = 0, kretprobe_idx = 1; - const int uprobe_idx = 2, uretprobe_idx = 3; - const char *obj_name = "attach_probe"; - DECLARE_LIBBPF_OPTS(bpf_object_open_opts, open_opts, - .object_name = obj_name, - .relaxed_maps = true, - ); - struct bpf_program *kprobe_prog, *kretprobe_prog; - struct bpf_program *uprobe_prog, *uretprobe_prog; - struct bpf_object *obj; - int err, duration = 0, res; - struct bpf_link *kprobe_link = NULL; - struct bpf_link *kretprobe_link = NULL; - struct bpf_link *uprobe_link = NULL; - struct bpf_link *uretprobe_link = NULL; - int results_map_fd; + int duration = 0; + struct bpf_link *kprobe_link, *kretprobe_link; + struct bpf_link *uprobe_link, *uretprobe_link; + struct test_attach_probe* skel; size_t uprobe_offset; ssize_t base_addr; @@ -72,123 +38,68 @@ void test_attach_probe(void) return; uprobe_offset = (size_t)&get_base_addr - base_addr; - /* open object */ - obj = bpf_object__open_mem(probe_data, probe_size, &open_opts); - if (CHECK(IS_ERR(obj), "obj_open_mem", "err %ld\n", PTR_ERR(obj))) + skel = test_attach_probe__open_and_load(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) return; - - if (CHECK(strcmp(bpf_object__name(obj), obj_name), "obj_name", - "wrong obj name '%s', expected '%s'\n", - bpf_object__name(obj), obj_name)) + if (CHECK(!skel->bss, "check_bss", ".bss wasn't mmap()-ed\n")) 
goto cleanup; - kprobe_prog = bpf_object__find_program_by_title(obj, kprobe_name); - if (CHECK(!kprobe_prog, "find_probe", - "prog '%s' not found\n", kprobe_name)) - goto cleanup; - kretprobe_prog = bpf_object__find_program_by_title(obj, kretprobe_name); - if (CHECK(!kretprobe_prog, "find_probe", - "prog '%s' not found\n", kretprobe_name)) - goto cleanup; - uprobe_prog = bpf_object__find_program_by_title(obj, uprobe_name); - if (CHECK(!uprobe_prog, "find_probe", - "prog '%s' not found\n", uprobe_name)) - goto cleanup; - uretprobe_prog = bpf_object__find_program_by_title(obj, uretprobe_name); - if (CHECK(!uretprobe_prog, "find_probe", - "prog '%s' not found\n", uretprobe_name)) - goto cleanup; - - /* create maps && load programs */ - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "err %d\n", err)) - goto cleanup; - - /* load maps */ - results_map_fd = bpf_find_map(__func__, obj, "results_map"); - if (CHECK(results_map_fd < 0, "find_results_map", - "err %d\n", results_map_fd)) - goto cleanup; - - kprobe_link = bpf_program__attach_kprobe(kprobe_prog, + kprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kprobe, false /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", - "err %ld\n", PTR_ERR(kprobe_link))) { - kprobe_link = NULL; + "err %ld\n", PTR_ERR(kprobe_link))) goto cleanup; - } - kretprobe_link = bpf_program__attach_kprobe(kretprobe_prog, + skel->links.handle_kprobe = kprobe_link; + + kretprobe_link = bpf_program__attach_kprobe(skel->progs.handle_kretprobe, true /* retprobe */, SYS_NANOSLEEP_KPROBE_NAME); if (CHECK(IS_ERR(kretprobe_link), "attach_kretprobe", - "err %ld\n", PTR_ERR(kretprobe_link))) { - kretprobe_link = NULL; + "err %ld\n", PTR_ERR(kretprobe_link))) goto cleanup; - } - uprobe_link = bpf_program__attach_uprobe(uprobe_prog, + skel->links.handle_kretprobe = kretprobe_link; + + uprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uprobe, false /* retprobe */, 0 /* self pid */, "/proc/self/exe", uprobe_offset); if (CHECK(IS_ERR(uprobe_link), "attach_uprobe", - "err %ld\n", PTR_ERR(uprobe_link))) { - uprobe_link = NULL; + "err %ld\n", PTR_ERR(uprobe_link))) goto cleanup; - } - uretprobe_link = bpf_program__attach_uprobe(uretprobe_prog, + skel->links.handle_uprobe = uprobe_link; + + uretprobe_link = bpf_program__attach_uprobe(skel->progs.handle_uretprobe, true /* retprobe */, -1 /* any pid */, "/proc/self/exe", uprobe_offset); if (CHECK(IS_ERR(uretprobe_link), "attach_uretprobe", - "err %ld\n", PTR_ERR(uretprobe_link))) { - uretprobe_link = NULL; + "err %ld\n", PTR_ERR(uretprobe_link))) goto cleanup; - } + skel->links.handle_uretprobe = uretprobe_link; /* trigger & validate kprobe && kretprobe */ usleep(1); - err = bpf_map_lookup_elem(results_map_fd, &kprobe_idx, &res); - if (CHECK(err, "get_kprobe_res", - "failed to get kprobe res: %d\n", err)) + if (CHECK(skel->bss->kprobe_res != 1, "check_kprobe_res", + "wrong kprobe res: %d\n", skel->bss->kprobe_res)) goto cleanup; - if (CHECK(res != kprobe_idx + 1, "check_kprobe_res", - "wrong kprobe res: %d\n", res)) - goto cleanup; - - err = bpf_map_lookup_elem(results_map_fd, &kretprobe_idx, &res); - if (CHECK(err, "get_kretprobe_res", - "failed to get kretprobe res: %d\n", err)) - goto cleanup; - if (CHECK(res != kretprobe_idx + 1, "check_kretprobe_res", - "wrong kretprobe res: %d\n", res)) + if (CHECK(skel->bss->kretprobe_res != 2, "check_kretprobe_res", + "wrong kretprobe res: %d\n", skel->bss->kretprobe_res)) goto cleanup; /* trigger & validate uprobe & uretprobe */ 
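	/* (calling get_base_addr() below is what fires the uprobe and
	 * uretprobe: both were attached at that function's offset within
	 * /proc/self/exe)
	 */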
get_base_addr(); - err = bpf_map_lookup_elem(results_map_fd, &uprobe_idx, &res); - if (CHECK(err, "get_uprobe_res", - "failed to get uprobe res: %d\n", err)) - goto cleanup; - if (CHECK(res != uprobe_idx + 1, "check_uprobe_res", - "wrong uprobe res: %d\n", res)) - goto cleanup; - - err = bpf_map_lookup_elem(results_map_fd, &uretprobe_idx, &res); - if (CHECK(err, "get_uretprobe_res", - "failed to get uretprobe res: %d\n", err)) + if (CHECK(skel->bss->uprobe_res != 3, "check_uprobe_res", + "wrong uprobe res: %d\n", skel->bss->uprobe_res)) goto cleanup; - if (CHECK(res != uretprobe_idx + 1, "check_uretprobe_res", - "wrong uretprobe res: %d\n", res)) + if (CHECK(skel->bss->uretprobe_res != 4, "check_uretprobe_res", + "wrong uretprobe res: %d\n", skel->bss->uretprobe_res)) goto cleanup; cleanup: - bpf_link__destroy(kprobe_link); - bpf_link__destroy(kretprobe_link); - bpf_link__destroy(uprobe_link); - bpf_link__destroy(uretprobe_link); - bpf_object__close(obj); + test_attach_probe__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c new file mode 100644 index 000000000000..5b13f2c6c402 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_autodetach.c @@ -0,0 +1,111 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "cgroup_helpers.h" + +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int prog_load(void) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, 1), /* r0 = 1 */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); +} + +void test_cgroup_attach_autodetach(void) +{ + __u32 duration = 0, prog_cnt = 4, attach_flags; + int allow_prog[2] = {-1}; + __u32 prog_ids[2] = {0}; + void *ptr = NULL; + int cg = 0, i; + int attempts; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + allow_prog[i] = prog_load(); + if (CHECK(allow_prog[i] < 0, "prog_load", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + } + + if (CHECK_FAIL(setup_cgroup_environment())) + goto err; + + /* create a cgroup, attach two programs and remember their ids */ + cg = create_and_get_cgroup("/cg_autodetach"); + if (CHECK_FAIL(cg < 0)) + goto err; + + if (CHECK_FAIL(join_cgroup("/cg_autodetach"))) + goto err; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (CHECK(bpf_prog_attach(allow_prog[i], cg, + BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog_attach", "prog[%d], errno=%d\n", i, errno)) + goto err; + + /* make sure that programs are attached and run some traffic */ + if (CHECK(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags, + prog_ids, &prog_cnt), + "prog_query", "errno=%d\n", errno)) + goto err; + if (CHECK_FAIL(system(PING_CMD))) + goto err; + + /* allocate some memory (4Mb) to pin the original cgroup */ + ptr = malloc(4 * (1 << 20)); + if (CHECK_FAIL(!ptr)) + goto err; + + /* close programs and cgroup fd */ + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + close(allow_prog[i]); + allow_prog[i] = -1; + } + + close(cg); + cg = 0; + + /* leave the cgroup and remove it. don't detach programs */ + cleanup_cgroup_environment(); + + /* wait for the asynchronous auto-detachment. + * wait for no more than 5 sec and give up. 
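+	 * (the 4MB allocated above keeps the removed cgroup pinned in
+	 * memory, so the check below verifies that the programs are
+	 * auto-detached and freed even while the cgroup itself lingers)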
+ */ + for (i = 0; i < ARRAY_SIZE(prog_ids); i++) { + for (attempts = 5; attempts >= 0; attempts--) { + int fd = bpf_prog_get_fd_by_id(prog_ids[i]); + + if (fd < 0) + break; + + /* don't leave the fd open */ + close(fd); + + if (CHECK_FAIL(!attempts)) + goto err; + + sleep(1); + } + } + +err: + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (allow_prog[i] >= 0) + close(allow_prog[i]); + if (cg) + close(cg); + free(ptr); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c new file mode 100644 index 000000000000..2ff21dbce179 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_multi.c @@ -0,0 +1,285 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "cgroup_helpers.h" + +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int map_fd = -1; + +static int prog_load_cnt(int verdict, int val) +{ + int cgroup_storage_fd, percpu_cgroup_storage_fd; + + if (map_fd < 0) + map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0); + if (map_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + percpu_cgroup_storage_fd = bpf_create_map( + BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE, + sizeof(struct bpf_cgroup_storage_key), 8, 0, 0); + if (percpu_cgroup_storage_fd < 0) { + printf("failed to create map '%s'\n", strerror(errno)); + return -1; + } + + struct bpf_insn prog[] = { + BPF_MOV32_IMM(BPF_REG_0, 0), + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */ + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */ + BPF_LD_MAP_FD(BPF_REG_1, map_fd), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem), + BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2), + BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */ + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */ + + BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_MOV64_IMM(BPF_REG_1, val), + BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0), + + BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd), + BPF_MOV64_IMM(BPF_REG_2, 0), + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage), + BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1), + BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0), + + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + int ret; + + ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); + + close(cgroup_storage_fd); + return ret; +} + +void test_cgroup_attach_multi(void) +{ + __u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id; + int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0; + DECLARE_LIBBPF_OPTS(bpf_prog_attach_opts, attach_opts); + int allow_prog[7] = {-1}; + unsigned long long value; + __u32 duration = 0; + int i = 0; + + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) { + allow_prog[i] = prog_load_cnt(1, 1 << i); + if 
(CHECK(allow_prog[i] < 0, "prog_load", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + } + + if (CHECK_FAIL(setup_cgroup_environment())) + goto err; + + cg1 = create_and_get_cgroup("/cg1"); + if (CHECK_FAIL(cg1 < 0)) + goto err; + cg2 = create_and_get_cgroup("/cg1/cg2"); + if (CHECK_FAIL(cg2 < 0)) + goto err; + cg3 = create_and_get_cgroup("/cg1/cg2/cg3"); + if (CHECK_FAIL(cg3 < 0)) + goto err; + cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4"); + if (CHECK_FAIL(cg4 < 0)) + goto err; + cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5"); + if (CHECK_FAIL(cg5 < 0)) + goto err; + + if (CHECK_FAIL(join_cgroup("/cg1/cg2/cg3/cg4/cg5"))) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog0_attach_to_cg1_multi", "errno=%d\n", errno)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "fail_same_prog_attach_to_cg1", "unexpected success\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog1_attach_to_cg1_multi", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog2_attach_to_cg2_override", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_MULTI), + "prog3_attach_to_cg3_multi", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog4_attach_to_cg4_override", "errno=%d\n", errno)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0), + "prog5_attach_to_cg5_none", "errno=%d\n", errno)) + goto err; + + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 1 + 2 + 8 + 32); + + /* query the number of effective progs in cg5 */ + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, NULL, NULL, &prog_cnt)); + CHECK_FAIL(prog_cnt != 4); + /* retrieve prog_ids of effective progs in cg5 */ + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 4); + CHECK_FAIL(attach_flags != 0); + saved_prog_id = prog_ids[0]; + /* check enospc handling */ + prog_ids[0] = 0; + prog_cnt = 2; + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt) != -1); + CHECK_FAIL(errno != ENOSPC); + CHECK_FAIL(prog_cnt != 4); + /* check that prog_ids are returned even when buffer is too small */ + CHECK_FAIL(prog_ids[0] != saved_prog_id); + /* retrieve prog_id of single attached prog in cg5 */ + prog_ids[0] = 0; + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 1); + CHECK_FAIL(prog_ids[0] != saved_prog_id); + + /* detach bottom program and ping again */ + if (CHECK(bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS), + "prog_detach_from_cg5", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 1 + 2 + 8 + 16); + + /* test replace */ + + attach_opts.flags = BPF_F_ALLOW_OVERRIDE | BPF_F_REPLACE; + attach_opts.replace_prog_fd = allow_prog[0]; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + 
BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_override", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != EINVAL); + + attach_opts.flags = BPF_F_REPLACE; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_no_multi", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != EINVAL); + + attach_opts.flags = BPF_F_ALLOW_MULTI | BPF_F_REPLACE; + attach_opts.replace_prog_fd = -1; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_bad_fd", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != EBADF); + + /* replacing a program that is not attached to cgroup should fail */ + attach_opts.replace_prog_fd = allow_prog[3]; + if (CHECK(!bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "fail_prog_replace_no_ent", "unexpected success\n")) + goto err; + CHECK_FAIL(errno != ENOENT); + + /* replace 1st from the top program */ + attach_opts.replace_prog_fd = allow_prog[0]; + if (CHECK(bpf_prog_attach_xattr(allow_prog[6], cg1, + BPF_CGROUP_INET_EGRESS, &attach_opts), + "prog_replace", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 64 + 2 + 8 + 16); + + /* detach 3rd from bottom program and ping again */ + if (CHECK(!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS), + "fail_prog_detach_from_cg3", "unexpected success\n")) + goto err; + + if (CHECK(bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS), + "prog3_detach_from_cg3", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 64 + 2 + 16); + + /* detach 2nd from bottom program and ping again */ + if (CHECK(bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS), + "prog_detach_from_cg4", "errno=%d\n", errno)) + goto err; + + value = 0; + CHECK_FAIL(bpf_map_update_elem(map_fd, &key, &value, 0)); + CHECK_FAIL(system(PING_CMD)); + CHECK_FAIL(bpf_map_lookup_elem(map_fd, &key, &value)); + CHECK_FAIL(value != 64 + 2 + 4); + + prog_cnt = 4; + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, + BPF_F_QUERY_EFFECTIVE, &attach_flags, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 3); + CHECK_FAIL(attach_flags != 0); + CHECK_FAIL(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0, NULL, + prog_ids, &prog_cnt)); + CHECK_FAIL(prog_cnt != 0); + +err: + for (i = 0; i < ARRAY_SIZE(allow_prog); i++) + if (allow_prog[i] >= 0) + close(allow_prog[i]); + close(cg1); + close(cg2); + close(cg3); + close(cg4); + close(cg5); + cleanup_cgroup_environment(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c new file mode 100644 index 000000000000..9d8cb48b99de --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cgroup_attach_override.c @@ -0,0 +1,148 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <test_progs.h> + +#include "cgroup_helpers.h" + +#define FOO "/foo" +#define BAR "/foo/bar/" +#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" + +char bpf_log_buf[BPF_LOG_BUF_SIZE]; + +static int prog_load(int verdict) +{ + struct bpf_insn prog[] = { + BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ + BPF_EXIT_INSN(), + }; + size_t 
insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + return bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, + prog, insns_cnt, "GPL", 0, + bpf_log_buf, BPF_LOG_BUF_SIZE); +} + +void test_cgroup_attach_override(void) +{ + int drop_prog = -1, allow_prog = -1, foo = -1, bar = -1; + __u32 duration = 0; + + allow_prog = prog_load(1); + if (CHECK(allow_prog < 0, "prog_load_allow", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + + drop_prog = prog_load(0); + if (CHECK(drop_prog < 0, "prog_load_drop", + "verifier output:\n%s\n-------\n", bpf_log_buf)) + goto err; + + foo = test__join_cgroup(FOO); + if (CHECK(foo < 0, "cgroup_join_foo", "cgroup setup failed\n")) + goto err; + + if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_drop_foo_override", + "attach prog to %s failed, errno=%d\n", FOO, errno)) + goto err; + + if (CHECK(!system(PING_CMD), "ping_fail", + "ping unexpectedly succeeded\n")) + goto err; + + bar = test__join_cgroup(BAR); + if (CHECK(bar < 0, "cgroup_join_bar", "cgroup setup failed\n")) + goto err; + + if (CHECK(!system(PING_CMD), "ping_fail", + "ping unexpectedly succeeded\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_allow_bar_override", + "attach prog to %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n")) + goto err; + + if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS), + "prog_detach_bar", + "detach prog from %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(!system(PING_CMD), "ping_fail", + "ping unexpectedly succeeded\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_allow_bar_override", + "attach prog to %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS), + "prog_detach_foo", + "detach prog from %s failed, errno=%d\n", FOO, errno)) + goto err; + + if (CHECK(system(PING_CMD), "ping_ok", "ping failed\n")) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "prog_attach_allow_bar_override", + "attach prog to %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0), + "fail_prog_attach_allow_bar_none", + "attach prog to %s unexpectedly succeeded\n", BAR)) + goto err; + + if (CHECK(bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS), + "prog_detach_bar", + "detach prog from %s failed, errno=%d\n", BAR, errno)) + goto err; + + if (CHECK(!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS), + "fail_prog_detach_foo", + "double detach from %s unexpectedly succeeded\n", FOO)) + goto err; + + if (CHECK(bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0), + "prog_attach_allow_foo_none", + "attach prog to %s failed, errno=%d\n", FOO, errno)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0), + "fail_prog_attach_allow_bar_none", + "attach prog to %s unexpectedly succeeded\n", BAR)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "fail_prog_attach_allow_bar_override", + "attach prog to %s unexpectedly succeeded\n", BAR)) + goto err; + + if (CHECK(!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, + BPF_F_ALLOW_OVERRIDE), + "fail_prog_attach_allow_foo_override", + "attach prog to %s unexpectedly succeeded\n", 
FOO)) + goto err; + + if (CHECK(bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0), + "prog_attach_drop_foo_none", + "attach prog to %s failed, errno=%d\n", FOO, errno)) + goto err; + +err: + close(foo); + close(bar); + close(allow_prog); + close(drop_prog); +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_extern.c b/tools/testing/selftests/bpf/prog_tests/core_extern.c new file mode 100644 index 000000000000..b093787e9448 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/core_extern.c @@ -0,0 +1,169 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> +#include <sys/mman.h> +#include <sys/utsname.h> +#include <linux/version.h> +#include "test_core_extern.skel.h" + +static uint32_t get_kernel_version(void) +{ + uint32_t major, minor, patch; + struct utsname info; + + uname(&info); + if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) + return 0; + return KERNEL_VERSION(major, minor, patch); +} + +#define CFG "CONFIG_BPF_SYSCALL=n\n" + +static struct test_case { + const char *name; + const char *cfg; + bool fails; + struct test_core_extern__data data; +} test_cases[] = { + { .name = "default search path", .data = { .bpf_syscall = true } }, + { + .name = "custom values", + .cfg = "CONFIG_BPF_SYSCALL=n\n" + "CONFIG_TRISTATE=m\n" + "CONFIG_BOOL=y\n" + "CONFIG_CHAR=100\n" + "CONFIG_USHORT=30000\n" + "CONFIG_INT=123456\n" + "CONFIG_ULONG=0xDEADBEEFC0DE\n" + "CONFIG_STR=\"abracad\"\n" + "CONFIG_MISSING=0", + .data = { + .bpf_syscall = false, + .tristate_val = TRI_MODULE, + .bool_val = true, + .char_val = 100, + .ushort_val = 30000, + .int_val = 123456, + .ulong_val = 0xDEADBEEFC0DE, + .str_val = "abracad", + }, + }, + /* TRISTATE */ + { .name = "tristate (y)", .cfg = CFG"CONFIG_TRISTATE=y\n", + .data = { .tristate_val = TRI_YES } }, + { .name = "tristate (n)", .cfg = CFG"CONFIG_TRISTATE=n\n", + .data = { .tristate_val = TRI_NO } }, + { .name = "tristate (m)", .cfg = CFG"CONFIG_TRISTATE=m\n", + .data = { .tristate_val = TRI_MODULE } }, + { .name = "tristate (int)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=1" }, + { .name = "tristate (bad)", .fails = 1, .cfg = CFG"CONFIG_TRISTATE=M" }, + /* BOOL */ + { .name = "bool (y)", .cfg = CFG"CONFIG_BOOL=y\n", + .data = { .bool_val = true } }, + { .name = "bool (n)", .cfg = CFG"CONFIG_BOOL=n\n", + .data = { .bool_val = false } }, + { .name = "bool (tristate)", .fails = 1, .cfg = CFG"CONFIG_BOOL=m" }, + { .name = "bool (int)", .fails = 1, .cfg = CFG"CONFIG_BOOL=1" }, + /* CHAR */ + { .name = "char (tristate)", .cfg = CFG"CONFIG_CHAR=m\n", + .data = { .char_val = 'm' } }, + { .name = "char (bad)", .fails = 1, .cfg = CFG"CONFIG_CHAR=q\n" }, + { .name = "char (empty)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\n" }, + { .name = "char (str)", .fails = 1, .cfg = CFG"CONFIG_CHAR=\"y\"\n" }, + /* STRING */ + { .name = "str (empty)", .cfg = CFG"CONFIG_STR=\"\"\n", + .data = { .str_val = "\0\0\0\0\0\0\0" } }, + { .name = "str (padded)", .cfg = CFG"CONFIG_STR=\"abra\"\n", + .data = { .str_val = "abra\0\0\0" } }, + { .name = "str (too long)", .cfg = CFG"CONFIG_STR=\"abracada\"\n", + .data = { .str_val = "abracad" } }, + { .name = "str (no value)", .fails = 1, .cfg = CFG"CONFIG_STR=\n" }, + { .name = "str (bad value)", .fails = 1, .cfg = CFG"CONFIG_STR=bla\n" }, + /* INTEGERS */ + { + .name = "integer forms", + .cfg = CFG + "CONFIG_CHAR=0xA\n" + "CONFIG_USHORT=0462\n" + "CONFIG_INT=-100\n" + "CONFIG_ULONG=+1000000000000", + .data = { + .char_val = 0xA, + .ushort_val = 0462, + .int_val = 
-100, + .ulong_val = 1000000000000, + }, + }, + { .name = "int (bad)", .fails = 1, .cfg = CFG"CONFIG_INT=abc" }, + { .name = "int (str)", .fails = 1, .cfg = CFG"CONFIG_INT=\"abc\"" }, + { .name = "int (empty)", .fails = 1, .cfg = CFG"CONFIG_INT=" }, + { .name = "int (mixed)", .fails = 1, .cfg = CFG"CONFIG_INT=123abc" }, + { .name = "int (max)", .cfg = CFG"CONFIG_INT=2147483647", + .data = { .int_val = 2147483647 } }, + { .name = "int (min)", .cfg = CFG"CONFIG_INT=-2147483648", + .data = { .int_val = -2147483648 } }, + { .name = "int (max+1)", .fails = 1, .cfg = CFG"CONFIG_INT=2147483648" }, + { .name = "int (min-1)", .fails = 1, .cfg = CFG"CONFIG_INT=-2147483649" }, + { .name = "ushort (max)", .cfg = CFG"CONFIG_USHORT=65535", + .data = { .ushort_val = 65535 } }, + { .name = "ushort (min)", .cfg = CFG"CONFIG_USHORT=0", + .data = { .ushort_val = 0 } }, + { .name = "ushort (max+1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=65536" }, + { .name = "ushort (min-1)", .fails = 1, .cfg = CFG"CONFIG_USHORT=-1" }, + { .name = "u64 (max)", .cfg = CFG"CONFIG_ULONG=0xffffffffffffffff", + .data = { .ulong_val = 0xffffffffffffffff } }, + { .name = "u64 (min)", .cfg = CFG"CONFIG_ULONG=0", + .data = { .ulong_val = 0 } }, + { .name = "u64 (max+1)", .fails = 1, .cfg = CFG"CONFIG_ULONG=0x10000000000000000" }, +}; + +void test_core_extern(void) +{ + const uint32_t kern_ver = get_kernel_version(); + int err, duration = 0, i, j; + struct test_core_extern *skel = NULL; + uint64_t *got, *exp; + int n = sizeof(*skel->data) / sizeof(uint64_t); + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + struct test_case *t = &test_cases[i]; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, + .kconfig = t->cfg, + ); + + if (!test__start_subtest(t->name)) + continue; + + skel = test_core_extern__open_opts(&opts); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) + goto cleanup; + err = test_core_extern__load(skel); + if (t->fails) { + CHECK(!err, "skel_load", + "shouldn't succeed open/load of skeleton\n"); + goto cleanup; + } else if (CHECK(err, "skel_load", + "failed to open/load skeleton\n")) { + goto cleanup; + } + err = test_core_extern__attach(skel); + if (CHECK(err, "attach_raw_tp", "failed attach: %d\n", err)) + goto cleanup; + + usleep(1); + + t->data.kern_ver = kern_ver; + t->data.missing_val = 0xDEADC0DE; + got = (uint64_t *)skel->data; + exp = (uint64_t *)&t->data; + for (j = 0; j < n; j++) { + CHECK(got[j] != exp[j], "check_res", + "result #%d: expected %lx, but got %lx\n", + j, exp[j], got[j]); + } +cleanup: + test_core_extern__destroy(skel); + skel = NULL; + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c index 05fe85281ff7..31e177adbdf1 100644 --- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c +++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c @@ -74,6 +74,7 @@ .b123 = 2, \ .c1c = 3, \ .d00d = 4, \ + .f10c = 0, \ }, \ .output_len = sizeof(struct core_reloc_arrays_output) \ } @@ -308,12 +309,15 @@ static struct core_reloc_test_case test_cases[] = { ARRAYS_CASE(arrays), ARRAYS_CASE(arrays___diff_arr_dim), ARRAYS_CASE(arrays___diff_arr_val_sz), + ARRAYS_CASE(arrays___equiv_zero_sz_arr), + ARRAYS_CASE(arrays___fixed_arr), ARRAYS_ERR_CASE(arrays___err_too_small), ARRAYS_ERR_CASE(arrays___err_too_shallow), ARRAYS_ERR_CASE(arrays___err_non_array), ARRAYS_ERR_CASE(arrays___err_wrong_val_type1), ARRAYS_ERR_CASE(arrays___err_wrong_val_type2), + ARRAYS_ERR_CASE(arrays___err_bad_zero_sz_arr), /* enum/ptr/int handling scenarios */ 
PRIMITIVES_CASE(primitives), diff --git a/tools/testing/selftests/bpf/prog_tests/cpu_mask.c b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c new file mode 100644 index 000000000000..1fa1bdbaffa9 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/cpu_mask.c @@ -0,0 +1,78 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> +#include <bpf/btf.h> +#include "libbpf_internal.h" + +static int duration = 0; + +static void validate_mask(int case_nr, const char *exp, bool *mask, int n) +{ + int i; + + for (i = 0; exp[i]; i++) { + if (exp[i] == '1') { + if (CHECK(i + 1 > n, "mask_short", + "case #%d: mask too short, got n=%d, need at least %d\n", + case_nr, n, i + 1)) + return; + CHECK(!mask[i], "cpu_not_set", + "case #%d: mask differs, expected cpu#%d SET\n", + case_nr, i); + } else { + CHECK(i < n && mask[i], "cpu_set", + "case #%d: mask differs, expected cpu#%d UNSET\n", + case_nr, i); + } + } + CHECK(i < n, "mask_long", + "case #%d: mask too long, got n=%d, expected at most %d\n", + case_nr, n, i); +} + +static struct { + const char *cpu_mask; + const char *expect; + bool fails; +} test_cases[] = { + { "0\n", "1", false }, + { "0,2\n", "101", false }, + { "0-2\n", "111", false }, + { "0-2,3-4\n", "11111", false }, + { "0", "1", false }, + { "0-2", "111", false }, + { "0,2", "101", false }, + { "0,1-3", "1111", false }, + { "0,1,2,3", "1111", false }, + { "0,2-3,5", "101101", false }, + { "3-3", "0001", false }, + { "2-4,6,9-10", "00111010011", false }, + /* failure cases */ + { "", "", true }, + { "0-", "", true }, + { "0 ", "", true }, + { "0_1", "", true }, + { "1-0", "", true }, + { "-1", "", true }, +}; + +void test_cpu_mask() +{ + int i, err, n; + bool *mask; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + mask = NULL; + err = parse_cpu_mask_str(test_cases[i].cpu_mask, &mask, &n); + if (test_cases[i].fails) { + CHECK(!err, "should_fail", + "case #%d: parsing should fail!\n", i + 1); + } else { + if (CHECK(err, "parse_err", + "case #%d: cpu mask parsing failed: %d\n", + i + 1, err)) + continue; + validate_mask(i + 1, test_cases[i].expect, mask, n); + } + free(mask); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c index 40bcff2cc274..235ac4f67f5b 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_fexit.c @@ -1,90 +1,55 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include "test_pkt_access.skel.h" +#include "fentry_test.skel.h" +#include "fexit_test.skel.h" void test_fentry_fexit(void) { - struct bpf_prog_load_attr attr_fentry = { - .file = "./fentry_test.o", - }; - struct bpf_prog_load_attr attr_fexit = { - .file = "./fexit_test.o", - }; - - struct bpf_object *obj_fentry = NULL, *obj_fexit = NULL, *pkt_obj; - struct bpf_map *data_map_fentry, *data_map_fexit; - char fentry_name[] = "fentry/bpf_fentry_testX"; - char fexit_name[] = "fexit/bpf_fentry_testX"; - int err, pkt_fd, kfree_skb_fd, i; - struct bpf_link *link[12] = {}; - struct bpf_program *prog[12]; - __u32 duration, retval; - const int zero = 0; - u64 result[12]; - - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + struct test_pkt_access *pkt_skel = NULL; + struct fentry_test *fentry_skel = NULL; + struct fexit_test *fexit_skel = NULL; + __u64 *fentry_res, *fexit_res; + __u32 duration = 0, 
retval; + int err, pkt_fd, i; + + pkt_skel = test_pkt_access__open_and_load(); + if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) return; - err = bpf_prog_load_xattr(&attr_fentry, &obj_fentry, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + fentry_skel = fentry_test__open_and_load(); + if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) goto close_prog; - err = bpf_prog_load_xattr(&attr_fexit, &obj_fexit, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) + fexit_skel = fexit_test__open_and_load(); + if (CHECK(!fexit_skel, "fexit_skel_load", "fexit skeleton failed\n")) goto close_prog; - for (i = 0; i < 6; i++) { - fentry_name[sizeof(fentry_name) - 2] = '1' + i; - prog[i] = bpf_object__find_program_by_title(obj_fentry, fentry_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fentry_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map_fentry = bpf_object__find_map_by_name(obj_fentry, "fentry_t.bss"); - if (CHECK(!data_map_fentry, "find_data_map", "data map not found\n")) + err = fentry_test__attach(fentry_skel); + if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) goto close_prog; - - for (i = 6; i < 12; i++) { - fexit_name[sizeof(fexit_name) - 2] = '1' + i - 6; - prog[i] = bpf_object__find_program_by_title(obj_fexit, fexit_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", fexit_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map_fexit = bpf_object__find_map_by_name(obj_fexit, "fexit_te.bss"); - if (CHECK(!data_map_fexit, "find_data_map", "data map not found\n")) + err = fexit_test__attach(fexit_skel); + if (CHECK(err, "fexit_attach", "fexit attach failed: %d\n", err)) goto close_prog; + pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map_fentry), &zero, &result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - err = bpf_map_lookup_elem(bpf_map__fd(data_map_fexit), &zero, result + 6); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - for (i = 0; i < 12; i++) - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", - i % 6 + 1, result[i])) - goto close_prog; + fentry_res = (__u64 *)fentry_skel->bss; + fexit_res = (__u64 *)fexit_skel->bss; + for (i = 0; i < 6; i++) { + CHECK(fentry_res[i] != 1, "result", + "fentry_test%d failed err %lld\n", i + 1, fentry_res[i]); + CHECK(fexit_res[i] != 1, "result", + "fexit_test%d failed err %lld\n", i + 1, fexit_res[i]); + } close_prog: - for (i = 0; i < 12; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - bpf_object__close(obj_fentry); - bpf_object__close(obj_fexit); - bpf_object__close(pkt_obj); + test_pkt_access__destroy(pkt_skel); + fentry_test__destroy(fentry_skel); + fexit_test__destroy(fexit_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/fentry_test.c b/tools/testing/selftests/bpf/prog_tests/fentry_test.c
index 9fb103193878..e1a379f5f7d2 100644 --- a/tools/testing/selftests/bpf/prog_tests/fentry_test.c +++ b/tools/testing/selftests/bpf/prog_tests/fentry_test.c @@ -1,64 +1,43 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2019 Facebook */ #include <test_progs.h> +#include "test_pkt_access.skel.h" +#include "fentry_test.skel.h" void test_fentry_test(void) { - struct bpf_prog_load_attr attr = { - .file = "./fentry_test.o", - }; - - char prog_name[] = "fentry/bpf_fentry_testX"; - struct bpf_object *obj = NULL, *pkt_obj; - int err, pkt_fd, kfree_skb_fd, i; - struct bpf_link *link[6] = {}; - struct bpf_program *prog[6]; + struct test_pkt_access *pkt_skel = NULL; + struct fentry_test *fentry_skel = NULL; + int err, pkt_fd, i; __u32 duration, retval; - struct bpf_map *data_map; - const int zero = 0; - u64 result[6]; + __u64 *result; - err = bpf_prog_load("./test_pkt_access.o", BPF_PROG_TYPE_SCHED_CLS, - &pkt_obj, &pkt_fd); - if (CHECK(err, "prog_load sched cls", "err %d errno %d\n", err, errno)) + pkt_skel = test_pkt_access__open_and_load(); + if (CHECK(!pkt_skel, "pkt_skel_load", "pkt_access skeleton failed\n")) return; - err = bpf_prog_load_xattr(&attr, &obj, &kfree_skb_fd); - if (CHECK(err, "prog_load fail", "err %d errno %d\n", err, errno)) - goto close_prog; + fentry_skel = fentry_test__open_and_load(); + if (CHECK(!fentry_skel, "fentry_skel_load", "fentry skeleton failed\n")) + goto cleanup; - for (i = 0; i < 6; i++) { - prog_name[sizeof(prog_name) - 2] = '1' + i; - prog[i] = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog[i], "find_prog", "prog %s not found\n", prog_name)) - goto close_prog; - link[i] = bpf_program__attach_trace(prog[i]); - if (CHECK(IS_ERR(link[i]), "attach_trace", "failed to link\n")) - goto close_prog; - } - data_map = bpf_object__find_map_by_name(obj, "fentry_t.bss"); - if (CHECK(!data_map, "find_data_map", "data map not found\n")) - goto close_prog; + err = fentry_test__attach(fentry_skel); + if (CHECK(err, "fentry_attach", "fentry attach failed: %d\n", err)) + goto cleanup; + pkt_fd = bpf_program__fd(pkt_skel->progs.test_pkt_access); err = bpf_prog_test_run(pkt_fd, 1, &pkt_v6, sizeof(pkt_v6), NULL, NULL, &retval, &duration); CHECK(err || retval, "ipv6", "err %d errno %d retval %d duration %d\n", err, errno, retval, duration); - err = bpf_map_lookup_elem(bpf_map__fd(data_map), &zero, &result); - if (CHECK(err, "get_result", - "failed to get output data: %d\n", err)) - goto close_prog; - - for (i = 0; i < 6; i++) - if (CHECK(result[i] != 1, "result", "bpf_fentry_test%d failed err %ld\n", - i + 1, result[i])) - goto close_prog; + result = (__u64 *)fentry_skel->bss; + for (i = 0; i < 6; i++) { + if (CHECK(result[i] != 1, "result", + "fentry_test%d failed err %lld\n", i + 1, result[i])) + goto cleanup; + } -close_prog: - for (i = 0; i < 6; i++) - if (!IS_ERR_OR_NULL(link[i])) - bpf_link__destroy(link[i]); - bpf_object__close(obj); - bpf_object__close(pkt_obj); +cleanup: + fentry_test__destroy(fentry_skel); + test_pkt_access__destroy(pkt_skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/mmap.c b/tools/testing/selftests/bpf/prog_tests/mmap.c index 051a6d48762c..16a814eb4d64 100644 --- a/tools/testing/selftests/bpf/prog_tests/mmap.c +++ b/tools/testing/selftests/bpf/prog_tests/mmap.c @@ -1,16 +1,12 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> #include <sys/mman.h> +#include "test_mmap.skel.h" struct map_data { __u64 val[512 * 4]; }; -struct bss_data { - __u64 in_val; - __u64 out_val; -}; - static size_t 
roundup_page(size_t sz) { long page_size = sysconf(_SC_PAGE_SIZE); @@ -19,41 +15,25 @@ static size_t roundup_page(size_t sz) void test_mmap(void) { - const char *file = "test_mmap.o"; - const char *probe_name = "raw_tracepoint/sys_enter"; - const char *tp_name = "sys_enter"; - const size_t bss_sz = roundup_page(sizeof(struct bss_data)); + const size_t bss_sz = roundup_page(sizeof(struct test_mmap__bss)); const size_t map_sz = roundup_page(sizeof(struct map_data)); const int zero = 0, one = 1, two = 2, far = 1500; const long page_size = sysconf(_SC_PAGE_SIZE); int err, duration = 0, i, data_map_fd; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link = NULL; struct bpf_map *data_map, *bss_map; void *bss_mmaped = NULL, *map_mmaped = NULL, *tmp1, *tmp2; - volatile struct bss_data *bss_data; - volatile struct map_data *map_data; + struct test_mmap__bss *bss_data; + struct map_data *map_data; + struct test_mmap *skel; __u64 val = 0; - obj = bpf_object__open_file("test_mmap.o", NULL); - if (CHECK(IS_ERR(obj), "obj_open", "failed to open '%s': %ld\n", - file, PTR_ERR(obj))) + + skel = test_mmap__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) return; - prog = bpf_object__find_program_by_title(obj, probe_name); - if (CHECK(!prog, "find_probe", "prog '%s' not found\n", probe_name)) - goto cleanup; - err = bpf_object__load(obj); - if (CHECK(err, "obj_load", "failed to load prog '%s': %d\n", - probe_name, err)) - goto cleanup; - bss_map = bpf_object__find_map_by_name(obj, "test_mma.bss"); - if (CHECK(!bss_map, "find_bss_map", ".bss map not found\n")) - goto cleanup; - data_map = bpf_object__find_map_by_name(obj, "data_map"); - if (CHECK(!data_map, "find_data_map", "data_map map not found\n")) - goto cleanup; + bss_map = skel->maps.bss; + data_map = skel->maps.data_map; data_map_fd = bpf_map__fd(data_map); bss_mmaped = mmap(NULL, bss_sz, PROT_READ | PROT_WRITE, MAP_SHARED, @@ -77,13 +57,15 @@ void test_mmap(void) CHECK_FAIL(bss_data->in_val); CHECK_FAIL(bss_data->out_val); + CHECK_FAIL(skel->bss->in_val); + CHECK_FAIL(skel->bss->out_val); CHECK_FAIL(map_data->val[0]); CHECK_FAIL(map_data->val[1]); CHECK_FAIL(map_data->val[2]); CHECK_FAIL(map_data->val[far]); - link = bpf_program__attach_raw_tracepoint(prog, tp_name); - if (CHECK(IS_ERR(link), "attach_raw_tp", "err %ld\n", PTR_ERR(link))) + err = test_mmap__attach(skel); + if (CHECK(err, "attach_raw_tp", "err %d\n", err)) goto cleanup; bss_data->in_val = 123; @@ -94,6 +76,8 @@ void test_mmap(void) CHECK_FAIL(bss_data->in_val != 123); CHECK_FAIL(bss_data->out_val != 123); + CHECK_FAIL(skel->bss->in_val != 123); + CHECK_FAIL(skel->bss->out_val != 123); CHECK_FAIL(map_data->val[0] != 111); CHECK_FAIL(map_data->val[1] != 222); CHECK_FAIL(map_data->val[2] != 123); @@ -160,6 +144,8 @@ void test_mmap(void) usleep(1); CHECK_FAIL(bss_data->in_val != 321); CHECK_FAIL(bss_data->out_val != 321); + CHECK_FAIL(skel->bss->in_val != 321); + CHECK_FAIL(skel->bss->out_val != 321); CHECK_FAIL(map_data->val[0] != 111); CHECK_FAIL(map_data->val[1] != 222); CHECK_FAIL(map_data->val[2] != 321); @@ -203,6 +189,8 @@ void test_mmap(void) map_data = tmp2; CHECK_FAIL(bss_data->in_val != 321); CHECK_FAIL(bss_data->out_val != 321); + CHECK_FAIL(skel->bss->in_val != 321); + CHECK_FAIL(skel->bss->out_val != 321); CHECK_FAIL(map_data->val[0] != 111); CHECK_FAIL(map_data->val[1] != 222); CHECK_FAIL(map_data->val[2] != 321); @@ -214,7 +202,5 @@ cleanup: CHECK_FAIL(munmap(bss_mmaped, bss_sz)); if (map_mmaped) 
CHECK_FAIL(munmap(map_mmaped, map_sz)); - if (!IS_ERR_OR_NULL(link)) - bpf_link__destroy(link); - bpf_object__close(obj); + test_mmap__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c index 3003fddc0613..cf6c87936c69 100644 --- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c +++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c @@ -4,6 +4,7 @@ #include <sched.h> #include <sys/socket.h> #include <test_progs.h> +#include "libbpf_internal.h" static void on_sample(void *ctx, int cpu, void *data, __u32 size) { @@ -19,7 +20,7 @@ static void on_sample(void *ctx, int cpu, void *data, __u32 size) void test_perf_buffer(void) { - int err, prog_fd, nr_cpus, i, duration = 0; + int err, prog_fd, on_len, nr_on_cpus = 0, nr_cpus, i, duration = 0; const char *prog_name = "kprobe/sys_nanosleep"; const char *file = "./test_perf_buffer.o"; struct perf_buffer_opts pb_opts = {}; @@ -29,15 +30,27 @@ void test_perf_buffer(void) struct bpf_object *obj; struct perf_buffer *pb; struct bpf_link *link; + bool *online; nr_cpus = libbpf_num_possible_cpus(); if (CHECK(nr_cpus < 0, "nr_cpus", "err %d\n", nr_cpus)) return; + err = parse_cpu_mask_file("/sys/devices/system/cpu/online", + &online, &on_len); + if (CHECK(err, "nr_on_cpus", "err %d\n", err)) + return; + + for (i = 0; i < on_len; i++) + if (online[i]) + nr_on_cpus++; + /* load program */ err = bpf_prog_load(file, BPF_PROG_TYPE_KPROBE, &obj, &prog_fd); - if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) - return; + if (CHECK(err, "obj_load", "err %d errno %d\n", err, errno)) { + obj = NULL; + goto out_close; + } prog = bpf_object__find_program_by_title(obj, prog_name); if (CHECK(!prog, "find_probe", "prog '%s' not found\n", prog_name)) @@ -64,6 +77,11 @@ void test_perf_buffer(void) /* trigger kprobe on every CPU */ CPU_ZERO(&cpu_seen); for (i = 0; i < nr_cpus; i++) { + if (i >= on_len || !online[i]) { + printf("skipping offline CPU #%d\n", i); + continue; + } + CPU_ZERO(&cpu_set); CPU_SET(i, &cpu_set); @@ -81,8 +99,8 @@ void test_perf_buffer(void) if (CHECK(err < 0, "perf_buffer__poll", "err %d\n", err)) goto out_free_pb; - if (CHECK(CPU_COUNT(&cpu_seen) != nr_cpus, "seen_cpu_cnt", - "expect %d, seen %d\n", nr_cpus, CPU_COUNT(&cpu_seen))) + if (CHECK(CPU_COUNT(&cpu_seen) != nr_on_cpus, "seen_cpu_cnt", + "expect %d, seen %d\n", nr_on_cpus, CPU_COUNT(&cpu_seen))) goto out_free_pb; out_free_pb: @@ -91,4 +109,5 @@ out_detach: bpf_link__destroy(link); out_close: bpf_object__close(obj); + free(online); } diff --git a/tools/testing/selftests/bpf/prog_tests/probe_user.c b/tools/testing/selftests/bpf/prog_tests/probe_user.c index 8a3187dec048..7aecfd9e87d1 100644 --- a/tools/testing/selftests/bpf/prog_tests/probe_user.c +++ b/tools/testing/selftests/bpf/prog_tests/probe_user.c @@ -3,8 +3,7 @@ void test_probe_user(void) { -#define kprobe_name "__sys_connect" - const char *prog_name = "kprobe/" kprobe_name; + const char *prog_name = "kprobe/__sys_connect"; const char *obj_file = "./test_probe_user.o"; DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, ); int err, results_map_fd, sock_fd, duration = 0; @@ -33,8 +32,7 @@ void test_probe_user(void) "err %d\n", results_map_fd)) goto cleanup; - kprobe_link = bpf_program__attach_kprobe(kprobe_prog, false, - kprobe_name); + kprobe_link = bpf_program__attach(kprobe_prog); if (CHECK(IS_ERR(kprobe_link), "attach_kprobe", "err %ld\n", PTR_ERR(kprobe_link))) { kprobe_link = NULL; diff --git 
a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c index d90acc13d1ec..563e12120e77 100644 --- a/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c +++ b/tools/testing/selftests/bpf/prog_tests/rdonly_maps.c @@ -16,14 +16,11 @@ struct rdonly_map_subtest { void test_rdonly_maps(void) { - const char *prog_name_skip_loop = "raw_tracepoint/sys_enter:skip_loop"; - const char *prog_name_part_loop = "raw_tracepoint/sys_enter:part_loop"; - const char *prog_name_full_loop = "raw_tracepoint/sys_enter:full_loop"; const char *file = "test_rdonly_maps.o"; struct rdonly_map_subtest subtests[] = { - { "skip loop", prog_name_skip_loop, 0, 0 }, - { "part loop", prog_name_part_loop, 3, 2 + 3 + 4 }, - { "full loop", prog_name_full_loop, 4, 2 + 3 + 4 + 5 }, + { "skip loop", "skip_loop", 0, 0 }, + { "part loop", "part_loop", 3, 2 + 3 + 4 }, + { "full loop", "full_loop", 4, 2 + 3 + 4 + 5 }, }; int i, err, zero = 0, duration = 0; struct bpf_link *link = NULL; @@ -50,7 +47,7 @@ void test_rdonly_maps(void) if (!test__start_subtest(t->subtest_name)) continue; - prog = bpf_object__find_program_by_title(obj, t->prog_name); + prog = bpf_object__find_program_by_name(obj, t->prog_name); if (CHECK(!prog, "find_prog", "prog '%s' not found\n", t->prog_name)) goto cleanup; diff --git a/tools/testing/selftests/bpf/test_select_reuseport.c b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c index 7566c13eb51a..2c37ae7dc214 100644 --- a/tools/testing/selftests/bpf/test_select_reuseport.c +++ b/tools/testing/selftests/bpf/prog_tests/select_reuseport.c @@ -20,8 +20,11 @@ #include <bpf/libbpf.h> #include "bpf_rlimit.h" #include "bpf_util.h" + +#include "test_progs.h" #include "test_select_reuseport_common.h" +#define MAX_TEST_NAME 80 #define MIN_TCPHDR_LEN 20 #define UDPHDR_LEN 8 @@ -32,11 +35,11 @@ static int result_map, tmp_index_ovr_map, linum_map, data_check_map; static enum result expected_results[NR_RESULTS]; static int sk_fds[REUSEPORT_ARRAY_SIZE]; -static int reuseport_array, outer_map; +static int reuseport_array = -1, outer_map = -1; static int select_by_skb_data_prog; -static int saved_tcp_syncookie; +static int saved_tcp_syncookie = -1; static struct bpf_object *obj; -static int saved_tcp_fo; +static int saved_tcp_fo = -1; static __u32 index_zero; static int epfd; @@ -46,16 +49,21 @@ static union sa46 { sa_family_t family; } srv_sa; -#define CHECK(condition, tag, format...) ({ \ - int __ret = !!(condition); \ - if (__ret) { \ - printf("%s(%d):FAIL:%s ", __func__, __LINE__, tag); \ - printf(format); \ - exit(-1); \ +#define RET_IF(condition, tag, format...) ({ \ + if (CHECK_FAIL(condition)) { \ + printf(tag " " format); \ + return; \ + } \ +}) + +#define RET_ERR(condition, tag, format...) 
({ \ + if (CHECK_FAIL(condition)) { \ + printf(tag " " format); \ + return -1; \ } \ }) -static void create_maps(void) +static int create_maps(void) { struct bpf_create_map_attr attr = {}; @@ -67,8 +75,8 @@ static void create_maps(void) attr.max_entries = REUSEPORT_ARRAY_SIZE; reuseport_array = bpf_create_map_xattr(&attr); - CHECK(reuseport_array == -1, "creating reuseport_array", - "reuseport_array:%d errno:%d\n", reuseport_array, errno); + RET_ERR(reuseport_array == -1, "creating reuseport_array", + "reuseport_array:%d errno:%d\n", reuseport_array, errno); /* Creating outer_map */ attr.name = "outer_map"; @@ -78,63 +86,61 @@ static void create_maps(void) attr.max_entries = 1; attr.inner_map_fd = reuseport_array; outer_map = bpf_create_map_xattr(&attr); - CHECK(outer_map == -1, "creating outer_map", - "outer_map:%d errno:%d\n", outer_map, errno); + RET_ERR(outer_map == -1, "creating outer_map", + "outer_map:%d errno:%d\n", outer_map, errno); + + return 0; } -static void prepare_bpf_obj(void) +static int prepare_bpf_obj(void) { struct bpf_program *prog; struct bpf_map *map; int err; - struct bpf_object_open_attr attr = { - .file = "test_select_reuseport_kern.o", - .prog_type = BPF_PROG_TYPE_SK_REUSEPORT, - }; - obj = bpf_object__open_xattr(&attr); - CHECK(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o", - "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj)); - - prog = bpf_program__next(NULL, obj); - CHECK(!prog, "get first bpf_program", "!prog\n"); - bpf_program__set_type(prog, attr.prog_type); + obj = bpf_object__open("test_select_reuseport_kern.o"); + RET_ERR(IS_ERR_OR_NULL(obj), "open test_select_reuseport_kern.o", + "obj:%p PTR_ERR(obj):%ld\n", obj, PTR_ERR(obj)); map = bpf_object__find_map_by_name(obj, "outer_map"); - CHECK(!map, "find outer_map", "!map\n"); + RET_ERR(!map, "find outer_map", "!map\n"); err = bpf_map__reuse_fd(map, outer_map); - CHECK(err, "reuse outer_map", "err:%d\n", err); + RET_ERR(err, "reuse outer_map", "err:%d\n", err); err = bpf_object__load(obj); - CHECK(err, "load bpf_object", "err:%d\n", err); + RET_ERR(err, "load bpf_object", "err:%d\n", err); + prog = bpf_program__next(NULL, obj); + RET_ERR(!prog, "get first bpf_program", "!prog\n"); select_by_skb_data_prog = bpf_program__fd(prog); - CHECK(select_by_skb_data_prog == -1, "get prog fd", - "select_by_skb_data_prog:%d\n", select_by_skb_data_prog); + RET_ERR(select_by_skb_data_prog == -1, "get prog fd", + "select_by_skb_data_prog:%d\n", select_by_skb_data_prog); map = bpf_object__find_map_by_name(obj, "result_map"); - CHECK(!map, "find result_map", "!map\n"); + RET_ERR(!map, "find result_map", "!map\n"); result_map = bpf_map__fd(map); - CHECK(result_map == -1, "get result_map fd", - "result_map:%d\n", result_map); + RET_ERR(result_map == -1, "get result_map fd", + "result_map:%d\n", result_map); map = bpf_object__find_map_by_name(obj, "tmp_index_ovr_map"); - CHECK(!map, "find tmp_index_ovr_map", "!map\n"); + RET_ERR(!map, "find tmp_index_ovr_map", "!map\n"); tmp_index_ovr_map = bpf_map__fd(map); - CHECK(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd", - "tmp_index_ovr_map:%d\n", tmp_index_ovr_map); + RET_ERR(tmp_index_ovr_map == -1, "get tmp_index_ovr_map fd", + "tmp_index_ovr_map:%d\n", tmp_index_ovr_map); map = bpf_object__find_map_by_name(obj, "linum_map"); - CHECK(!map, "find linum_map", "!map\n"); + RET_ERR(!map, "find linum_map", "!map\n"); linum_map = bpf_map__fd(map); - CHECK(linum_map == -1, "get linum_map fd", - "linum_map:%d\n", linum_map); + RET_ERR(linum_map == -1, "get linum_map fd", + 
"linum_map:%d\n", linum_map); map = bpf_object__find_map_by_name(obj, "data_check_map"); - CHECK(!map, "find data_check_map", "!map\n"); + RET_ERR(!map, "find data_check_map", "!map\n"); data_check_map = bpf_map__fd(map); - CHECK(data_check_map == -1, "get data_check_map fd", - "data_check_map:%d\n", data_check_map); + RET_ERR(data_check_map == -1, "get data_check_map fd", + "data_check_map:%d\n", data_check_map); + + return 0; } static void sa46_init_loopback(union sa46 *sa, sa_family_t family) @@ -163,65 +169,73 @@ static int read_int_sysctl(const char *sysctl) int fd, ret; fd = open(sysctl, 0); - CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n", - sysctl, fd, errno); + RET_ERR(fd == -1, "open(sysctl)", + "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno); ret = read(fd, buf, sizeof(buf)); - CHECK(ret <= 0, "read(sysctl)", "sysctl:%s ret:%d errno:%d\n", - sysctl, ret, errno); - close(fd); + RET_ERR(ret <= 0, "read(sysctl)", + "sysctl:%s ret:%d errno:%d\n", sysctl, ret, errno); + close(fd); return atoi(buf); } -static void write_int_sysctl(const char *sysctl, int v) +static int write_int_sysctl(const char *sysctl, int v) { int fd, ret, size; char buf[16]; fd = open(sysctl, O_RDWR); - CHECK(fd == -1, "open(sysctl)", "sysctl:%s fd:%d errno:%d\n", - sysctl, fd, errno); + RET_ERR(fd == -1, "open(sysctl)", + "sysctl:%s fd:%d errno:%d\n", sysctl, fd, errno); size = snprintf(buf, sizeof(buf), "%d", v); ret = write(fd, buf, size); - CHECK(ret != size, "write(sysctl)", - "sysctl:%s ret:%d size:%d errno:%d\n", sysctl, ret, size, errno); + RET_ERR(ret != size, "write(sysctl)", + "sysctl:%s ret:%d size:%d errno:%d\n", + sysctl, ret, size, errno); + close(fd); + return 0; } static void restore_sysctls(void) { - write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo); - write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie); + if (saved_tcp_fo != -1) + write_int_sysctl(TCP_FO_SYSCTL, saved_tcp_fo); + if (saved_tcp_syncookie != -1) + write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, saved_tcp_syncookie); } -static void enable_fastopen(void) +static int enable_fastopen(void) { int fo; fo = read_int_sysctl(TCP_FO_SYSCTL); - write_int_sysctl(TCP_FO_SYSCTL, fo | 7); + if (fo < 0) + return -1; + + return write_int_sysctl(TCP_FO_SYSCTL, fo | 7); } -static void enable_syncookie(void) +static int enable_syncookie(void) { - write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2); + return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 2); } -static void disable_syncookie(void) +static int disable_syncookie(void) { - write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0); + return write_int_sysctl(TCP_SYNCOOKIE_SYSCTL, 0); } -static __u32 get_linum(void) +static long get_linum(void) { __u32 linum; int err; err = bpf_map_lookup_elem(linum_map, &index_zero, &linum); - CHECK(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n", - err, errno); + RET_ERR(err == -1, "lookup_elem(linum_map)", "err:%d errno:%d\n", + err, errno); return linum; } @@ -237,12 +251,12 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, addrlen = sizeof(cli_sa); err = getsockname(cli_fd, (struct sockaddr *)&cli_sa, &addrlen); - CHECK(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n", - err, errno); + RET_IF(err == -1, "getsockname(cli_fd)", "err:%d errno:%d\n", + err, errno); err = bpf_map_lookup_elem(data_check_map, &index_zero, &result); - CHECK(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n", - err, errno); + RET_IF(err == -1, "lookup_elem(data_check_map)", "err:%d errno:%d\n", + err, errno); if (type == SOCK_STREAM) { expected.len 
= MIN_TCPHDR_LEN; @@ -284,22 +298,22 @@ static void check_data(int type, sa_family_t family, const struct cmd *cmd, printf("expected: (0x%x, %u, %u)\n", expected.eth_protocol, expected.ip_protocol, expected.bind_inany); - CHECK(1, "data_check result != expected", - "bpf_prog_linum:%u\n", get_linum()); + RET_IF(1, "data_check result != expected", + "bpf_prog_linum:%ld\n", get_linum()); } - CHECK(!result.hash, "data_check result.hash empty", - "result.hash:%u", result.hash); + RET_IF(!result.hash, "data_check result.hash empty", + "result.hash:%u", result.hash); expected.len += cmd ? sizeof(*cmd) : 0; if (type == SOCK_STREAM) - CHECK(expected.len > result.len, "expected.len > result.len", - "expected.len:%u result.len:%u bpf_prog_linum:%u\n", - expected.len, result.len, get_linum()); + RET_IF(expected.len > result.len, "expected.len > result.len", + "expected.len:%u result.len:%u bpf_prog_linum:%ld\n", + expected.len, result.len, get_linum()); else - CHECK(expected.len != result.len, "expected.len != result.len", - "expected.len:%u result.len:%u bpf_prog_linum:%u\n", - expected.len, result.len, get_linum()); + RET_IF(expected.len != result.len, "expected.len != result.len", + "expected.len:%u result.len:%u bpf_prog_linum:%ld\n", + expected.len, result.len, get_linum()); } static void check_results(void) @@ -310,8 +324,8 @@ static void check_results(void) for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &results[i]); - CHECK(err == -1, "lookup_elem(result_map)", - "i:%u err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "lookup_elem(result_map)", + "i:%u err:%d errno:%d\n", i, err, errno); } for (i = 0; i < NR_RESULTS; i++) { @@ -337,10 +351,10 @@ static void check_results(void) printf(", %u", expected_results[i]); printf("]\n"); - CHECK(expected_results[broken] != results[broken], - "unexpected result", - "expected_results[%u] != results[%u] bpf_prog_linum:%u\n", - broken, broken, get_linum()); + RET_IF(expected_results[broken] != results[broken], + "unexpected result", + "expected_results[%u] != results[%u] bpf_prog_linum:%ld\n", + broken, broken, get_linum()); } static int send_data(int type, sa_family_t family, void *data, size_t len, @@ -350,17 +364,17 @@ static int send_data(int type, sa_family_t family, void *data, size_t len, int fd, err; fd = socket(family, type, 0); - CHECK(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); + RET_ERR(fd == -1, "socket()", "fd:%d errno:%d\n", fd, errno); sa46_init_loopback(&cli_sa, family); err = bind(fd, (struct sockaddr *)&cli_sa, sizeof(cli_sa)); - CHECK(fd == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); + RET_ERR(err == -1, "bind(cli_sa)", "err:%d errno:%d\n", err, errno); err = sendto(fd, data, len, MSG_FASTOPEN, (struct sockaddr *)&srv_sa, sizeof(srv_sa)); - CHECK(err != len && expected >= PASS, - "sendto()", "family:%u err:%d errno:%d expected:%d\n", - family, err, errno, expected); + RET_ERR(err != len && expected >= PASS, - "sendto()", "family:%u err:%d errno:%d expected:%d\n", - family, err, errno, expected); + "sendto()", "family:%u err:%d errno:%d expected:%d\n", + family, err, errno, expected); return fd; } @@ -375,47 +389,49 @@ static void do_test(int type, sa_family_t family, struct cmd *cmd, cli_fd = send_data(type, family, cmd, cmd ? sizeof(*cmd) : 0, expected); + if (cli_fd < 0) + return; nev = epoll_wait(epfd, &ev, 1, expected >= PASS ? 5 : 0); - CHECK((nev <= 0 && expected >= PASS) || - (nev > 0 && expected < PASS), - "nev <> expected", - "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n", - nev, expected, type, family, - cmd ? cmd->reuseport_index : -1, - cmd ? 
cmd->pass_on_failure : -1); + RET_IF((nev <= 0 && expected >= PASS) || + (nev > 0 && expected < PASS), + "nev <> expected", + "nev:%d expected:%d type:%d family:%d data:(%d, %d)\n", + nev, expected, type, family, + cmd ? cmd->reuseport_index : -1, + cmd ? cmd->pass_on_failure : -1); check_results(); check_data(type, family, cmd, cli_fd); if (expected < PASS) return; - CHECK(expected != PASS_ERR_SK_SELECT_REUSEPORT && - cmd->reuseport_index != ev.data.u32, - "check cmd->reuseport_index", - "cmd:(%u, %u) ev.data.u32:%u\n", - cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32); + RET_IF(expected != PASS_ERR_SK_SELECT_REUSEPORT && + cmd->reuseport_index != ev.data.u32, + "check cmd->reuseport_index", + "cmd:(%u, %u) ev.data.u32:%u\n", + cmd->pass_on_failure, cmd->reuseport_index, ev.data.u32); srv_fd = sk_fds[ev.data.u32]; if (type == SOCK_STREAM) { int new_fd = accept(srv_fd, NULL, 0); - CHECK(new_fd == -1, "accept(srv_fd)", - "ev.data.u32:%u new_fd:%d errno:%d\n", - ev.data.u32, new_fd, errno); + RET_IF(new_fd == -1, "accept(srv_fd)", + "ev.data.u32:%u new_fd:%d errno:%d\n", + ev.data.u32, new_fd, errno); nread = recv(new_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT); - CHECK(nread != sizeof(rcv_cmd), - "recv(new_fd)", - "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", - ev.data.u32, nread, sizeof(rcv_cmd), errno); + RET_IF(nread != sizeof(rcv_cmd), + "recv(new_fd)", + "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", + ev.data.u32, nread, sizeof(rcv_cmd), errno); close(new_fd); } else { nread = recv(srv_fd, &rcv_cmd, sizeof(rcv_cmd), MSG_DONTWAIT); - CHECK(nread != sizeof(rcv_cmd), - "recv(sk_fds)", - "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", - ev.data.u32, nread, sizeof(rcv_cmd), errno); + RET_IF(nread != sizeof(rcv_cmd), + "recv(sk_fds)", + "ev.data.u32:%u nread:%zd sizeof(rcv_cmd):%zu errno:%d\n", + ev.data.u32, nread, sizeof(rcv_cmd), errno); } close(cli_fd); @@ -428,18 +444,14 @@ static void test_err_inner_map(int type, sa_family_t family) .pass_on_failure = 0, }; - printf("%s: ", __func__); expected_results[DROP_ERR_INNER_MAP]++; do_test(type, family, &cmd, DROP_ERR_INNER_MAP); - printf("OK\n"); } static void test_err_skb_data(int type, sa_family_t family) { - printf("%s: ", __func__); expected_results[DROP_ERR_SKB_DATA]++; do_test(type, family, NULL, DROP_ERR_SKB_DATA); - printf("OK\n"); } static void test_err_sk_select_port(int type, sa_family_t family) @@ -449,10 +461,8 @@ static void test_err_sk_select_port(int type, sa_family_t family) .pass_on_failure = 0, }; - printf("%s: ", __func__); expected_results[DROP_ERR_SK_SELECT_REUSEPORT]++; do_test(type, family, &cmd, DROP_ERR_SK_SELECT_REUSEPORT); - printf("OK\n"); } static void test_pass(int type, sa_family_t family) @@ -460,14 +470,12 @@ static void test_pass(int type, sa_family_t family) struct cmd cmd; int i; - printf("%s: ", __func__); cmd.pass_on_failure = 0; for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) { expected_results[PASS]++; cmd.reuseport_index = i; do_test(type, family, &cmd, PASS); } - printf("OK\n"); } static void test_syncookie(int type, sa_family_t family) @@ -481,7 +489,6 @@ static void test_syncookie(int type, sa_family_t family) if (type != SOCK_STREAM) return; - printf("%s: ", __func__); /* * +1 for TCP-SYN and * +1 for the TCP-ACK (ack the syncookie) @@ -497,17 +504,16 @@ static void test_syncookie(int type, sa_family_t family) */ err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &tmp_index, BPF_ANY); - CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)", - 
"err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, 1)", + "err:%d errno:%d\n", err, errno); do_test(type, family, &cmd, PASS); err = bpf_map_lookup_elem(tmp_index_ovr_map, &index_zero, &tmp_index); - CHECK(err == -1 || tmp_index != -1, - "lookup_elem(tmp_index_ovr_map)", - "err:%d errno:%d tmp_index:%d\n", - err, errno, tmp_index); + RET_IF(err == -1 || tmp_index != -1, + "lookup_elem(tmp_index_ovr_map)", + "err:%d errno:%d tmp_index:%d\n", + err, errno, tmp_index); disable_syncookie(); - printf("OK\n"); } static void test_pass_on_err(int type, sa_family_t family) @@ -517,10 +523,8 @@ static void test_pass_on_err(int type, sa_family_t family) .pass_on_failure = 1, }; - printf("%s: ", __func__); expected_results[PASS_ERR_SK_SELECT_REUSEPORT] += 1; do_test(type, family, &cmd, PASS_ERR_SK_SELECT_REUSEPORT); - printf("OK\n"); } static void test_detach_bpf(int type, sa_family_t family) @@ -532,46 +536,47 @@ static void test_detach_bpf(int type, sa_family_t family) struct cmd cmd = {}; int optvalue = 0; - printf("%s: ", __func__); err = setsockopt(sk_fds[0], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF, &optvalue, sizeof(optvalue)); - CHECK(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "setsockopt(SO_DETACH_REUSEPORT_BPF)", + "err:%d errno:%d\n", err, errno); err = setsockopt(sk_fds[1], SOL_SOCKET, SO_DETACH_REUSEPORT_BPF, &optvalue, sizeof(optvalue)); - CHECK(err == 0 || errno != ENOENT, "setsockopt(SO_DETACH_REUSEPORT_BPF)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == 0 || errno != ENOENT, + "setsockopt(SO_DETACH_REUSEPORT_BPF)", + "err:%d errno:%d\n", err, errno); for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - CHECK(err == -1, "lookup_elem(result_map)", - "i:%u err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "lookup_elem(result_map)", + "i:%u err:%d errno:%d\n", i, err, errno); nr_run_before += tmp; } cli_fd = send_data(type, family, &cmd, sizeof(cmd), PASS); + if (cli_fd < 0) + return; nev = epoll_wait(epfd, &ev, 1, 5); - CHECK(nev <= 0, "nev <= 0", - "nev:%d expected:1 type:%d family:%d data:(0, 0)\n", - nev, type, family); + RET_IF(nev <= 0, "nev <= 0", + "nev:%d expected:1 type:%d family:%d data:(0, 0)\n", + nev, type, family); for (i = 0; i < NR_RESULTS; i++) { err = bpf_map_lookup_elem(result_map, &i, &tmp); - CHECK(err == -1, "lookup_elem(result_map)", - "i:%u err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "lookup_elem(result_map)", + "i:%u err:%d errno:%d\n", i, err, errno); nr_run_after += tmp; } - CHECK(nr_run_before != nr_run_after, - "nr_run_before != nr_run_after", - "nr_run_before:%u nr_run_after:%u\n", - nr_run_before, nr_run_after); + RET_IF(nr_run_before != nr_run_after, + "nr_run_before != nr_run_after", + "nr_run_before:%u nr_run_after:%u\n", + nr_run_before, nr_run_after); - printf("OK\n"); close(cli_fd); #else - printf("%s: SKIP\n", __func__); + test__skip(); #endif } @@ -594,73 +599,83 @@ static void prepare_sk_fds(int type, sa_family_t family, bool inany) */ for (i = first; i >= 0; i--) { sk_fds[i] = socket(family, type, 0); - CHECK(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n", - i, sk_fds[i], errno); + RET_IF(sk_fds[i] == -1, "socket()", "sk_fds[%d]:%d errno:%d\n", + i, sk_fds[i], errno); err = setsockopt(sk_fds[i], SOL_SOCKET, SO_REUSEPORT, &optval, sizeof(optval)); - CHECK(err == -1, "setsockopt(SO_REUSEPORT)", - "sk_fds[%d] err:%d errno:%d\n", - i, err, errno); + RET_IF(err == -1, 
"setsockopt(SO_REUSEPORT)", + "sk_fds[%d] err:%d errno:%d\n", + i, err, errno); if (i == first) { err = setsockopt(sk_fds[i], SOL_SOCKET, SO_ATTACH_REUSEPORT_EBPF, &select_by_skb_data_prog, sizeof(select_by_skb_data_prog)); - CHECK(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "setsockopt(SO_ATTACH_REUEPORT_EBPF)", + "err:%d errno:%d\n", err, errno); } err = bind(sk_fds[i], (struct sockaddr *)&srv_sa, addrlen); - CHECK(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n", - i, err, errno); + RET_IF(err == -1, "bind()", "sk_fds[%d] err:%d errno:%d\n", + i, err, errno); if (type == SOCK_STREAM) { err = listen(sk_fds[i], 10); - CHECK(err == -1, "listen()", - "sk_fds[%d] err:%d errno:%d\n", - i, err, errno); + RET_IF(err == -1, "listen()", + "sk_fds[%d] err:%d errno:%d\n", + i, err, errno); } err = bpf_map_update_elem(reuseport_array, &i, &sk_fds[i], BPF_NOEXIST); - CHECK(err == -1, "update_elem(reuseport_array)", - "sk_fds[%d] err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "update_elem(reuseport_array)", + "sk_fds[%d] err:%d errno:%d\n", i, err, errno); if (i == first) { socklen_t addrlen = sizeof(srv_sa); err = getsockname(sk_fds[i], (struct sockaddr *)&srv_sa, &addrlen); - CHECK(err == -1, "getsockname()", - "sk_fds[%d] err:%d errno:%d\n", i, err, errno); + RET_IF(err == -1, "getsockname()", + "sk_fds[%d] err:%d errno:%d\n", i, err, errno); } } epfd = epoll_create(1); - CHECK(epfd == -1, "epoll_create(1)", - "epfd:%d errno:%d\n", epfd, errno); + RET_IF(epfd == -1, "epoll_create(1)", + "epfd:%d errno:%d\n", epfd, errno); ev.events = EPOLLIN; for (i = 0; i < REUSEPORT_ARRAY_SIZE; i++) { ev.data.u32 = i; err = epoll_ctl(epfd, EPOLL_CTL_ADD, sk_fds[i], &ev); - CHECK(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i); + RET_IF(err, "epoll_ctl(EPOLL_CTL_ADD)", "sk_fds[%d]\n", i); } } -static void setup_per_test(int type, unsigned short family, bool inany) +static void setup_per_test(int type, sa_family_t family, bool inany, + bool no_inner_map) { int ovr = -1, err; prepare_sk_fds(type, family, inany); err = bpf_map_update_elem(tmp_index_ovr_map, &index_zero, &ovr, BPF_ANY); - CHECK(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "update_elem(tmp_index_ovr_map, 0, -1)", + "err:%d errno:%d\n", err, errno); + + /* Install reuseport_array to outer_map? */ + if (no_inner_map) + return; + + err = bpf_map_update_elem(outer_map, &index_zero, &reuseport_array, + BPF_ANY); + RET_IF(err == -1, "update_elem(outer_map, 0, reuseport_array)", + "err:%d errno:%d\n", err, errno); } -static void cleanup_per_test(void) +static void cleanup_per_test(bool no_inner_map) { int i, err; @@ -668,75 +683,124 @@ static void cleanup_per_test(void) close(sk_fds[i]); close(epfd); + /* Delete reuseport_array from outer_map? 
*/ + if (no_inner_map) + return; + err = bpf_map_delete_elem(outer_map, &index_zero); - CHECK(err == -1, "delete_elem(outer_map)", - "err:%d errno:%d\n", err, errno); + RET_IF(err == -1, "delete_elem(outer_map)", + "err:%d errno:%d\n", err, errno); } static void cleanup(void) { - close(outer_map); - close(reuseport_array); - bpf_object__close(obj); + if (outer_map != -1) + close(outer_map); + if (reuseport_array != -1) + close(reuseport_array); + if (obj) + bpf_object__close(obj); } -static void test_all(void) +static const char *family_str(sa_family_t family) { - /* Extra SOCK_STREAM to test bind_inany==true */ - const int types[] = { SOCK_STREAM, SOCK_DGRAM, SOCK_STREAM }; - const char * const type_strings[] = { "TCP", "UDP", "TCP" }; - const char * const family_strings[] = { "IPv6", "IPv4" }; - const unsigned short families[] = { AF_INET6, AF_INET }; - const bool bind_inany[] = { false, false, true }; - int t, f, err; - - for (f = 0; f < ARRAY_SIZE(families); f++) { - unsigned short family = families[f]; - - for (t = 0; t < ARRAY_SIZE(types); t++) { - bool inany = bind_inany[t]; - int type = types[t]; - - printf("######## %s/%s %s ########\n", - family_strings[f], type_strings[t], - inany ? " INANY " : "LOOPBACK"); - - setup_per_test(type, family, inany); - - test_err_inner_map(type, family); - - /* Install reuseport_array to the outer_map */ - err = bpf_map_update_elem(outer_map, &index_zero, - &reuseport_array, BPF_ANY); - CHECK(err == -1, "update_elem(outer_map)", - "err:%d errno:%d\n", err, errno); - - test_err_skb_data(type, family); - test_err_sk_select_port(type, family); - test_pass(type, family); - test_syncookie(type, family); - test_pass_on_err(type, family); - /* Must be the last test */ - test_detach_bpf(type, family); - - cleanup_per_test(); - printf("\n"); - } + switch (family) { + case AF_INET: + return "IPv4"; + case AF_INET6: + return "IPv6"; + default: + return "unknown"; + } +} + +static const char *sotype_str(int sotype) +{ + switch (sotype) { + case SOCK_STREAM: + return "TCP"; + case SOCK_DGRAM: + return "UDP"; + default: + return "unknown"; + } +} + +#define TEST_INIT(fn, ...) { fn, #fn, __VA_ARGS__ } + +static void test_config(int sotype, sa_family_t family, bool inany) +{ + const struct test { + void (*fn)(int sotype, sa_family_t family); + const char *name; + bool no_inner_map; + } tests[] = { + TEST_INIT(test_err_inner_map, true /* no_inner_map */), + TEST_INIT(test_err_skb_data), + TEST_INIT(test_err_sk_select_port), + TEST_INIT(test_pass), + TEST_INIT(test_syncookie), + TEST_INIT(test_pass_on_err), + TEST_INIT(test_detach_bpf), + }; + char s[MAX_TEST_NAME]; + const struct test *t; + + for (t = tests; t < tests + ARRAY_SIZE(tests); t++) { + snprintf(s, sizeof(s), "%s/%s %s %s", + family_str(family), sotype_str(sotype), + inany ? 
"INANY" : "LOOPBACK", t->name); + + if (!test__start_subtest(s)) + continue; + + setup_per_test(sotype, family, inany, t->no_inner_map); + t->fn(sotype, family); + cleanup_per_test(t->no_inner_map); } } -int main(int argc, const char **argv) +#define BIND_INANY true + +static void test_all(void) { - create_maps(); - prepare_bpf_obj(); + const struct config { + int sotype; + sa_family_t family; + bool inany; + } configs[] = { + { SOCK_STREAM, AF_INET }, + { SOCK_STREAM, AF_INET, BIND_INANY }, + { SOCK_STREAM, AF_INET6 }, + { SOCK_STREAM, AF_INET6, BIND_INANY }, + { SOCK_DGRAM, AF_INET }, + { SOCK_DGRAM, AF_INET6 }, + }; + const struct config *c; + + for (c = configs; c < configs + ARRAY_SIZE(configs); c++) + test_config(c->sotype, c->family, c->inany); +} + +void test_select_reuseport(void) +{ + if (create_maps()) + goto out; + if (prepare_bpf_obj()) + goto out; + saved_tcp_fo = read_int_sysctl(TCP_FO_SYSCTL); saved_tcp_syncookie = read_int_sysctl(TCP_SYNCOOKIE_SYSCTL); - enable_fastopen(); - disable_syncookie(); - atexit(restore_sysctls); + if (saved_tcp_syncookie < 0 || saved_tcp_syncookie < 0) + goto out; - test_all(); + if (enable_fastopen()) + goto out; + if (disable_syncookie()) + goto out; + test_all(); +out: cleanup(); - return 0; + restore_sysctls(); } diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c index a2eb8db8dafb..c6d6b685a946 100644 --- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c +++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c @@ -11,6 +11,9 @@ void test_skb_ctx(void) .cb[4] = 5, .priority = 6, .tstamp = 7, + .wire_len = 100, + .gso_segs = 8, + .mark = 9, }; struct bpf_prog_test_run_attr tattr = { .data_in = &pkt_v4, @@ -91,4 +94,8 @@ void test_skb_ctx(void) "ctx_out_tstamp", "skb->tstamp == %lld, expected %d\n", skb.tstamp, 8); + CHECK_ATTR(skb.mark != 10, + "ctx_out_mark", + "skb->mark == %u, expected %d\n", + skb.mark, 10); } diff --git a/tools/testing/selftests/bpf/prog_tests/skeleton.c b/tools/testing/selftests/bpf/prog_tests/skeleton.c new file mode 100644 index 000000000000..9264a2736018 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/skeleton.c @@ -0,0 +1,63 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <test_progs.h> + +struct s { + int a; + long long b; +} __attribute__((packed)); + +#include "test_skeleton.skel.h" + +void test_skeleton(void) +{ + int duration = 0, err; + struct test_skeleton* skel; + struct test_skeleton__bss *bss; + struct test_skeleton__kconfig *kcfg; + + skel = test_skeleton__open(); + if (CHECK(!skel, "skel_open", "failed to open skeleton\n")) + return; + + if (CHECK(skel->kconfig, "skel_kconfig", "kconfig is mmaped()!\n")) + goto cleanup; + + err = test_skeleton__load(skel); + if (CHECK(err, "skel_load", "failed to load skeleton: %d\n", err)) + goto cleanup; + + bss = skel->bss; + bss->in1 = 1; + bss->in2 = 2; + bss->in3 = 3; + bss->in4 = 4; + bss->in5.a = 5; + bss->in5.b = 6; + kcfg = skel->kconfig; + + err = test_skeleton__attach(skel); + if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err)) + goto cleanup; + + /* trigger tracepoint */ + usleep(1); + + CHECK(bss->out1 != 1, "res1", "got %d != exp %d\n", bss->out1, 1); + CHECK(bss->out2 != 2, "res2", "got %lld != exp %d\n", bss->out2, 2); + CHECK(bss->out3 != 3, "res3", "got %d != exp %d\n", (int)bss->out3, 3); + CHECK(bss->out4 != 4, "res4", "got %lld != exp %d\n", bss->out4, 4); + CHECK(bss->handler_out5.a != 5, "res5", "got %d != exp %d\n", + 
bss->handler_out5.a, 5); + CHECK(bss->handler_out5.b != 6, "res6", "got %lld != exp %d\n", + bss->handler_out5.b, 6); + + CHECK(bss->bpf_syscall != kcfg->CONFIG_BPF_SYSCALL, "ext1", + "got %d != exp %d\n", bss->bpf_syscall, kcfg->CONFIG_BPF_SYSCALL); + CHECK(bss->kern_ver != kcfg->LINUX_KERNEL_VERSION, "ext2", + "got %d != exp %d\n", bss->kern_ver, kcfg->LINUX_KERNEL_VERSION); + +cleanup: + test_skeleton__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c index d841dced971f..e8399ae50e77 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id.c @@ -1,16 +1,14 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_stacktrace_build_id.skel.h" void test_stacktrace_build_id(void) { + int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "tracepoint/random/urandom_read"; - const char *file = "./test_stacktrace_build_id.o"; - int err, prog_fd, stack_trace_len; + struct test_stacktrace_build_id *skel; + int err, stack_trace_len; __u32 key, previous_key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link = NULL; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -18,43 +16,24 @@ void test_stacktrace_build_id(void) int retry = 1; retry: - err = bpf_prog_load(file, BPF_PROG_TYPE_TRACEPOINT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = test_stacktrace_build_id__open_and_load(); + if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n")) return; - prog = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - goto close_prog; - - link = bpf_program__attach_tracepoint(prog, "random", "urandom_read"); - if (CHECK(IS_ERR(link), "attach_tp", "err %ld\n", PTR_ERR(link))) - goto close_prog; + err = test_stacktrace_build_id__attach(skel); + if (CHECK(err, "attach_tp", "err %d\n", err)) + goto cleanup; /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK(control_map_fd < 0, "bpf_find_map control_map", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", - err, errno)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); + stack_amap_fd = bpf_map__fd(skel->maps.stack_amap); if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null"))) - goto disable_pmu; + goto cleanup; if (CHECK_FAIL(system("./urandom_read"))) - goto disable_pmu; + goto cleanup; /* disable stack trace collection */ key = 0; val = 1; @@ -66,23 +45,23 @@ retry: err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. 
stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = extract_build_id(buf, 256); if (CHECK(err, "get build_id with readelf", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = bpf_map_get_next_key(stackmap_fd, NULL, &key); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; do { char build_id[64]; @@ -90,7 +69,7 @@ retry: err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && id_offs[i].offset != 0) { @@ -108,8 +87,7 @@ retry: * try it one more time. */ if (build_id_matches < 1 && retry--) { - bpf_link__destroy(link); - bpf_object__close(obj); + test_stacktrace_build_id__destroy(skel); printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", __func__); goto retry; @@ -117,17 +95,14 @@ retry: if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) - goto disable_pmu; + goto cleanup; - stack_trace_len = PERF_MAX_STACK_DEPTH - * sizeof(struct bpf_stack_build_id); + stack_trace_len = PERF_MAX_STACK_DEPTH * + sizeof(struct bpf_stack_build_id); err = compare_stack_ips(stackmap_fd, stack_amap_fd, stack_trace_len); CHECK(err, "compare_stack_ips stackmap vs. stack_amap", "err %d errno %d\n", err, errno); -disable_pmu: - bpf_link__destroy(link); - -close_prog: - bpf_object__close(obj); +cleanup: + test_stacktrace_build_id__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c index f62aa0eb959b..8974450a4bdb 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_build_id_nmi.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <test_progs.h> +#include "test_stacktrace_build_id.skel.h" static __u64 read_perf_max_sample_freq(void) { @@ -16,19 +17,15 @@ static __u64 read_perf_max_sample_freq(void) void test_stacktrace_build_id_nmi(void) { - int control_map_fd, stackid_hmap_fd, stackmap_fd, stack_amap_fd; - const char *prog_name = "tracepoint/random/urandom_read"; - const char *file = "./test_stacktrace_build_id.o"; - int err, pmu_fd, prog_fd; + int control_map_fd, stackid_hmap_fd, stackmap_fd; + struct test_stacktrace_build_id *skel; + int err, pmu_fd; struct perf_event_attr attr = { .freq = 1, .type = PERF_TYPE_HARDWARE, .config = PERF_COUNT_HW_CPU_CYCLES, }; __u32 key, previous_key, val, duration = 0; - struct bpf_program *prog; - struct bpf_object *obj; - struct bpf_link *link; char buf[256]; int i, j; struct bpf_stack_build_id id_offs[PERF_MAX_STACK_DEPTH]; @@ -38,13 +35,16 @@ void test_stacktrace_build_id_nmi(void) attr.sample_freq = read_perf_max_sample_freq(); retry: - err = bpf_prog_load(file, BPF_PROG_TYPE_PERF_EVENT, &obj, &prog_fd); - if (CHECK(err, "prog_load", "err %d errno %d\n", err, errno)) + skel = test_stacktrace_build_id__open(); + if (CHECK(!skel, "skel_open", "skeleton open failed\n")) return; - prog = bpf_object__find_program_by_title(obj, prog_name); - if (CHECK(!prog, "find_prog", "prog '%s' not found\n", prog_name)) - 
goto close_prog; + /* override program type */ + bpf_program__set_perf_event(skel->progs.oncpu); + + err = test_stacktrace_build_id__load(skel); + if (CHECK(err, "skel_load", "skeleton load failed: %d\n", err)) + goto cleanup; pmu_fd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, -1 /* group id */, @@ -52,40 +52,25 @@ retry: if (CHECK(pmu_fd < 0, "perf_event_open", "err %d errno %d. Does the test host support PERF_COUNT_HW_CPU_CYCLES?\n", pmu_fd, errno)) - goto close_prog; + goto cleanup; - link = bpf_program__attach_perf_event(prog, pmu_fd); - if (CHECK(IS_ERR(link), "attach_perf_event", - "err %ld\n", PTR_ERR(link))) { + skel->links.oncpu = bpf_program__attach_perf_event(skel->progs.oncpu, + pmu_fd); + if (CHECK(IS_ERR(skel->links.oncpu), "attach_perf_event", + "err %ld\n", PTR_ERR(skel->links.oncpu))) { close(pmu_fd); - goto close_prog; + goto cleanup; } /* find map fds */ - control_map_fd = bpf_find_map(__func__, obj, "control_map"); - if (CHECK(control_map_fd < 0, "bpf_find_map control_map", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackid_hmap_fd = bpf_find_map(__func__, obj, "stackid_hmap"); - if (CHECK(stackid_hmap_fd < 0, "bpf_find_map stackid_hmap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; - - stackmap_fd = bpf_find_map(__func__, obj, "stackmap"); - if (CHECK(stackmap_fd < 0, "bpf_find_map stackmap", "err %d errno %d\n", - err, errno)) - goto disable_pmu; - - stack_amap_fd = bpf_find_map(__func__, obj, "stack_amap"); - if (CHECK(stack_amap_fd < 0, "bpf_find_map stack_amap", - "err %d errno %d\n", err, errno)) - goto disable_pmu; + control_map_fd = bpf_map__fd(skel->maps.control_map); + stackid_hmap_fd = bpf_map__fd(skel->maps.stackid_hmap); + stackmap_fd = bpf_map__fd(skel->maps.stackmap); if (CHECK_FAIL(system("dd if=/dev/urandom of=/dev/zero count=4 2> /dev/null"))) - goto disable_pmu; + goto cleanup; if (CHECK_FAIL(system("taskset 0x1 ./urandom_read 100000"))) - goto disable_pmu; + goto cleanup; /* disable stack trace collection */ key = 0; val = 1; @@ -97,23 +82,23 @@ retry: err = compare_map_keys(stackid_hmap_fd, stackmap_fd); if (CHECK(err, "compare_map_keys stackid_hmap vs. stackmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = compare_map_keys(stackmap_fd, stackid_hmap_fd); if (CHECK(err, "compare_map_keys stackmap vs. stackid_hmap", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = extract_build_id(buf, 256); if (CHECK(err, "get build_id with readelf", "err %d errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; err = bpf_map_get_next_key(stackmap_fd, NULL, &key); if (CHECK(err, "get_next_key from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; do { char build_id[64]; @@ -121,7 +106,7 @@ retry: err = bpf_map_lookup_elem(stackmap_fd, &key, id_offs); if (CHECK(err, "lookup_elem from stackmap", "err %d, errno %d\n", err, errno)) - goto disable_pmu; + goto cleanup; for (i = 0; i < PERF_MAX_STACK_DEPTH; ++i) if (id_offs[i].status == BPF_STACK_BUILD_ID_VALID && id_offs[i].offset != 0) { @@ -139,8 +124,7 @@ retry: * try it one more time. 
*/ if (build_id_matches < 1 && retry--) { - bpf_link__destroy(link); - bpf_object__close(obj); + test_stacktrace_build_id__destroy(skel); printf("%s:WARN:Didn't find expected build ID from the map, retrying\n", __func__); goto retry; @@ -148,7 +132,7 @@ retry: if (CHECK(build_id_matches < 1, "build id match", "Didn't find expected build ID from the map\n")) - goto disable_pmu; + goto cleanup; /* * We intentionally skip compare_stack_ips(). This is because we @@ -157,8 +141,6 @@ retry: * BPF_STACK_BUILD_ID_IP; */ -disable_pmu: - bpf_link__destroy(link); -close_prog: - bpf_object__close(obj); +cleanup: + test_stacktrace_build_id__destroy(skel); } diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_perf.c b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c new file mode 100644 index 000000000000..7185bee16fe4 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/xdp_perf.c @@ -0,0 +1,25 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <test_progs.h> + +void test_xdp_perf(void) +{ + const char *file = "./xdp_dummy.o"; + __u32 duration, retval, size; + struct bpf_object *obj; + char in[128], out[128]; + int err, prog_fd; + + err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd); + if (CHECK_FAIL(err)) + return; + + err = bpf_prog_test_run(prog_fd, 1000000, &in[0], 128, + out, &size, &retval, &duration); + + CHECK(err || retval != XDP_PASS || size != 128, + "xdp-perf", + "err %d errno %d retval %d size %d\n", + err, errno, retval, size); + + bpf_object__close(obj); +} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c new file mode 100644 index 000000000000..65eac371b061 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___equiv_zero_sz_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___equiv_zero_sz_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c new file mode 100644 index 000000000000..ecda2b545ac2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___err_bad_zero_sz_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___err_bad_zero_sz_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c new file mode 100644 index 000000000000..fe1d01232c22 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/btf__core_reloc_arrays___fixed_arr.c @@ -0,0 +1,3 @@ +#include "core_reloc_types.h" + +void f(struct core_reloc_arrays___fixed_arr x) {} diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h index 9311489e14b2..6d598cfbdb3e 100644 --- a/tools/testing/selftests/bpf/progs/core_reloc_types.h +++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h @@ -327,6 +327,7 @@ struct core_reloc_arrays_output { char b123; int c1c; int d00d; + int f10c; }; struct core_reloc_arrays_substruct { @@ -339,6 +340,7 @@ struct core_reloc_arrays { char b[2][3][4]; struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; }; /* bigger array dimensions */ @@ -347,6 +349,7 @@ struct core_reloc_arrays___diff_arr_dim { char b[3][4][5]; struct core_reloc_arrays_substruct c[4]; struct 
core_reloc_arrays_substruct d[2][3]; + struct core_reloc_arrays_substruct f[1][3]; }; /* different size of array's value (struct) */ @@ -363,6 +366,29 @@ struct core_reloc_arrays___diff_arr_val_sz { int d; int __padding2; } d[1][2]; + struct { + int __padding1; + int c; + int __padding2; + } f[][2]; +}; + +struct core_reloc_arrays___equiv_zero_sz_arr { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + /* equivalent to flexible array */ + struct core_reloc_arrays_substruct f[0][2]; +}; + +struct core_reloc_arrays___fixed_arr { + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; + /* not a flexible array anymore, but within access bounds */ + struct core_reloc_arrays_substruct f[1][2]; }; struct core_reloc_arrays___err_too_small { @@ -370,6 +396,7 @@ struct core_reloc_arrays___err_too_small { char b[2][3][4]; struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; }; struct core_reloc_arrays___err_too_shallow { @@ -377,6 +404,7 @@ struct core_reloc_arrays___err_too_shallow { char b[2][3]; /* this one lacks one dimension */ struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; }; struct core_reloc_arrays___err_non_array { @@ -384,6 +412,7 @@ struct core_reloc_arrays___err_non_array { char b[2][3][4]; struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; }; struct core_reloc_arrays___err_wrong_val_type { @@ -391,6 +420,16 @@ struct core_reloc_arrays___err_wrong_val_type { char b[2][3][4]; int c[3]; /* value is not a struct */ struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; +}; + +struct core_reloc_arrays___err_bad_zero_sz_arr { + /* zero-sized array, but not at the end */ + struct core_reloc_arrays_substruct f[0][2]; + int a[5]; + char b[2][3][4]; + struct core_reloc_arrays_substruct c[3]; + struct core_reloc_arrays_substruct d[1][2]; }; /* diff --git a/tools/testing/selftests/bpf/progs/test_attach_probe.c b/tools/testing/selftests/bpf/progs/test_attach_probe.c index 534621e38906..221b69700625 100644 --- a/tools/testing/selftests/bpf/progs/test_attach_probe.c +++ b/tools/testing/selftests/bpf/progs/test_attach_probe.c @@ -5,46 +5,36 @@ #include <linux/bpf.h> #include "bpf_helpers.h" -struct { - __uint(type, BPF_MAP_TYPE_ARRAY); - __uint(max_entries, 4); - __type(key, int); - __type(value, int); -} results_map SEC(".maps"); +int kprobe_res = 0; +int kretprobe_res = 0; +int uprobe_res = 0; +int uretprobe_res = 0; SEC("kprobe/sys_nanosleep") -int handle_sys_nanosleep_entry(struct pt_regs *ctx) +int handle_kprobe(struct pt_regs *ctx) { - const int key = 0, value = 1; - - bpf_map_update_elem(&results_map, &key, &value, 0); + kprobe_res = 1; return 0; } SEC("kretprobe/sys_nanosleep") -int handle_sys_getpid_return(struct pt_regs *ctx) +int handle_kretprobe(struct pt_regs *ctx) { - const int key = 1, value = 2; - - bpf_map_update_elem(&results_map, &key, &value, 0); + kretprobe_res = 2; return 0; } SEC("uprobe/trigger_func") -int handle_uprobe_entry(struct pt_regs *ctx) +int handle_uprobe(struct pt_regs *ctx) { - const int key = 2, value = 3; - - bpf_map_update_elem(&results_map, &key, &value, 0); + uprobe_res = 3; return 0; } SEC("uretprobe/trigger_func") -int handle_uprobe_return(struct pt_regs *ctx) 
+int handle_uretprobe(struct pt_regs *ctx) { - const int key = 3, value = 4; - - bpf_map_update_elem(&results_map, &key, &value, 0); + uretprobe_res = 4; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_core_extern.c b/tools/testing/selftests/bpf/progs/test_core_extern.c new file mode 100644 index 000000000000..9bfc91d9d004 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_core_extern.c @@ -0,0 +1,62 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdint.h> +#include <stdbool.h> +#include <linux/ptrace.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +/* non-existing BPF helper, to test dead code elimination */ +static int (*bpf_missing_helper)(const void *arg1, int arg2) = (void *) 999; + +extern int LINUX_KERNEL_VERSION __kconfig; +extern bool CONFIG_BPF_SYSCALL __kconfig; /* strong */ +extern enum libbpf_tristate CONFIG_TRISTATE __kconfig __weak; +extern bool CONFIG_BOOL __kconfig __weak; +extern char CONFIG_CHAR __kconfig __weak; +extern uint16_t CONFIG_USHORT __kconfig __weak; +extern int CONFIG_INT __kconfig __weak; +extern uint64_t CONFIG_ULONG __kconfig __weak; +extern const char CONFIG_STR[8] __kconfig __weak; +extern uint64_t CONFIG_MISSING __kconfig __weak; + +uint64_t kern_ver = -1; +uint64_t bpf_syscall = -1; +uint64_t tristate_val = -1; +uint64_t bool_val = -1; +uint64_t char_val = -1; +uint64_t ushort_val = -1; +uint64_t int_val = -1; +uint64_t ulong_val = -1; +char str_val[8] = {-1, -1, -1, -1, -1, -1, -1, -1}; +uint64_t missing_val = -1; + +SEC("raw_tp/sys_enter") +int handle_sys_enter(struct pt_regs *ctx) +{ + int i; + + kern_ver = LINUX_KERNEL_VERSION; + bpf_syscall = CONFIG_BPF_SYSCALL; + tristate_val = CONFIG_TRISTATE; + bool_val = CONFIG_BOOL; + char_val = CONFIG_CHAR; + ushort_val = CONFIG_USHORT; + int_val = CONFIG_INT; + ulong_val = CONFIG_ULONG; + + for (i = 0; i < sizeof(CONFIG_STR); i++) { + str_val[i] = CONFIG_STR[i]; + } + + if (CONFIG_MISSING) + /* invalid, but dead code - never executed */ + missing_val = bpf_missing_helper(ctx, 123); + else + missing_val = 0xDEADC0DE; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c index 89951b684282..053b86f6b53f 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_arrays.c @@ -18,6 +18,7 @@ struct core_reloc_arrays_output { char b123; int c1c; int d00d; + int f01c; }; struct core_reloc_arrays_substruct { @@ -30,6 +31,7 @@ struct core_reloc_arrays { char b[2][3][4]; struct core_reloc_arrays_substruct c[3]; struct core_reloc_arrays_substruct d[1][2]; + struct core_reloc_arrays_substruct f[][2]; }; #define CORE_READ(dst, src) bpf_core_read(dst, sizeof(*(dst)), src) @@ -40,18 +42,16 @@ int test_core_arrays(void *ctx) struct core_reloc_arrays *in = (void *)&data.in; struct core_reloc_arrays_output *out = (void *)&data.out; - /* in->a[2] */ if (CORE_READ(&out->a2, &in->a[2])) return 1; - /* in->b[1][2][3] */ if (CORE_READ(&out->b123, &in->b[1][2][3])) return 1; - /* in->c[1].c */ if (CORE_READ(&out->c1c, &in->c[1].c)) return 1; - /* in->d[0][0].d */ if (CORE_READ(&out->d00d, &in->d[0][0].d)) return 1; + if (CORE_READ(&out->f01c, &in->f[0][1].c)) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c index ea7d84f01235..b1f09f5bb1cf 
100644 --- a/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c +++ b/tools/testing/selftests/bpf/progs/test_select_reuseport_kern.c @@ -62,7 +62,7 @@ struct { goto done; \ }) -SEC("select_by_skb_data") +SEC("sk_reuseport") int _select_by_skb_data(struct sk_reuseport_md *reuse_md) { __u32 linum, index = 0, flags = 0, index_zero = 0; diff --git a/tools/testing/selftests/bpf/progs/test_skb_ctx.c b/tools/testing/selftests/bpf/progs/test_skb_ctx.c index 2a9f4c736ebc..e18da87fe84f 100644 --- a/tools/testing/selftests/bpf/progs/test_skb_ctx.c +++ b/tools/testing/selftests/bpf/progs/test_skb_ctx.c @@ -17,6 +17,12 @@ int process(struct __sk_buff *skb) } skb->priority++; skb->tstamp++; + skb->mark++; + + if (skb->wire_len != 100) + return 1; + if (skb->gso_segs != 8) + return 1; return 0; } diff --git a/tools/testing/selftests/bpf/progs/test_skeleton.c b/tools/testing/selftests/bpf/progs/test_skeleton.c new file mode 100644 index 000000000000..4f69aac5635f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_skeleton.c @@ -0,0 +1,46 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2019 Facebook */ + +#include <stdbool.h> +#include <linux/bpf.h> +#include "bpf_helpers.h" + +struct s { + int a; + long long b; +} __attribute__((packed)); + +int in1 = 0; +long long in2 = 0; +char in3 = '\0'; +long long in4 __attribute__((aligned(64))) = 0; +struct s in5 = {}; + +long long out2 = 0; +char out3 = 0; +long long out4 = 0; +int out1 = 0; + +extern bool CONFIG_BPF_SYSCALL __kconfig; +extern int LINUX_KERNEL_VERSION __kconfig; +bool bpf_syscall = 0; +int kern_ver = 0; + +SEC("raw_tp/sys_enter") +int handler(const void *ctx) +{ + static volatile struct s out5; + + out1 = in1; + out2 = in2; + out3 = in3; + out4 = in4; + out5 = in5; + + bpf_syscall = CONFIG_BPF_SYSCALL; + kern_ver = LINUX_KERNEL_VERSION; + + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_cgroup_attach.c b/tools/testing/selftests/bpf/test_cgroup_attach.c deleted file mode 100644 index 7671909ee1cb..000000000000 --- a/tools/testing/selftests/bpf/test_cgroup_attach.c +++ /dev/null @@ -1,571 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -/* eBPF example program: - * - * - Creates arraymap in kernel with 4 bytes keys and 8 byte values - * - * - Loads eBPF program - * - * The eBPF program accesses the map passed in to store two pieces of - * information. The number of invocations of the program, which maps - * to the number of packets received, is stored to key 0. Key 1 is - * incremented on each iteration by the number of bytes stored in - * the skb. The program also stores the number of received bytes - * in the cgroup storage. - * - * - Attaches the new program to a cgroup using BPF_PROG_ATTACH - * - * - Every second, reads map[0] and map[1] to see how many bytes and - * packets were seen on any socket of tasks in the given cgroup. - */ - -#define _GNU_SOURCE - -#include <stdio.h> -#include <stdlib.h> -#include <assert.h> -#include <sys/resource.h> -#include <sys/time.h> -#include <unistd.h> -#include <linux/filter.h> - -#include <linux/bpf.h> -#include <bpf/bpf.h> - -#include "bpf_util.h" -#include "bpf_rlimit.h" -#include "cgroup_helpers.h" - -#define FOO "/foo" -#define BAR "/foo/bar/" -#define PING_CMD "ping -q -c1 -w1 127.0.0.1 > /dev/null" - -char bpf_log_buf[BPF_LOG_BUF_SIZE]; - -#ifdef DEBUG -#define debug(args...) printf(args) -#else -#define debug(args...) 
-#endif - -static int prog_load(int verdict) -{ - int ret; - struct bpf_insn prog[] = { - BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */ - BPF_EXIT_INSN(), - }; - size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); - - ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB, - prog, insns_cnt, "GPL", 0, - bpf_log_buf, BPF_LOG_BUF_SIZE); - - if (ret < 0) { - log_err("Loading program"); - printf("Output from verifier:\n%s\n-------\n", bpf_log_buf); - return 0; - } - return ret; -} - -static int test_foo_bar(void) -{ - int drop_prog, allow_prog, foo = 0, bar = 0, rc = 0; - - allow_prog = prog_load(1); - if (!allow_prog) - goto err; - - drop_prog = prog_load(0); - if (!drop_prog) - goto err; - - if (setup_cgroup_environment()) - goto err; - - /* Create cgroup /foo, get fd, and join it */ - foo = create_and_get_cgroup(FOO); - if (foo < 0) - goto err; - - if (join_cgroup(FOO)) - goto err; - - if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo"); - goto err; - } - - debug("Attached DROP prog. This ping in cgroup /foo should fail...\n"); - assert(system(PING_CMD) != 0); - - /* Create cgroup /foo/bar, get fd, and join it */ - bar = create_and_get_cgroup(BAR); - if (bar < 0) - goto err; - - if (join_cgroup(BAR)) - goto err; - - debug("Attached DROP prog. This ping in cgroup /foo/bar should fail...\n"); - assert(system(PING_CMD) != 0); - - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo/bar"); - goto err; - } - - debug("Attached PASS prog. This ping in cgroup /foo/bar should pass...\n"); - assert(system(PING_CMD) == 0); - - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching program from /foo/bar"); - goto err; - } - - debug("Detached PASS from /foo/bar while DROP is attached to /foo.\n" - "This ping in cgroup /foo/bar should fail...\n"); - assert(system(PING_CMD) != 0); - - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo/bar"); - goto err; - } - - if (bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching program from /foo"); - goto err; - } - - debug("Attached PASS from /foo/bar and detached DROP from /foo.\n" - "This ping in cgroup /foo/bar should pass...\n"); - assert(system(PING_CMD) == 0); - - if (bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - log_err("Attaching prog to /foo/bar"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { - errno = 0; - log_err("Unexpected success attaching prog to /foo/bar"); - goto err; - } - - if (bpf_prog_detach(bar, BPF_CGROUP_INET_EGRESS)) { - log_err("Detaching program from /foo/bar"); - goto err; - } - - if (!bpf_prog_detach(foo, BPF_CGROUP_INET_EGRESS)) { - errno = 0; - log_err("Unexpected success in double detach from /foo"); - goto err; - } - - if (bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) { - log_err("Attaching non-overridable prog to /foo"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, 0)) { - errno = 0; - log_err("Unexpected success attaching non-overridable prog to /foo/bar"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, bar, BPF_CGROUP_INET_EGRESS, - BPF_F_ALLOW_OVERRIDE)) { - errno = 0; - log_err("Unexpected success attaching overridable prog to /foo/bar"); - goto err; - } - - if (!bpf_prog_attach(allow_prog, foo, BPF_CGROUP_INET_EGRESS, - 
-			     BPF_F_ALLOW_OVERRIDE)) {
-		errno = 0;
-		log_err("Unexpected success attaching overridable prog to /foo");
-		goto err;
-	}
-
-	if (bpf_prog_attach(drop_prog, foo, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching different non-overridable prog to /foo");
-		goto err;
-	}
-
-	goto out;
-
-err:
-	rc = 1;
-
-out:
-	close(foo);
-	close(bar);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("#override:PASS\n");
-	else
-		printf("#override:FAIL\n");
-	return rc;
-}
-
-static int map_fd = -1;
-
-static int prog_load_cnt(int verdict, int val)
-{
-	int cgroup_storage_fd, percpu_cgroup_storage_fd;
-
-	if (map_fd < 0)
-		map_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
-	if (map_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	cgroup_storage_fd = bpf_create_map(BPF_MAP_TYPE_CGROUP_STORAGE,
-				sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	percpu_cgroup_storage_fd = bpf_create_map(
-		BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE,
-		sizeof(struct bpf_cgroup_storage_key), 8, 0, 0);
-	if (percpu_cgroup_storage_fd < 0) {
-		printf("failed to create map '%s'\n", strerror(errno));
-		return -1;
-	}
-
-	struct bpf_insn prog[] = {
-		BPF_MOV32_IMM(BPF_REG_0, 0),
-		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_0, -4), /* *(u32 *)(fp - 4) = r0 */
-		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), /* r2 = fp - 4 */
-		BPF_LD_MAP_FD(BPF_REG_1, map_fd),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-		BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
-		BPF_MOV64_IMM(BPF_REG_1, val), /* r1 = 1 */
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_DW, BPF_REG_0, BPF_REG_1, 0, 0), /* xadd r0 += r1 */
-
-		BPF_LD_MAP_FD(BPF_REG_1, cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_MOV64_IMM(BPF_REG_1, val),
-		BPF_RAW_INSN(BPF_STX | BPF_XADD | BPF_W, BPF_REG_0, BPF_REG_1, 0, 0),
-
-		BPF_LD_MAP_FD(BPF_REG_1, percpu_cgroup_storage_fd),
-		BPF_MOV64_IMM(BPF_REG_2, 0),
-		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_local_storage),
-		BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_0, 0),
-		BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 0x1),
-		BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_3, 0),
-
-		BPF_MOV64_IMM(BPF_REG_0, verdict), /* r0 = verdict */
-		BPF_EXIT_INSN(),
-	};
-	size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn);
-	int ret;
-
-	ret = bpf_load_program(BPF_PROG_TYPE_CGROUP_SKB,
-			       prog, insns_cnt, "GPL", 0,
-			       bpf_log_buf, BPF_LOG_BUF_SIZE);
-
-	if (ret < 0) {
-		log_err("Loading program");
-		printf("Output from verifier:\n%s\n-------\n", bpf_log_buf);
-		return 0;
-	}
-	close(cgroup_storage_fd);
-	return ret;
-}
-
-
-static int test_multiprog(void)
-{
-	__u32 prog_ids[4], prog_cnt = 0, attach_flags, saved_prog_id;
-	int cg1 = 0, cg2 = 0, cg3 = 0, cg4 = 0, cg5 = 0, key = 0;
-	int drop_prog, allow_prog[6] = {}, rc = 0;
-	unsigned long long value;
-	int i = 0;
-
-	for (i = 0; i < 6; i++) {
-		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		if (!allow_prog[i])
-			goto err;
-	}
-	drop_prog = prog_load_cnt(0, 1);
-	if (!drop_prog)
-		goto err;
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	cg1 = create_and_get_cgroup("/cg1");
-	if (cg1 < 0)
-		goto err;
-	cg2 = create_and_get_cgroup("/cg1/cg2");
-	if (cg2 < 0)
-		goto err;
-	cg3 = create_and_get_cgroup("/cg1/cg2/cg3");
-	if (cg3 < 0)
-		goto err;
-	cg4 = create_and_get_cgroup("/cg1/cg2/cg3/cg4");
-	if (cg4 < 0)
-		goto err;
-	cg5 = create_and_get_cgroup("/cg1/cg2/cg3/cg4/cg5");
-	if (cg5 < 0)
-		goto err;
-
-	if (join_cgroup("/cg1/cg2/cg3/cg4/cg5"))
-		goto err;
-
-	if (bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg1");
-		goto err;
-	}
-	if (!bpf_prog_attach(allow_prog[0], cg1, BPF_CGROUP_INET_EGRESS,
-			     BPF_F_ALLOW_MULTI)) {
-		log_err("Unexpected success attaching the same prog to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[1], cg1, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog2 to cg1");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[2], cg2, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg2");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_MULTI)) {
-		log_err("Attaching prog to cg3");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[4], cg4, BPF_CGROUP_INET_EGRESS,
-			    BPF_F_ALLOW_OVERRIDE)) {
-		log_err("Attaching prog to cg4");
-		goto err;
-	}
-	if (bpf_prog_attach(allow_prog[5], cg5, BPF_CGROUP_INET_EGRESS, 0)) {
-		log_err("Attaching prog to cg5");
-		goto err;
-	}
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 32);
-
-	/* query the number of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      NULL, NULL, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	/* retrieve prog_ids of effective progs in cg5 */
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 4);
-	assert(attach_flags == 0);
-	saved_prog_id = prog_ids[0];
-	/* check enospc handling */
-	prog_ids[0] = 0;
-	prog_cnt = 2;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == -1 &&
-	       errno == ENOSPC);
-	assert(prog_cnt == 4);
-	/* check that prog_ids are returned even when buffer is too small */
-	assert(prog_ids[0] == saved_prog_id);
-	/* retrieve prog_id of single attached prog in cg5 */
-	prog_ids[0] = 0;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 1);
-	assert(prog_ids[0] == saved_prog_id);
-
-	/* detach bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg5, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg5");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 8 + 16);
-
-	/* detach 3rd from bottom program and ping again */
-	errno = 0;
-	if (!bpf_prog_detach2(0, cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Unexpected success on detach from cg3");
-		goto err;
-	}
-	if (bpf_prog_detach2(allow_prog[3], cg3, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching from cg3");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 16);
-
-	/* detach 2nd from bottom program and ping again */
-	if (bpf_prog_detach2(-1, cg4, BPF_CGROUP_INET_EGRESS)) {
-		log_err("Detaching prog from cg4");
-		goto err;
-	}
-	value = 0;
-	assert(bpf_map_update_elem(map_fd, &key, &value, 0) == 0);
-	assert(system(PING_CMD) == 0);
-	assert(bpf_map_lookup_elem(map_fd, &key, &value) == 0);
-	assert(value == 1 + 2 + 4);
-
-	prog_cnt = 4;
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, BPF_F_QUERY_EFFECTIVE,
-			      &attach_flags, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 3);
-	assert(attach_flags == 0);
-	assert(bpf_prog_query(cg5, BPF_CGROUP_INET_EGRESS, 0,
-			      NULL, prog_ids, &prog_cnt) == 0);
-	assert(prog_cnt == 0);
-	goto out;
-err:
-	rc = 1;
-
-out:
-	for (i = 0; i < 6; i++)
-		if (allow_prog[i] > 0)
-			close(allow_prog[i]);
-	close(cg1);
-	close(cg2);
-	close(cg3);
-	close(cg4);
-	close(cg5);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("#multi:PASS\n");
-	else
-		printf("#multi:FAIL\n");
-	return rc;
-}
-
-static int test_autodetach(void)
-{
-	__u32 prog_cnt = 4, attach_flags;
-	int allow_prog[2] = {0};
-	__u32 prog_ids[2] = {0};
-	int cg = 0, i, rc = -1;
-	void *ptr = NULL;
-	int attempts;
-
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-		allow_prog[i] = prog_load_cnt(1, 1 << i);
-		if (!allow_prog[i])
-			goto err;
-	}
-
-	if (setup_cgroup_environment())
-		goto err;
-
-	/* create a cgroup, attach two programs and remember their ids */
-	cg = create_and_get_cgroup("/cg_autodetach");
-	if (cg < 0)
-		goto err;
-
-	if (join_cgroup("/cg_autodetach"))
-		goto err;
-
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-		if (bpf_prog_attach(allow_prog[i], cg, BPF_CGROUP_INET_EGRESS,
-				    BPF_F_ALLOW_MULTI)) {
-			log_err("Attaching prog[%d] to cg:egress", i);
-			goto err;
-		}
-	}
-
-	/* make sure that programs are attached and run some traffic */
-	assert(bpf_prog_query(cg, BPF_CGROUP_INET_EGRESS, 0, &attach_flags,
-			      prog_ids, &prog_cnt) == 0);
-	assert(system(PING_CMD) == 0);
-
-	/* allocate some memory (4Mb) to pin the original cgroup */
-	ptr = malloc(4 * (1 << 20));
-	if (!ptr)
-		goto err;
-
-	/* close programs and cgroup fd */
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++) {
-		close(allow_prog[i]);
-		allow_prog[i] = 0;
-	}
-
-	close(cg);
-	cg = 0;
-
-	/* leave the cgroup and remove it. don't detach programs */
-	cleanup_cgroup_environment();
-
-	/* wait for the asynchronous auto-detachment.
-	 * wait for no more than 5 sec and give up.
-	 */
-	for (i = 0; i < ARRAY_SIZE(prog_ids); i++) {
-		for (attempts = 5; attempts >= 0; attempts--) {
-			int fd = bpf_prog_get_fd_by_id(prog_ids[i]);
-
-			if (fd < 0)
-				break;
-
-			/* don't leave the fd open */
-			close(fd);
-
-			if (!attempts)
-				goto err;
-
-			sleep(1);
-		}
-	}
-
-	rc = 0;
-err:
-	for (i = 0; i < ARRAY_SIZE(allow_prog); i++)
-		if (allow_prog[i] > 0)
-			close(allow_prog[i]);
-	if (cg)
-		close(cg);
-	free(ptr);
-	cleanup_cgroup_environment();
-	if (!rc)
-		printf("#autodetach:PASS\n");
-	else
-		printf("#autodetach:FAIL\n");
-	return rc;
-}
-
-int main(void)
-{
-	int (*tests[])(void) = {
-		test_foo_bar,
-		test_multiprog,
-		test_autodetach,
-	};
-	int errors = 0;
-	int i;
-
-	for (i = 0; i < ARRAY_SIZE(tests); i++)
-		if (tests[i]())
-			errors++;
-
-	if (errors)
-		printf("test_cgroup_attach:FAIL\n");
-	else
-		printf("test_cgroup_attach:PASS\n");
-
-	return errors ? EXIT_FAILURE : EXIT_SUCCESS;
-}
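For context on the attach-flag semantics the removed test exercised: BPF_F_ALLOW_MULTI lets several programs coexist on one cgroup, which is why removal goes through bpf_prog_detach2() and must name the exact program. A minimal sketch, with hypothetical descriptors not taken from this patch:

	/* prog_fd and cg_fd are placeholder descriptors */
	if (bpf_prog_attach(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS,
			    BPF_F_ALLOW_MULTI))
		goto err;

	/* several programs may be attached; identify which one to remove */
	if (bpf_prog_detach2(prog_fd, cg_fd, BPF_CGROUP_INET_EGRESS))
		goto err;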
diff --git a/tools/testing/selftests/bpf/test_cpp.cpp b/tools/testing/selftests/bpf/test_cpp.cpp
index f0eb2727b766..6fe23a10d48a 100644
--- a/tools/testing/selftests/bpf/test_cpp.cpp
+++ b/tools/testing/selftests/bpf/test_cpp.cpp
@@ -1,12 +1,16 @@
 /* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+#include <iostream>
 #include "libbpf.h"
 #include "bpf.h"
 #include "btf.h"
+#include "test_core_extern.skel.h"
 
 /* do nothing, just make sure we can link successfully */
 
 int main(int argc, char *argv[])
 {
+	struct test_core_extern *skel;
+
 	/* libbpf.h */
 	libbpf_set_print(NULL);
 
@@ -16,5 +20,11 @@ int main(int argc, char *argv[])
 	/* btf.h */
 	btf__new(NULL, 0);
 
+	/* BPF skeleton */
+	skel = test_core_extern__open_and_load();
+	test_core_extern__destroy(skel);
+
+	std::cout << "DONE!" << std::endl;
+
 	return 0;
 }
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 8477df835979..de1fdaa4e7b4 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -100,6 +100,7 @@ extern struct ipv6_packet pkt_v6;
 
 #define _CHECK(condition, tag, duration, format...) ({			\
 	int __ret = !!(condition);					\
+	int __save_errno = errno;					\
 	if (__ret) {							\
 		test__fail();						\
 		printf("%s:FAIL:%s ", __func__, tag);			\
@@ -108,15 +109,18 @@ extern struct ipv6_packet pkt_v6;
 		printf("%s:PASS:%s %d nsec\n",				\
 		       __func__, tag, duration);			\
 	}								\
+	errno = __save_errno;						\
 	__ret;								\
 })
 
 #define CHECK_FAIL(condition) ({					\
 	int __ret = !!(condition);					\
+	int __save_errno = errno;					\
 	if (__ret) {							\
 		test__fail();						\
 		printf("%s:FAIL:%d\n", __func__, __LINE__);		\
 	}								\
+	errno = __save_errno;						\
 	__ret;								\
 })
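The test_progs.h hunk above saves and restores errno inside _CHECK() and CHECK_FAIL() because both the pass and fail branches call printf(), which may clobber errno between a syscall and a later errno assertion. An illustrative caller — names are hypothetical, and it assumes test_progs' CHECK() with its local "duration":

	err = bpf_map_lookup_elem(map_fd, &key, &val);
	if (CHECK(!err, "lookup", "unexpected success\n"))
		return;
	/* without the save/restore, the printf() inside the previous
	 * CHECK() could have overwritten errno before this test runs */
	if (CHECK(errno != ENOENT, "lookup_errno", "errno %d\n", errno))
		return;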