diff options
Diffstat (limited to 'drivers/net/ethernet/netronome/nfp/bpf/jit.c')
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 462 |
1 files changed, 426 insertions, 36 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 56451edf01c2..4b631e26f199 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -103,23 +103,18 @@ nfp_prog_confirm_current_offset(struct nfp_prog *nfp_prog, unsigned int off) /* --- Emitters --- */ static void __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, - u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, bool sync, bool indir) + u8 mode, u8 xfer, u8 areg, u8 breg, u8 size, enum cmd_ctx_swap ctx, + bool indir) { - enum cmd_ctx_swap ctx; u64 insn; - if (sync) - ctx = CMD_CTX_SWAP; - else - ctx = CMD_CTX_NO_SWAP; - insn = FIELD_PREP(OP_CMD_A_SRC, areg) | FIELD_PREP(OP_CMD_CTX, ctx) | FIELD_PREP(OP_CMD_B_SRC, breg) | FIELD_PREP(OP_CMD_TOKEN, cmd_tgt_act[op].token) | FIELD_PREP(OP_CMD_XFER, xfer) | FIELD_PREP(OP_CMD_CNT, size) | - FIELD_PREP(OP_CMD_SIG, sync) | + FIELD_PREP(OP_CMD_SIG, ctx != CMD_CTX_NO_SWAP) | FIELD_PREP(OP_CMD_TGT_CMD, cmd_tgt_act[op].tgt_cmd) | FIELD_PREP(OP_CMD_INDIR, indir) | FIELD_PREP(OP_CMD_MODE, mode); @@ -129,7 +124,7 @@ __emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, static void emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, - swreg lreg, swreg rreg, u8 size, bool sync, bool indir) + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx, bool indir) { struct nfp_insn_re_regs reg; int err; @@ -150,22 +145,22 @@ emit_cmd_any(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, return; } - __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, sync, + __emit_cmd(nfp_prog, op, mode, xfer, reg.areg, reg.breg, size, ctx, indir); } static void emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, - swreg lreg, swreg rreg, u8 size, bool sync) + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) { - emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, false); + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, false); } static void emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer, - swreg lreg, swreg rreg, u8 size, bool sync) + swreg lreg, swreg rreg, u8 size, enum cmd_ctx_swap ctx) { - emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true); + emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, ctx, true); } static void @@ -410,7 +405,7 @@ __emit_lcsr(struct nfp_prog *nfp_prog, u16 areg, u16 breg, bool wr, u16 addr, FIELD_PREP(OP_LCSR_A_SRC, areg) | FIELD_PREP(OP_LCSR_B_SRC, breg) | FIELD_PREP(OP_LCSR_WRITE, wr) | - FIELD_PREP(OP_LCSR_ADDR, addr) | + FIELD_PREP(OP_LCSR_ADDR, addr / 4) | FIELD_PREP(OP_LCSR_SRC_LMEXTN, src_lmextn) | FIELD_PREP(OP_LCSR_DST_LMEXTN, dst_lmextn); @@ -438,10 +433,16 @@ static void emit_csr_wr(struct nfp_prog *nfp_prog, swreg src, u16 addr) return; } - __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr / 4, + __emit_lcsr(nfp_prog, reg.areg, reg.breg, true, addr, false, reg.src_lmextn); } +/* CSR value is read in following immed[gpr, 0] */ +static void __emit_csr_rd(struct nfp_prog *nfp_prog, u16 addr) +{ + __emit_lcsr(nfp_prog, 0, 0, false, addr, false, false); +} + static void emit_nop(struct nfp_prog *nfp_prog) { __emit_immed(nfp_prog, UR_REG_IMM, UR_REG_IMM, 0, 0, 0, 0, 0, 0, 0); @@ -553,6 +554,19 @@ wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len, emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true); } +/* wrp_reg_or_subpart() - load @field_len bytes from low end of @src, or the + * result to @dst from offset, there is no change on the other bits of @dst. + */ +static void +wrp_reg_or_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, + u8 field_len, u8 offset) +{ + enum shf_sc sc = offset ? SHF_SC_L_SHF : SHF_SC_NONE; + u8 mask = ((1 << field_len) - 1) << offset; + + emit_ld_field(nfp_prog, dst, mask, src, sc, 32 - offset * 8); +} + static void addr40_offset(struct nfp_prog *nfp_prog, u8 src_gpr, swreg offset, swreg *rega, swreg *regb) @@ -597,7 +611,7 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) /* Memory read from source addr into transfer-in registers. */ emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, src_40bit_addr ? CMD_MODE_40b_BA : CMD_MODE_32b, 0, - src_base, off, xfer_num - 1, true, len > 32); + src_base, off, xfer_num - 1, CMD_CTX_SWAP, len > 32); /* Move from transfer-in to transfer-out. */ for (i = 0; i < xfer_num; i++) @@ -609,39 +623,39 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) /* Use single direct_ref write8. */ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, len - 1, - true); + CMD_CTX_SWAP); } else if (len <= 32 && IS_ALIGNED(len, 4)) { /* Use single direct_ref write32. */ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, - true); + CMD_CTX_SWAP); } else if (len <= 32) { /* Use single indirect_ref write8. */ wrp_immed(nfp_prog, reg_none(), CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, - len - 1, true); + len - 1, CMD_CTX_SWAP); } else if (IS_ALIGNED(len, 4)) { /* Use single indirect_ref write32. */ wrp_immed(nfp_prog, reg_none(), CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, - xfer_num - 1, true); + xfer_num - 1, CMD_CTX_SWAP); } else if (len <= 40) { /* Use one direct_ref write32 to write the first 32-bytes, then * another direct_ref write8 to write the remaining bytes. */ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, 7, - true); + CMD_CTX_SWAP); off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, imm_b(nfp_prog)); emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, reg_a(meta->paired_st->dst_reg * 2), off, len - 33, - true); + CMD_CTX_SWAP); } else { /* Use one indirect_ref write32 to write 4-bytes aligned length, * then another direct_ref write8 to write the remaining bytes. @@ -652,12 +666,12 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, - xfer_num - 2, true); + xfer_num - 2, CMD_CTX_SWAP); new_off = meta->paired_st->off + (xfer_num - 1) * 4; off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, - (len & 0x3) - 1, true); + (len & 0x3) - 1, CMD_CTX_SWAP); } /* TODO: The following extra load is to make sure data flow be identical @@ -718,7 +732,7 @@ data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size) shift = size < 4 ? 4 - size : 0; emit_cmd(nfp_prog, CMD_TGT_READ8, CMD_MODE_32b, 0, - pptr_reg(nfp_prog), offset, sz - 1, true); + pptr_reg(nfp_prog), offset, sz - 1, CMD_CTX_SWAP); i = 0; if (shift) @@ -748,7 +762,7 @@ data_ld_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, mask = size < 4 ? GENMASK(size - 1, 0) : 0; emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, mode, 0, - lreg, rreg, sz / 4 - 1, true); + lreg, rreg, sz / 4 - 1, CMD_CTX_SWAP); i = 0; if (mask) @@ -828,7 +842,7 @@ data_stx_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, wrp_mov(nfp_prog, reg_xfer(i), reg_a(src_gpr + i)); emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, - reg_a(dst_gpr), offset, size - 1, true); + reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); return 0; } @@ -842,7 +856,7 @@ data_st_host_order(struct nfp_prog *nfp_prog, u8 dst_gpr, swreg offset, wrp_immed(nfp_prog, reg_xfer(1), imm >> 32); emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, - reg_a(dst_gpr), offset, size - 1, true); + reg_a(dst_gpr), offset, size - 1, CMD_CTX_SWAP); return 0; } @@ -1339,7 +1353,7 @@ static int adjust_head(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) } static int -map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +map_call_stack_common(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { struct bpf_offloaded_map *offmap; struct nfp_bpf_map *nfp_map; @@ -1353,19 +1367,21 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) /* We only have to reload LM0 if the key is not at start of stack */ lm_off = nfp_prog->stack_depth; - lm_off += meta->arg2.var_off.value + meta->arg2.off; - load_lm_ptr = meta->arg2_var_off || lm_off; + lm_off += meta->arg2.reg.var_off.value + meta->arg2.reg.off; + load_lm_ptr = meta->arg2.var_off || lm_off; /* Set LM0 to start of key */ if (load_lm_ptr) emit_csr_wr(nfp_prog, reg_b(2 * 2), NFP_CSR_ACT_LM_ADDR0); + if (meta->func_id == BPF_FUNC_map_update_elem) + emit_csr_wr(nfp_prog, reg_b(3 * 2), NFP_CSR_ACT_LM_ADDR2); /* Load map ID into a register, it should actually fit as an immediate * but in case it doesn't deal with it here, not in the delay slots. */ tid = ur_load_imm_any(nfp_prog, nfp_map->tid, imm_a(nfp_prog)); - emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + BPF_FUNC_map_lookup_elem, + emit_br_relo(nfp_prog, BR_UNC, BR_OFF_RELO + meta->func_id, 2, RELO_BR_HELPER); ret_tgt = nfp_prog_current_offset(nfp_prog) + 2; @@ -1388,6 +1404,18 @@ map_lookup_stack(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return 0; } +static int +nfp_get_prandom_u32(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + __emit_csr_rd(nfp_prog, NFP_CSR_PSEUDO_RND_NUM); + /* CSR value is read in following immed[gpr, 0] */ + emit_immed(nfp_prog, reg_both(0), 0, + IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + emit_immed(nfp_prog, reg_both(1), 0, + IMMED_WIDTH_ALL, false, IMMED_SHIFT_0B); + return 0; +} + /* --- Callbacks --- */ static int mov_reg64(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { @@ -1838,6 +1866,128 @@ mem_ldx_emem(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, tmp_reg, meta->insn.dst_reg * 2, size); } +static void +mem_ldx_data_init_pktcache(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta) +{ + s16 range_start = meta->pkt_cache.range_start; + s16 range_end = meta->pkt_cache.range_end; + swreg src_base, off; + u8 xfer_num, len; + bool indir; + + off = re_load_imm_any(nfp_prog, range_start, imm_b(nfp_prog)); + src_base = reg_a(meta->insn.src_reg * 2); + len = range_end - range_start; + xfer_num = round_up(len, REG_WIDTH) / REG_WIDTH; + + indir = len > 8 * REG_WIDTH; + /* Setup PREV_ALU for indirect mode. */ + if (indir) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + + /* Cache memory into transfer-in registers. */ + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, + off, xfer_num - 1, CMD_CTX_SWAP, indir); +} + +static int +mem_ldx_data_from_pktcache_unaligned(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + unsigned int size) +{ + s16 range_start = meta->pkt_cache.range_start; + s16 insn_off = meta->insn.off - range_start; + swreg dst_lo, dst_hi, src_lo, src_mid; + u8 dst_gpr = meta->insn.dst_reg * 2; + u8 len_lo = size, len_mid = 0; + u8 idx = insn_off / REG_WIDTH; + u8 off = insn_off % REG_WIDTH; + + dst_hi = reg_both(dst_gpr + 1); + dst_lo = reg_both(dst_gpr); + src_lo = reg_xfer(idx); + + /* The read length could involve as many as three registers. */ + if (size > REG_WIDTH - off) { + /* Calculate the part in the second register. */ + len_lo = REG_WIDTH - off; + len_mid = size - len_lo; + + /* Calculate the part in the third register. */ + if (size > 2 * REG_WIDTH - off) + len_mid = REG_WIDTH; + } + + wrp_reg_subpart(nfp_prog, dst_lo, src_lo, len_lo, off); + + if (!len_mid) { + wrp_immed(nfp_prog, dst_hi, 0); + return 0; + } + + src_mid = reg_xfer(idx + 1); + + if (size <= REG_WIDTH) { + wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, len_mid, len_lo); + wrp_immed(nfp_prog, dst_hi, 0); + } else { + swreg src_hi = reg_xfer(idx + 2); + + wrp_reg_or_subpart(nfp_prog, dst_lo, src_mid, + REG_WIDTH - len_lo, len_lo); + wrp_reg_subpart(nfp_prog, dst_hi, src_mid, len_lo, + REG_WIDTH - len_lo); + wrp_reg_or_subpart(nfp_prog, dst_hi, src_hi, REG_WIDTH - len_lo, + len_lo); + } + + return 0; +} + +static int +mem_ldx_data_from_pktcache_aligned(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, + unsigned int size) +{ + swreg dst_lo, dst_hi, src_lo; + u8 dst_gpr, idx; + + idx = (meta->insn.off - meta->pkt_cache.range_start) / REG_WIDTH; + dst_gpr = meta->insn.dst_reg * 2; + dst_hi = reg_both(dst_gpr + 1); + dst_lo = reg_both(dst_gpr); + src_lo = reg_xfer(idx); + + if (size < REG_WIDTH) { + wrp_reg_subpart(nfp_prog, dst_lo, src_lo, size, 0); + wrp_immed(nfp_prog, dst_hi, 0); + } else if (size == REG_WIDTH) { + wrp_mov(nfp_prog, dst_lo, src_lo); + wrp_immed(nfp_prog, dst_hi, 0); + } else { + swreg src_hi = reg_xfer(idx + 1); + + wrp_mov(nfp_prog, dst_lo, src_lo); + wrp_mov(nfp_prog, dst_hi, src_hi); + } + + return 0; +} + +static int +mem_ldx_data_from_pktcache(struct nfp_prog *nfp_prog, + struct nfp_insn_meta *meta, unsigned int size) +{ + u8 off = meta->insn.off - meta->pkt_cache.range_start; + + if (IS_ALIGNED(off, REG_WIDTH)) + return mem_ldx_data_from_pktcache_aligned(nfp_prog, meta, size); + + return mem_ldx_data_from_pktcache_unaligned(nfp_prog, meta, size); +} + static int mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, unsigned int size) @@ -1852,8 +2002,16 @@ mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, return mem_ldx_skb(nfp_prog, meta, size); } - if (meta->ptr.type == PTR_TO_PACKET) - return mem_ldx_data(nfp_prog, meta, size); + if (meta->ptr.type == PTR_TO_PACKET) { + if (meta->pkt_cache.range_end) { + if (meta->pkt_cache.do_init) + mem_ldx_data_init_pktcache(nfp_prog, meta); + + return mem_ldx_data_from_pktcache(nfp_prog, meta, size); + } else { + return mem_ldx_data(nfp_prog, meta, size); + } + } if (meta->ptr.type == PTR_TO_STACK) return mem_ldx_stack(nfp_prog, meta, size, @@ -1982,6 +2140,111 @@ static int mem_stx8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) return mem_stx(nfp_prog, meta, 8); } +static int +mem_xadd(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta, bool is64) +{ + u8 dst_gpr = meta->insn.dst_reg * 2; + u8 src_gpr = meta->insn.src_reg * 2; + unsigned int full_add, out; + swreg addra, addrb, off; + + off = ur_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); + + /* We can fit 16 bits into command immediate, if we know the immediate + * is guaranteed to either always or never fit into 16 bit we only + * generate code to handle that particular case, otherwise generate + * code for both. + */ + out = nfp_prog_current_offset(nfp_prog); + full_add = nfp_prog_current_offset(nfp_prog); + + if (meta->insn.off) { + out += 2; + full_add += 2; + } + if (meta->xadd_maybe_16bit) { + out += 3; + full_add += 3; + } + if (meta->xadd_over_16bit) + out += 2 + is64; + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + out += 5; + full_add += 5; + } + + /* Generate the branch for choosing add_imm vs add */ + if (meta->xadd_maybe_16bit && meta->xadd_over_16bit) { + swreg max_imm = imm_a(nfp_prog); + + wrp_immed(nfp_prog, max_imm, 0xffff); + emit_alu(nfp_prog, reg_none(), + max_imm, ALU_OP_SUB, reg_b(src_gpr)); + emit_alu(nfp_prog, reg_none(), + reg_imm(0), ALU_OP_SUB_C, reg_b(src_gpr + 1)); + emit_br(nfp_prog, BR_BLO, full_add, meta->insn.off ? 2 : 0); + /* defer for add */ + } + + /* If insn has an offset add to the address */ + if (!meta->insn.off) { + addra = reg_a(dst_gpr); + addrb = reg_b(dst_gpr + 1); + } else { + emit_alu(nfp_prog, imma_a(nfp_prog), + reg_a(dst_gpr), ALU_OP_ADD, off); + emit_alu(nfp_prog, imma_b(nfp_prog), + reg_a(dst_gpr + 1), ALU_OP_ADD_C, reg_imm(0)); + addra = imma_a(nfp_prog); + addrb = imma_b(nfp_prog); + } + + /* Generate the add_imm if 16 bits are possible */ + if (meta->xadd_maybe_16bit) { + swreg prev_alu = imm_a(nfp_prog); + + wrp_immed(nfp_prog, prev_alu, + FIELD_PREP(CMD_OVE_DATA, 2) | + CMD_OVE_LEN | + FIELD_PREP(CMD_OV_LEN, 0x8 | is64 << 2)); + wrp_reg_or_subpart(nfp_prog, prev_alu, reg_b(src_gpr), 2, 2); + emit_cmd_indir(nfp_prog, CMD_TGT_ADD_IMM, CMD_MODE_40b_BA, 0, + addra, addrb, 0, CMD_CTX_NO_SWAP); + + if (meta->xadd_over_16bit) + emit_br(nfp_prog, BR_UNC, out, 0); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, full_add)) + return -EINVAL; + + /* Generate the add if 16 bits are not guaranteed */ + if (meta->xadd_over_16bit) { + emit_cmd(nfp_prog, CMD_TGT_ADD, CMD_MODE_40b_BA, 0, + addra, addrb, is64 << 2, + is64 ? CMD_CTX_SWAP_DEFER2 : CMD_CTX_SWAP_DEFER1); + + wrp_mov(nfp_prog, reg_xfer(0), reg_a(src_gpr)); + if (is64) + wrp_mov(nfp_prog, reg_xfer(1), reg_a(src_gpr + 1)); + } + + if (!nfp_prog_confirm_current_offset(nfp_prog, out)) + return -EINVAL; + + return 0; +} + +static int mem_xadd4(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_xadd(nfp_prog, meta, false); +} + +static int mem_xadd8(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) +{ + return mem_xadd(nfp_prog, meta, true); +} + static int jump(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) { emit_br(nfp_prog, BR_UNC, meta->insn.off, 0); @@ -2183,7 +2446,11 @@ static int call(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) case BPF_FUNC_xdp_adjust_head: return adjust_head(nfp_prog, meta); case BPF_FUNC_map_lookup_elem: - return map_lookup_stack(nfp_prog, meta); + case BPF_FUNC_map_update_elem: + case BPF_FUNC_map_delete_elem: + return map_call_stack_common(nfp_prog, meta); + case BPF_FUNC_get_prandom_u32: + return nfp_get_prandom_u32(nfp_prog, meta); default: WARN_ONCE(1, "verifier allowed unsupported function\n"); return -EOPNOTSUPP; @@ -2243,6 +2510,8 @@ static const instr_cb_t instr_cb[256] = { [BPF_STX | BPF_MEM | BPF_H] = mem_stx2, [BPF_STX | BPF_MEM | BPF_W] = mem_stx4, [BPF_STX | BPF_MEM | BPF_DW] = mem_stx8, + [BPF_STX | BPF_XADD | BPF_W] = mem_xadd4, + [BPF_STX | BPF_XADD | BPF_DW] = mem_xadd8, [BPF_ST | BPF_MEM | BPF_B] = mem_st1, [BPF_ST | BPF_MEM | BPF_H] = mem_st2, [BPF_ST | BPF_MEM | BPF_W] = mem_st4, @@ -2821,6 +3090,120 @@ static void nfp_bpf_opt_ldst_gather(struct nfp_prog *nfp_prog) } } +static void nfp_bpf_opt_pkt_cache(struct nfp_prog *nfp_prog) +{ + struct nfp_insn_meta *meta, *range_node = NULL; + s16 range_start = 0, range_end = 0; + bool cache_avail = false; + struct bpf_insn *insn; + s32 range_ptr_off = 0; + u32 range_ptr_id = 0; + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->flags & FLAG_INSN_IS_JUMP_DST) + cache_avail = false; + + if (meta->skip) + continue; + + insn = &meta->insn; + + if (is_mbpf_store_pkt(meta) || + insn->code == (BPF_JMP | BPF_CALL) || + is_mbpf_classic_store_pkt(meta) || + is_mbpf_classic_load(meta)) { + cache_avail = false; + continue; + } + + if (!is_mbpf_load(meta)) + continue; + + if (meta->ptr.type != PTR_TO_PACKET || meta->ldst_gather_len) { + cache_avail = false; + continue; + } + + if (!cache_avail) { + cache_avail = true; + if (range_node) + goto end_current_then_start_new; + goto start_new; + } + + /* Check ID to make sure two reads share the same + * variable offset against PTR_TO_PACKET, and check OFF + * to make sure they also share the same constant + * offset. + * + * OFFs don't really need to be the same, because they + * are the constant offsets against PTR_TO_PACKET, so + * for different OFFs, we could canonicalize them to + * offsets against original packet pointer. We don't + * support this. + */ + if (meta->ptr.id == range_ptr_id && + meta->ptr.off == range_ptr_off) { + s16 new_start = range_start; + s16 end, off = insn->off; + s16 new_end = range_end; + bool changed = false; + + if (off < range_start) { + new_start = off; + changed = true; + } + + end = off + BPF_LDST_BYTES(insn); + if (end > range_end) { + new_end = end; + changed = true; + } + + if (!changed) + continue; + + if (new_end - new_start <= 64) { + /* Install new range. */ + range_start = new_start; + range_end = new_end; + continue; + } + } + +end_current_then_start_new: + range_node->pkt_cache.range_start = range_start; + range_node->pkt_cache.range_end = range_end; +start_new: + range_node = meta; + range_node->pkt_cache.do_init = true; + range_ptr_id = range_node->ptr.id; + range_ptr_off = range_node->ptr.off; + range_start = insn->off; + range_end = insn->off + BPF_LDST_BYTES(insn); + } + + if (range_node) { + range_node->pkt_cache.range_start = range_start; + range_node->pkt_cache.range_end = range_end; + } + + list_for_each_entry(meta, &nfp_prog->insns, l) { + if (meta->skip) + continue; + + if (is_mbpf_load_pkt(meta) && !meta->ldst_gather_len) { + if (meta->pkt_cache.do_init) { + range_start = meta->pkt_cache.range_start; + range_end = meta->pkt_cache.range_end; + } else { + meta->pkt_cache.range_start = range_start; + meta->pkt_cache.range_end = range_end; + } + } + } +} + static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) { nfp_bpf_opt_reg_init(nfp_prog); @@ -2828,6 +3211,7 @@ static int nfp_bpf_optimize(struct nfp_prog *nfp_prog) nfp_bpf_opt_ld_mask(nfp_prog); nfp_bpf_opt_ld_shift(nfp_prog); nfp_bpf_opt_ldst_gather(nfp_prog); + nfp_bpf_opt_pkt_cache(nfp_prog); return 0; } @@ -2952,6 +3336,12 @@ void *nfp_bpf_relo_for_vnic(struct nfp_prog *nfp_prog, struct nfp_bpf_vnic *bv) case BPF_FUNC_map_lookup_elem: val = nfp_prog->bpf->helpers.map_lookup; break; + case BPF_FUNC_map_update_elem: + val = nfp_prog->bpf->helpers.map_update; + break; + case BPF_FUNC_map_delete_elem: + val = nfp_prog->bpf->helpers.map_delete; + break; default: pr_err("relocation of unknown helper %d\n", val); |