summary refs log tree commit diff stats
diff options
context:
space:
mode:
-rw-r--r-- drivers/net/ethernet/netronome/nfp/bpf/jit.c 113
-rw-r--r-- drivers/net/ethernet/netronome/nfp/bpf/main.h 4
-rw-r--r-- drivers/net/ethernet/netronome/nfp/nfp_asm.c 1
-rw-r--r-- drivers/net/ethernet/netronome/nfp/nfp_asm.h 4
4 files changed, 122 insertions, 0 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index da4e106d3b16..138568c0eee6 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -155,6 +155,13 @@ emit_cmd(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
}
static void
+emit_cmd_indir(struct nfp_prog *nfp_prog, enum cmd_tgt_map op, u8 mode, u8 xfer,
+ swreg lreg, swreg rreg, u8 size, bool sync)
+{ /* Thin wrapper: hands all eight arguments unchanged to emit_cmd_any() and
+ * supplies true as the trailing ninth argument. Judging by the function
+ * name that flag presumably requests an indirect-reference command;
+ * confirm against the definition of emit_cmd_any().
+ */
+ emit_cmd_any(nfp_prog, op, mode, xfer, lreg, rreg, size, sync, true);
+}
+
+static void
__emit_br(struct nfp_prog *nfp_prog, enum br_mask mask, enum br_ev_pip ev_pip,
enum br_ctx_signal_state css, u16 addr, u8 defer)
{
@@ -515,6 +522,109 @@ static void wrp_reg_mov(struct nfp_prog *nfp_prog, u16 dst, u16 src)
wrp_mov(nfp_prog, reg_both(dst), reg_b(src));
}
+/* wrp_reg_subpart() - load @field_len bytes from @offset of @src, write the
+ * result to @dst from low end.
+ * @nfp_prog: program being JITed, passed through to emit_ld_field_any()
+ * @dst: register receiving the extracted bytes
+ * @src: register the bytes are taken from
+ * @field_len: number of bytes to extract; turned into a mask with the low
+ * @field_len bits set
+ * @offset: byte position inside @src; converted to a bit count (@offset * 8)
+ * and used as the shift amount
+ *
+ * A zero @offset selects SHF_SC_NONE, any other value selects SHF_SC_R_SHF.
+ * The meaning of the final true handed to emit_ld_field_any() is not visible
+ * here; check that helper before relying on it.
+ */
+static void
+wrp_reg_subpart(struct nfp_prog *nfp_prog, swreg dst, swreg src, u8 field_len,
+ u8 offset)
+{
+ enum shf_sc sc = offset ? SHF_SC_R_SHF : SHF_SC_NONE; /* only ask for a right shift when there is something to shift */
+ u8 mask = (1 << field_len) - 1; /* low @field_len bits set; presumably one bit per destination byte -- confirm */
+
+ emit_ld_field_any(nfp_prog, dst, mask, src, sc, offset * 8, true);
+}
+
+/* nfp_cpp_memcpy() - emit a single bulk copy for a gathered load/store sequence
+ * @nfp_prog: program being JITed
+ * @meta: instruction whose @ldst_gather_len is non-zero; it supplies the
+ * source (@insn.src_reg plus @insn.off) and, through @paired_st, the
+ * destination (@paired_st->dst_reg plus @paired_st->off)
+ *
+ * NFP has Command Push Pull bus which supports bulk memory operations.
+ *
+ * The emitted sequence is: one read of 32-bit words covering the whole
+ * length, a move for every transfer register, one write whose flavour
+ * depends on the length, and finally a reload of the load's destination
+ * register.
+ *
+ * Return: 0 on success, -EOPNOTSUPP if the gathered length is above 32 bytes.
+ */
+static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
+{
+ bool descending_seq = meta->ldst_gather_len < 0; /* the sign of the length encodes the direction */
+ s16 len = abs(meta->ldst_gather_len); /* byte count with the direction stripped */
+ swreg src_base, off;
+ unsigned int i;
+ u8 xfer_num;
+
+ if (WARN_ON_ONCE(len > 32)) /* anything longer than 32 bytes is refused */
+ return -EOPNOTSUPP;
+
+ off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
+ src_base = reg_a(meta->insn.src_reg * 2); /* BPF register N is addressed as index N * 2; N * 2 + 1 is used as its upper half below */
+ xfer_num = round_up(len, 4) / 4; /* number of 4-byte words needed to cover @len bytes */
+
+ /* Memory read from source addr into transfer-in registers.
+ * The size argument is the word count minus one.
+ */
+ emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, off,
+ xfer_num - 1, true);
+
+ /* Move from transfer-in to transfer-out.
+ * reg_xfer(i) appears as both destination and source; presumably the
+ * destination side resolves to transfer-out and the source side to
+ * transfer-in -- confirm in wrp_mov()/reg_xfer().
+ */
+ for (i = 0; i < xfer_num; i++)
+ wrp_mov(nfp_prog, reg_xfer(i), reg_xfer(i));
+
+ off = re_load_imm_any(nfp_prog, meta->paired_st->off, imm_b(nfp_prog));
+
+ if (len <= 8) {
+ /* Use single direct_ref write8.
+ * The size argument is the byte count minus one.
+ */
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
+ true);
+ } else if (IS_ALIGNED(len, 4)) {
+ /* Use single direct_ref write32.
+ * Taken for lengths above 8 that are a multiple of 4; the size
+ * argument is the word count minus one.
+ */
+ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
+ true);
+ } else {
+ /* Use single indirect_ref write8.
+ * Taken for lengths above 8 that are not a multiple of 4. The
+ * immediate built here sets CMD_OVE_LEN and places len - 1 in
+ * the CMD_OV_LEN field; presumably the indirect command that
+ * follows picks it up as a length override -- confirm against
+ * the NFP documentation.
+ */
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
+ emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off,
+ len - 1, true);
+ }
+
+ /* TODO: The following extra load makes sure the data flow is identical
+ * before and after the memory copy optimization.
+ *
+ * The load destination register is not guaranteed to be dead, so we
+ * need to make sure it is loaded with the same value as before
+ * this transformation.
+ *
+ * These extra loads could be removed once we have accurate register
+ * usage information.
+ *
+ * xfer_num is reused from here on as the index of the transfer
+ * register to reload from: 0 for a descending sequence, otherwise the
+ * last word (or the first of the last two words for a BPF_DW load).
+ */
+ if (descending_seq)
+ xfer_num = 0;
+ else if (BPF_SIZE(meta->insn.code) != BPF_DW)
+ xfer_num = xfer_num - 1;
+ else
+ xfer_num = xfer_num - 2;
+
+ switch (BPF_SIZE(meta->insn.code)) {
+ case BPF_B:
+ wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(xfer_num), 1,
+ IS_ALIGNED(len, 4) ? 3 : (len & 3) - 1); /* byte offset 3 when len is a multiple of 4, else (len % 4) - 1 */
+ break;
+ case BPF_H:
+ wrp_reg_subpart(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(xfer_num), 2, (len & 3) ^ 2); /* byte offset 2 when len % 4 == 0, 0 when len % 4 == 2; NOTE(review): same offsets are used for both directions -- confirm */
+ break;
+ case BPF_W:
+ wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(0)); /* NOTE(review): always word 0, xfer_num is not used here -- confirm this is intended for ascending sequences */
+ break;
+ case BPF_DW:
+ wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2),
+ reg_xfer(xfer_num));
+ wrp_mov(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1),
+ reg_xfer(xfer_num + 1));
+ break;
+ }
+
+ if (BPF_SIZE(meta->insn.code) != BPF_DW) /* loads narrower than 64 bits clear the upper half of the destination */
+ wrp_immed(nfp_prog, reg_both(meta->insn.dst_reg * 2 + 1), 0);
+
+ return 0;
+}
+
static int
data_ld(struct nfp_prog *nfp_prog, swreg offset, u8 dst_gpr, int size)
{
@@ -1490,6 +1600,9 @@ static int
mem_ldx(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta,
unsigned int size)
{
+ if (meta->ldst_gather_len)
+ return nfp_cpp_memcpy(nfp_prog, meta);
+
if (meta->ptr.type == PTR_TO_CTX) {
if (nfp_prog->type == BPF_PROG_TYPE_XDP)
return mem_ldx_xdp(nfp_prog, meta, size);
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/main.h b/drivers/net/ethernet/netronome/nfp/bpf/main.h
index 20ef0adb2931..5884291ddba5 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/main.h
+++ b/drivers/net/ethernet/netronome/nfp/bpf/main.h
@@ -95,6 +95,8 @@ typedef int (*instr_cb_t)(struct nfp_prog *, struct nfp_insn_meta *);
* struct nfp_insn_meta - BPF instruction wrapper
* @insn: BPF instruction
* @ptr: pointer type for memory operations
+ * @ldst_gather_len: memcpy length gathered from load/store sequence
+ * @paired_st: the paired store insn at the head of the sequence
* @ptr_not_const: pointer is not always constant
* @jmp_dst: destination info for jump instructions
* @off: index of first generated machine instruction (in nfp_prog.prog)
@@ -109,6 +111,8 @@ struct nfp_insn_meta {
union {
struct {
struct bpf_reg_state ptr;
+ struct bpf_insn *paired_st;
+ s16 ldst_gather_len;
bool ptr_not_const;
};
struct nfp_insn_meta *jmp_dst;
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.c b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
index da277386077c..d3610987fb07 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.c
@@ -41,6 +41,7 @@
const struct cmd_tgt_act cmd_tgt_act[__CMD_TGT_MAP_SIZE] = {
[CMD_TGT_WRITE8_SWAP] = { 0x02, 0x42 },
+ [CMD_TGT_WRITE32_SWAP] = { 0x02, 0x5f },
[CMD_TGT_READ8] = { 0x01, 0x43 },
[CMD_TGT_READ32] = { 0x00, 0x5c },
[CMD_TGT_READ32_LE] = { 0x01, 0x5c },
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_asm.h b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
index 6ff842a15e5d..98803f9f40b6 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_asm.h
+++ b/drivers/net/ethernet/netronome/nfp/nfp_asm.h
@@ -220,6 +220,7 @@ struct cmd_tgt_act {
enum cmd_tgt_map {
CMD_TGT_READ8,
CMD_TGT_WRITE8_SWAP,
+ CMD_TGT_WRITE32_SWAP,
CMD_TGT_READ32,
CMD_TGT_READ32_LE,
CMD_TGT_READ32_SWAP,
@@ -241,6 +242,9 @@ enum cmd_ctx_swap {
CMD_CTX_NO_SWAP = 3,
};
+#define CMD_OVE_LEN BIT(7)
+#define CMD_OV_LEN GENMASK(12, 8)
+
#define OP_LCSR_BASE 0x0fc00000000ULL
#define OP_LCSR_A_SRC 0x000000003ffULL
#define OP_LCSR_B_SRC 0x000000ffc00ULL