summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorJiong Wang <jiong.wang@netronome.com>2017-11-30 21:32:59 -0800
committerDaniel Borkmann <daniel@iogearbox.net>2017-12-01 20:59:20 +0100
commit8c90053858fce1ca60fab7be03bb61d314ea5c1c (patch)
tree4e46460a1a160d6fcf085a7da61b7d557f08ae0a
parent9879a3814beb3b1350755475e67a8d92ba1f7e4b (diff)
downloadlinux-8c90053858fce1ca60fab7be03bb61d314ea5c1c.tar.bz2
nfp: bpf: implement memory bulk copy for length bigger than 32-bytes
When the gathered copy length is bigger than 32-bytes and within 128-bytes (the maximum length a single CPP Pull/Push request can finish), the strategy of read/write are changeed into: * Read. - use direct reference mode when length is within 32-bytes. - use indirect mode when length is bigger than 32-bytes. * Write. - length <= 8-bytes use write8 (direct_ref). - length <= 32-byte and 4-bytes aligned use write32 (direct_ref). - length <= 32-bytes but not 4-bytes aligned use write8 (indirect_ref). - length > 32-bytes and 4-bytes aligned use write32 (indirect_ref). - length > 32-bytes and not 4-bytes aligned and <= 40-bytes use write32 (direct_ref) to finish the first 32-bytes. use write8 (direct_ref) to finish all remaining hanging part. - length > 32-bytes and not 4-bytes aligned use write32 (indirect_ref) to finish those 4-byte aligned parts. use write8 (direct_ref) to finish all remaining hanging part. Signed-off-by: Jiong Wang <jiong.wang@netronome.com> Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com> Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
-rw-r--r--drivers/net/ethernet/netronome/nfp/bpf/jit.c52
1 files changed, 45 insertions, 7 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
index 138568c0eee6..1b98ef239605 100644
--- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c
+++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c
@@ -544,16 +544,18 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
unsigned int i;
u8 xfer_num;
- if (WARN_ON_ONCE(len > 32))
- return -EOPNOTSUPP;
-
off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog));
src_base = reg_a(meta->insn.src_reg * 2);
xfer_num = round_up(len, 4) / 4;
+ /* Setup PREV_ALU fields to override memory read length. */
+ if (len > 32)
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+
/* Memory read from source addr into transfer-in registers. */
- emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, off,
- xfer_num - 1, true);
+ emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base,
+ off, xfer_num - 1, true, len > 32);
/* Move from transfer-in to transfer-out. */
for (i = 0; i < xfer_num; i++)
@@ -566,18 +568,54 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta)
emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
reg_a(meta->paired_st->dst_reg * 2), off, len - 1,
true);
- } else if (IS_ALIGNED(len, 4)) {
+ } else if (len <= 32 && IS_ALIGNED(len, 4)) {
/* Use single direct_ref write32. */
emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1,
true);
- } else {
+ } else if (len <= 32) {
/* Use single indirect_ref write8. */
wrp_immed(nfp_prog, reg_none(),
CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1));
emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0,
reg_a(meta->paired_st->dst_reg * 2), off,
len - 1, true);
+ } else if (IS_ALIGNED(len, 4)) {
+ /* Use single indirect_ref write32. */
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1));
+ emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off,
+ xfer_num - 1, true);
+ } else if (len <= 40) {
+ /* Use one direct_ref write32 to write the first 32-bytes, then
+ * another direct_ref write8 to write the remaining bytes.
+ */
+ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off, 7,
+ true);
+
+ off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32,
+ imm_b(nfp_prog));
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8,
+ reg_a(meta->paired_st->dst_reg * 2), off, len - 33,
+ true);
+ } else {
+ /* Use one indirect_ref write32 to write 4-bytes aligned length,
+ * then another direct_ref write8 to write the remaining bytes.
+ */
+ u8 new_off;
+
+ wrp_immed(nfp_prog, reg_none(),
+ CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2));
+ emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0,
+ reg_a(meta->paired_st->dst_reg * 2), off,
+ xfer_num - 2, true);
+ new_off = meta->paired_st->off + (xfer_num - 1) * 4;
+ off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog));
+ emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b,
+ xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off,
+ (len & 0x3) - 1, true);
}
/* TODO: The following extra load is to make sure data flow be identical