diff options
author | Jiong Wang <jiong.wang@netronome.com> | 2017-11-30 21:32:59 -0800 |
---|---|---|
committer | Daniel Borkmann <daniel@iogearbox.net> | 2017-12-01 20:59:20 +0100 |
commit | 8c90053858fce1ca60fab7be03bb61d314ea5c1c (patch) | |
tree | 4e46460a1a160d6fcf085a7da61b7d557f08ae0a /drivers/net/ethernet | |
parent | 9879a3814beb3b1350755475e67a8d92ba1f7e4b (diff) | |
download | linux-8c90053858fce1ca60fab7be03bb61d314ea5c1c.tar.bz2 |
nfp: bpf: implement memory bulk copy for length bigger than 32-bytes
When the gathered copy length is bigger than 32-bytes and within 128-bytes
(the maximum length a single CPP Pull/Push request can finish), the
strategy of read/write are changeed into:
* Read.
- use direct reference mode when length is within 32-bytes.
- use indirect mode when length is bigger than 32-bytes.
* Write.
- length <= 8-bytes
use write8 (direct_ref).
- length <= 32-byte and 4-bytes aligned
use write32 (direct_ref).
- length <= 32-bytes but not 4-bytes aligned
use write8 (indirect_ref).
- length > 32-bytes and 4-bytes aligned
use write32 (indirect_ref).
- length > 32-bytes and not 4-bytes aligned and <= 40-bytes
use write32 (direct_ref) to finish the first 32-bytes.
use write8 (direct_ref) to finish all remaining hanging part.
- length > 32-bytes and not 4-bytes aligned
use write32 (indirect_ref) to finish those 4-byte aligned parts.
use write8 (direct_ref) to finish all remaining hanging part.
Signed-off-by: Jiong Wang <jiong.wang@netronome.com>
Reviewed-by: Jakub Kicinski <jakub.kicinski@netronome.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Diffstat (limited to 'drivers/net/ethernet')
-rw-r--r-- | drivers/net/ethernet/netronome/nfp/bpf/jit.c | 52 |
1 files changed, 45 insertions, 7 deletions
diff --git a/drivers/net/ethernet/netronome/nfp/bpf/jit.c b/drivers/net/ethernet/netronome/nfp/bpf/jit.c index 138568c0eee6..1b98ef239605 100644 --- a/drivers/net/ethernet/netronome/nfp/bpf/jit.c +++ b/drivers/net/ethernet/netronome/nfp/bpf/jit.c @@ -544,16 +544,18 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) unsigned int i; u8 xfer_num; - if (WARN_ON_ONCE(len > 32)) - return -EOPNOTSUPP; - off = re_load_imm_any(nfp_prog, meta->insn.off, imm_b(nfp_prog)); src_base = reg_a(meta->insn.src_reg * 2); xfer_num = round_up(len, 4) / 4; + /* Setup PREV_ALU fields to override memory read length. */ + if (len > 32) + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + /* Memory read from source addr into transfer-in registers. */ - emit_cmd(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, off, - xfer_num - 1, true); + emit_cmd_any(nfp_prog, CMD_TGT_READ32_SWAP, CMD_MODE_32b, 0, src_base, + off, xfer_num - 1, true, len > 32); /* Move from transfer-in to transfer-out. */ for (i = 0; i < xfer_num; i++) @@ -566,18 +568,54 @@ static int nfp_cpp_memcpy(struct nfp_prog *nfp_prog, struct nfp_insn_meta *meta) emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, len - 1, true); - } else if (IS_ALIGNED(len, 4)) { + } else if (len <= 32 && IS_ALIGNED(len, 4)) { /* Use single direct_ref write32. */ emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, xfer_num - 1, true); - } else { + } else if (len <= 32) { /* Use single indirect_ref write8. */ wrp_immed(nfp_prog, reg_none(), CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, len - 1)); emit_cmd_indir(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 0, reg_a(meta->paired_st->dst_reg * 2), off, len - 1, true); + } else if (IS_ALIGNED(len, 4)) { + /* Use single indirect_ref write32. */ + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 1)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 1, true); + } else if (len <= 40) { + /* Use one direct_ref write32 to write the first 32-bytes, then + * another direct_ref write8 to write the remaining bytes. + */ + emit_cmd(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, 7, + true); + + off = re_load_imm_any(nfp_prog, meta->paired_st->off + 32, + imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, 8, + reg_a(meta->paired_st->dst_reg * 2), off, len - 33, + true); + } else { + /* Use one indirect_ref write32 to write 4-bytes aligned length, + * then another direct_ref write8 to write the remaining bytes. + */ + u8 new_off; + + wrp_immed(nfp_prog, reg_none(), + CMD_OVE_LEN | FIELD_PREP(CMD_OV_LEN, xfer_num - 2)); + emit_cmd_indir(nfp_prog, CMD_TGT_WRITE32_SWAP, CMD_MODE_32b, 0, + reg_a(meta->paired_st->dst_reg * 2), off, + xfer_num - 2, true); + new_off = meta->paired_st->off + (xfer_num - 1) * 4; + off = re_load_imm_any(nfp_prog, new_off, imm_b(nfp_prog)); + emit_cmd(nfp_prog, CMD_TGT_WRITE8_SWAP, CMD_MODE_32b, + xfer_num - 1, reg_a(meta->paired_st->dst_reg * 2), off, + (len & 0x3) - 1, true); } /* TODO: The following extra load is to make sure data flow be identical |