summaryrefslogtreecommitdiffstats
path: root/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
diff options
context:
space:
mode:
authorShay Agroskin <shayag@mellanox.com>2019-05-12 18:28:27 +0300
committerSaeed Mahameed <saeedm@mellanox.com>2019-08-01 12:33:31 -0700
commit6c085a8aab5183d8658c9a692bcfda3e24195b7a (patch)
treeb63bb42dc43350339956d97d0757a740ff006099 /drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
parent68865419ba1bf502a5bd279a500deda64000249d (diff)
downloadlinux-6c085a8aab5183d8658c9a692bcfda3e24195b7a.tar.bz2
net/mlx5e: XDP, Close TX MPWQE session when no room for inline packet left
In MPWQE mode, when transmitting packets with XDP, a packet that is smaller than a certain size (set to 256 bytes) would be sent inline within its WQE TX descriptor (mem-copied), in case the hardware tx queue is congested beyond a pre-defined water-mark. If a MPWQE cannot contain an additional inline packet, we close this MPWQE session, and send the packet inlined within the next MPWQE. To save some MPWQE session close+open operations, we don't open MPWQE sessions that are contiguously smaller than certain size (set to the HW MPWQE maximum size). If there isn't enough contiguous room in the send queue, we fill it with NOPs and wrap the send queue index around. This way, qualified packets are always sent inline. Perf tests: Tested packet rate for UDP 64Byte multi-stream over two dual port ConnectX-5 100Gbps NICs. CPU: Intel(R) Xeon(R) CPU E5-2680 v3 @ 2.50GHz XDP_TX: With 24 channels: | ------ | bounced packets | inlined packets | inline ratio | | before | 113.6Mpps | 96.3Mpps | 84% | | after | 115Mpps | 99.5Mpps | 86% | With one channel: | ------ | bounced packets | inlined packets | inline ratio | | before | 6.7Mpps | 0pps | 0% | | after | 6.8Mpps | 0pps | 0% | As we can see, there is improvement in both inline ratio and overall packet rate for 24 channels. Also, we see no degradation for the one-channel case. Signed-off-by: Shay Agroskin <shayag@mellanox.com> Signed-off-by: Tariq Toukan <tariqt@mellanox.com> Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c')
-rw-r--r--drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c32
1 files changed, 10 insertions, 22 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index b0b982cf69bb..8cb98326531f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -179,34 +179,22 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
struct mlx5e_xdpsq_stats *stats = sq->stats;
struct mlx5_wq_cyc *wq = &sq->wq;
- u8 wqebbs;
- u16 pi;
+ u16 pi, contig_wqebbs;
+
+ pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+ contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+
+ if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
+ mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
mlx5e_xdpsq_fetch_wqe(sq, &session->wqe);
prefetchw(session->wqe->data);
session->ds_count = MLX5E_XDP_TX_EMPTY_DS_COUNT;
session->pkt_count = 0;
- session->complete = 0;
pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-/* The mult of MLX5_SEND_WQE_MAX_WQEBBS * MLX5_SEND_WQEBB_NUM_DS
- * (16 * 4 == 64) does not fit in the 6-bit DS field of Ctrl Segment.
- * We use a bound lower that MLX5_SEND_WQE_MAX_WQEBBS to let a
- * full-session WQE be cache-aligned.
- */
-#if L1_CACHE_BYTES < 128
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#else
-#define MLX5E_XDP_MPW_MAX_WQEBBS (MLX5_SEND_WQE_MAX_WQEBBS - 2)
-#endif
-
- wqebbs = min_t(u16, mlx5_wq_cyc_get_contig_wqebbs(wq, pi),
- MLX5E_XDP_MPW_MAX_WQEBBS);
-
- session->max_ds_count = MLX5_SEND_WQEBB_NUM_DS * wqebbs;
-
mlx5e_xdp_update_inline_state(sq);
stats->mpwqe++;
@@ -244,7 +232,7 @@ static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
{
if (unlikely(!sq->mpwqe.wqe)) {
if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
- MLX5_SEND_WQE_MAX_WQEBBS))) {
+ MLX5E_XDPSQ_STOP_ROOM))) {
/* SQ is full, ring doorbell */
mlx5e_xmit_xdp_doorbell(sq);
sq->stats->full++;
@@ -285,8 +273,8 @@ static bool mlx5e_xmit_xdp_frame_mpwqe(struct mlx5e_xdpsq *sq,
mlx5e_xdp_mpwqe_add_dseg(sq, xdptxd, stats);
- if (unlikely(session->complete ||
- session->ds_count == session->max_ds_count))
+ if (unlikely(mlx5e_xdp_no_room_for_inline_pkt(session) ||
+ session->ds_count == MLX5E_XDP_MPW_MAX_NUM_DS))
mlx5e_xdp_mpwqe_complete(sq);
mlx5e_xdpi_fifo_push(&sq->db.xdpi_fifo, xdpi);