From 939de57d30344ce728b0de61be87984e75af420e Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Mon, 5 Nov 2018 16:05:37 -0600 Subject: net/mlx5e: Use CQE padding for Ethernet CQs Writing 64B CQEs to 128B cache lines results in a RMW operation. Padding the CQEs to 128B if possible improves performance on 128B cache line systems like PPC. Testing on PPC showed up to a 24% improvement in small packet throughput vs the default behavior, depending on the workload and system topology. Signed-off-by: Daniel Jurgens Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/wq.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'drivers/net/ethernet/mellanox/mlx5/core/wq.h') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/wq.h b/drivers/net/ethernet/mellanox/mlx5/core/wq.h index 9bc2184a46bc..ea934a48c90a 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/wq.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/wq.h @@ -179,7 +179,12 @@ static inline u32 mlx5_cqwq_get_ci(struct mlx5_cqwq *wq) static inline struct mlx5_cqe64 *mlx5_cqwq_get_wqe(struct mlx5_cqwq *wq, u32 ix) { - return mlx5_frag_buf_get_wqe(&wq->fbc, ix); + struct mlx5_cqe64 *cqe = mlx5_frag_buf_get_wqe(&wq->fbc, ix); + + /* For 128B CQEs the data is in the last 64B */ + cqe += wq->fbc.log_stride == 7; + + return cqe; } static inline u32 mlx5_cqwq_get_ctr_wrap_cnt(struct mlx5_cqwq *wq, u32 ctr) -- cgit v1.2.3