diff options
author | Tariq Toukan <tariqt@mellanox.com> | 2017-03-29 11:46:10 +0300 |
---|---|---|
committer | Saeed Mahameed <saeedm@mellanox.com> | 2017-04-30 16:03:16 +0300 |
commit | 1f5b1e47ee08f6c623db599b6c23ce7c20b79458 (patch) | |
tree | 22ba10a6a89b11fccd7d07db2804aa647dbf7783 /drivers/net | |
parent | a2fa1fe5ad13e7f11b82291fc08bdc654fac741e (diff) | |
download | linux-1f5b1e47ee08f6c623db599b6c23ce7c20b79458.tar.bz2 |
net/mlx5e: Optimize poll ICOSQ completion queue
UMR operations are more frequent and important.
Check them first, and add a compiler branch predictor hint.
According to current design, ICOSQ CQ can contain at most one
pending CQE per napi. Poll function is optimized accordingly.
Performance:
Single-stream packet-rate tested with pktgen.
Packets are dropped in tc level to zoom into driver data-path.
Larger gain is expected for larger packet sizes, as BW is higher
and UMR posts are more frequent.
---------------------------------------------
packet size | before | after | gain |
64B | 4,092,370 | 4,113,306 | 0.5% |
1024B | 3,421,435 | 3,633,819 | 6.2% |
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Cc: kernel-team@fb.com
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Diffstat (limited to 'drivers/net')
-rw-r--r-- | drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c | 62 |
1 files changed, 33 insertions, 29 deletions
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c index 43729ec35dfc..491e83d09b58 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c @@ -49,10 +49,40 @@ struct mlx5_cqe64 *mlx5e_get_cqe(struct mlx5e_cq *cq) return cqe; } +static inline void mlx5e_poll_ico_single_cqe(struct mlx5e_cq *cq, + struct mlx5e_icosq *sq, + struct mlx5_cqe64 *cqe, + u16 *sqcc) +{ + struct mlx5_wq_cyc *wq = &sq->wq; + u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; + struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; + struct mlx5e_rq *rq = &sq->channel->rq; + + prefetch(rq); + mlx5_cqwq_pop(&cq->wq); + *sqcc += icowi->num_wqebbs; + + if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { + WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", + cqe->op_own); + return; + } + + if (likely(icowi->opcode == MLX5_OPCODE_UMR)) { + mlx5e_post_rx_mpwqe(rq); + return; + } + + if (unlikely(icowi->opcode != MLX5_OPCODE_NOP)) + WARN_ONCE(true, + "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", + icowi->opcode); +} + static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) { struct mlx5e_icosq *sq = container_of(cq, struct mlx5e_icosq, cq); - struct mlx5_wq_cyc *wq; struct mlx5_cqe64 *cqe; u16 sqcc; @@ -63,39 +93,13 @@ static void mlx5e_poll_ico_cq(struct mlx5e_cq *cq) if (likely(!cqe)) return; - wq = &sq->wq; - /* sq->cc must be updated only after mlx5_cqwq_update_db_record(), * otherwise a cq overrun may occur */ sqcc = sq->cc; - do { - u16 ci = be16_to_cpu(cqe->wqe_counter) & wq->sz_m1; - struct mlx5e_sq_wqe_info *icowi = &sq->db.ico_wqe[ci]; - - mlx5_cqwq_pop(&cq->wq); - sqcc += icowi->num_wqebbs; - - if (unlikely((cqe->op_own >> 4) != MLX5_CQE_REQ)) { - WARN_ONCE(true, "mlx5e: Bad OP in ICOSQ CQE: 0x%x\n", - cqe->op_own); - break; - } - - switch (icowi->opcode) { - case MLX5_OPCODE_NOP: - break; - case MLX5_OPCODE_UMR: - mlx5e_post_rx_mpwqe(&sq->channel->rq); - break; - default: - WARN_ONCE(true, - "mlx5e: Bad OPCODE in ICOSQ WQE info: 0x%x\n", - icowi->opcode); - } - - } while ((cqe = mlx5e_get_cqe(cq))); + /* by design, there's only a single cqe */ + mlx5e_poll_ico_single_cqe(cq, sq, cqe, &sqcc); mlx5_cqwq_update_db_record(&cq->wq); |