 drivers/net/ethernet/mellanox/mlx5/core/en.h         |  24
 drivers/net/ethernet/mellanox/mlx5/core/en_clock.c   |   4
 drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c |  19
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c    |  81
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c      | 188
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h   |   8
 include/linux/mlx5/device.h                          |  34
 7 files changed, 328 insertions(+), 30 deletions(-)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index bfa5daaaf5aa..e8a6c3325b39 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -64,12 +64,9 @@
 #define MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW            0x4
 #define MLX5E_PARAMS_MAXIMUM_LOG_RQ_SIZE_MPW            0x6
 
-#define MLX5_MPWRQ_LOG_NUM_STRIDES              11 /* >= 9, HW restriction */
 #define MLX5_MPWRQ_LOG_STRIDE_SIZE              6  /* >= 6, HW restriction */
-#define MLX5_MPWRQ_NUM_STRIDES                  BIT(MLX5_MPWRQ_LOG_NUM_STRIDES)
-#define MLX5_MPWRQ_STRIDE_SIZE                  BIT(MLX5_MPWRQ_LOG_STRIDE_SIZE)
-#define MLX5_MPWRQ_LOG_WQE_SZ                   (MLX5_MPWRQ_LOG_NUM_STRIDES +\
-                                                 MLX5_MPWRQ_LOG_STRIDE_SIZE)
+#define MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS 8  /* >= 6, HW restriction */
+#define MLX5_MPWRQ_LOG_WQE_SZ                   17
 #define MLX5_MPWRQ_WQE_PAGE_ORDER  (MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT > 0 ? \
                                     MLX5_MPWRQ_LOG_WQE_SZ - PAGE_SHIFT : 0)
 #define MLX5_MPWRQ_PAGES_PER_WQE                BIT(MLX5_MPWRQ_WQE_PAGE_ORDER)
@@ -154,9 +151,13 @@ struct mlx5e_umr_wqe {
 struct mlx5e_params {
         u8  log_sq_size;
         u8  rq_wq_type;
+        u8  mpwqe_log_stride_sz;
+        u8  mpwqe_log_num_strides;
         u8  log_rq_size;
         u16 num_channels;
         u8  num_tc;
+        bool rx_cqe_compress_admin;
+        bool rx_cqe_compress;
         u16 rx_cq_moderation_usec;
         u16 rx_cq_moderation_pkts;
         u16 tx_cq_moderation_usec;
@@ -202,6 +203,13 @@ struct mlx5e_cq {
         struct mlx5e_channel      *channel;
         struct mlx5e_priv         *priv;
 
+        /* cqe decompression */
+        struct mlx5_cqe64          title;
+        struct mlx5_mini_cqe8      mini_arr[MLX5_MINI_CQE_ARRAY_SIZE];
+        u8                         mini_arr_idx;
+        u16                        decmprs_left;
+        u16                        decmprs_wqe_counter;
+
         /* control */
         struct mlx5_wq_ctrl        wq_ctrl;
 } ____cacheline_aligned_in_smp;
@@ -240,6 +248,8 @@ struct mlx5e_rq {
         /* control */
         struct mlx5_wq_ctrl    wq_ctrl;
         u8                     wq_type;
+        u32                    mpwqe_stride_sz;
+        u32                    mpwqe_num_strides;
         u32                    rqn;
         struct mlx5e_channel  *channel;
         struct mlx5e_priv     *priv;
@@ -263,7 +273,7 @@ struct mlx5e_mpw_info {
         void (*dma_pre_sync)(struct device *pdev,
                              struct mlx5e_mpw_info *wi,
                              u32 wqe_offset, u32 len);
-        void (*add_skb_frag)(struct device *pdev,
+        void (*add_skb_frag)(struct mlx5e_rq *rq,
                              struct sk_buff *skb,
                              struct mlx5e_mpw_info *wi,
                              u32 page_idx, u32 frag_offset, u32 len);
@@ -616,6 +626,7 @@ void mlx5e_timestamp_init(struct mlx5e_priv *priv);
 void mlx5e_timestamp_cleanup(struct mlx5e_priv *priv);
 int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr);
 int mlx5e_hwstamp_get(struct net_device *dev, struct ifreq *ifr);
+void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val);
 
 int mlx5e_vlan_rx_add_vid(struct net_device *dev, __always_unused __be16 proto,
                           u16 vid);
@@ -634,6 +645,7 @@ int mlx5e_close_locked(struct net_device *netdev);
 void mlx5e_build_default_indir_rqt(struct mlx5_core_dev *mdev,
                                    u32 *indirection_rqt, int len,
                                    int num_channels);
+int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed);
 
 static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
                                       struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
index 2018eebe1531..847a8f3ac2b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_clock.c
@@ -93,6 +93,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
         /* RX HW timestamp */
         switch (config.rx_filter) {
         case HWTSTAMP_FILTER_NONE:
+                /* Reset CQE compression to Admin default */
+                mlx5e_modify_rx_cqe_compression(priv, priv->params.rx_cqe_compress_admin);
                 break;
         case HWTSTAMP_FILTER_ALL:
         case HWTSTAMP_FILTER_SOME:
@@ -108,6 +110,8 @@ int mlx5e_hwstamp_set(struct net_device *dev, struct ifreq *ifr)
         case HWTSTAMP_FILTER_PTP_V2_EVENT:
         case HWTSTAMP_FILTER_PTP_V2_SYNC:
         case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+                /* Disable CQE compression */
+                mlx5e_modify_rx_cqe_compression(priv, false);
                 config.rx_filter = HWTSTAMP_FILTER_ALL;
                 break;
         default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 534d99e2f9c8..fc7dcc03b1de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -613,6 +613,25 @@ static u32 ptys2ethtool_supported_port(u32 eth_proto_cap)
         return 0;
 }
 
+int mlx5e_get_max_linkspeed(struct mlx5_core_dev *mdev, u32 *speed)
+{
+        u32 max_speed = 0;
+        u32 proto_cap;
+        int err;
+        int i;
+
+        err = mlx5_query_port_proto_cap(mdev, &proto_cap, MLX5_PTYS_EN);
+        if (err)
+                return err;
+
+        for (i = 0; i < MLX5E_LINK_MODES_NUMBER; ++i)
+                if (proto_cap & MLX5E_PROT_MASK(i))
+                        max_speed = max(max_speed, ptys2ethtool_table[i].speed);
+
+        *speed = max_speed;
+        return 0;
+}
+
 static void get_speed_duplex(struct net_device *netdev,
                              u32 eth_proto_oper,
                              struct ethtool_cmd *cmd)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 1c70e518b5c5..08040702824d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -114,6 +114,8 @@ static void mlx5e_update_sw_counters(struct mlx5e_priv *priv)
                 s->rx_mpwqe_filler      += rq_stats->mpwqe_filler;
                 s->rx_mpwqe_frag        += rq_stats->mpwqe_frag;
                 s->rx_buff_alloc_err    += rq_stats->buff_alloc_err;
+                s->rx_cqe_compress_blks += rq_stats->cqe_compress_blks;
+                s->rx_cqe_compress_pkts += rq_stats->cqe_compress_pkts;
 
                 for (j = 0; j < priv->params.num_tc; j++) {
                         sq_stats = &priv->channel[i]->sq[j].stats;
@@ -305,7 +307,9 @@ static int mlx5e_create_rq(struct mlx5e_channel *c,
                 rq->handle_rx_cqe = mlx5e_handle_rx_cqe_mpwrq;
                 rq->alloc_wqe = mlx5e_alloc_rx_mpwqe;
 
-                rq->wqe_sz = MLX5_MPWRQ_NUM_STRIDES * MLX5_MPWRQ_STRIDE_SIZE;
+                rq->mpwqe_stride_sz = BIT(priv->params.mpwqe_log_stride_sz);
+                rq->mpwqe_num_strides = BIT(priv->params.mpwqe_log_num_strides);
+                rq->wqe_sz = rq->mpwqe_stride_sz * rq->mpwqe_num_strides;
                 byte_count = rq->wqe_sz;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1128,9 +1132,9 @@ static void mlx5e_build_rq_param(struct mlx5e_priv *priv,
         switch (priv->params.rq_wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 MLX5_SET(wq, wq, log_wqe_num_of_strides,
-                         MLX5_MPWRQ_LOG_NUM_STRIDES - 9);
+                         priv->params.mpwqe_log_num_strides - 9);
                 MLX5_SET(wq, wq, log_wqe_stride_size,
-                         MLX5_MPWRQ_LOG_STRIDE_SIZE - 6);
+                         priv->params.mpwqe_log_stride_sz - 6);
                 MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ);
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
@@ -1197,13 +1201,17 @@ static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
         switch (priv->params.rq_wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 log_cq_size = priv->params.log_rq_size +
-                        MLX5_MPWRQ_LOG_NUM_STRIDES;
+                        priv->params.mpwqe_log_num_strides;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
                 log_cq_size = priv->params.log_rq_size;
         }
 
         MLX5_SET(cqc, cqc, log_cq_size, log_cq_size);
+        if (priv->params.rx_cqe_compress) {
+                MLX5_SET(cqc, cqc, mini_cqe_res_format, MLX5_CQE_FORMAT_CSUM);
+                MLX5_SET(cqc, cqc, cqe_comp_en, 1);
+        }
 
         mlx5e_build_common_cq_param(priv, param);
 }
@@ -2708,11 +2716,49 @@ static bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
                MLX5_CAP_ETH(mdev, reg_umr_sq);
 }
 
+static int mlx5e_get_pci_bw(struct mlx5_core_dev *mdev, u32 *pci_bw)
+{
+        enum pcie_link_width width;
+        enum pci_bus_speed speed;
+        int err = 0;
+
+        err = pcie_get_minimum_link(mdev->pdev, &speed, &width);
+        if (err)
+                return err;
+
+        if (speed == PCI_SPEED_UNKNOWN || width == PCIE_LNK_WIDTH_UNKNOWN)
+                return -EINVAL;
+
+        switch (speed) {
+        case PCIE_SPEED_2_5GT:
+                *pci_bw = 2500 * width;
+                break;
+        case PCIE_SPEED_5_0GT:
+                *pci_bw = 5000 * width;
+                break;
+        case PCIE_SPEED_8_0GT:
+                *pci_bw = 8000 * width;
+                break;
+        default:
+                return -EINVAL;
+        }
+
+        return 0;
+}
+
+static bool cqe_compress_heuristic(u32 link_speed, u32 pci_bw)
+{
+        return (link_speed && pci_bw &&
+                (pci_bw < 40000) && (pci_bw < link_speed));
+}
+
 static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
                                     struct net_device *netdev,
                                     int num_channels)
 {
         struct mlx5e_priv *priv = netdev_priv(netdev);
+        u32 link_speed = 0;
+        u32 pci_bw = 0;
 
         priv->params.log_sq_size           = MLX5E_PARAMS_DEFAULT_LOG_SQ_SIZE;
@@ -2720,15 +2766,42 @@ static void mlx5e_build_netdev_priv(struct mlx5_core_dev *mdev,
                 MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ :
                 MLX5_WQ_TYPE_LINKED_LIST;
 
+        /* set CQE compression */
+        priv->params.rx_cqe_compress_admin = false;
+        if (MLX5_CAP_GEN(mdev, cqe_compression) &&
+            MLX5_CAP_GEN(mdev, vport_group_manager)) {
+                mlx5e_get_max_linkspeed(mdev, &link_speed);
+                mlx5e_get_pci_bw(mdev, &pci_bw);
+                mlx5_core_dbg(mdev, "Max link speed = %d, PCI BW = %d\n",
+                              link_speed, pci_bw);
+                priv->params.rx_cqe_compress_admin =
+                        cqe_compress_heuristic(link_speed, pci_bw);
+        }
+
+        priv->params.rx_cqe_compress = priv->params.rx_cqe_compress_admin;
+
         switch (priv->params.rq_wq_type) {
         case MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ:
                 priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE_MPW;
+                priv->params.mpwqe_log_stride_sz =
+                        priv->params.rx_cqe_compress ?
+                        MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS :
+                        MLX5_MPWRQ_LOG_STRIDE_SIZE;
+                priv->params.mpwqe_log_num_strides = MLX5_MPWRQ_LOG_WQE_SZ -
+                        priv->params.mpwqe_log_stride_sz;
                 priv->params.lro_en = true;
                 break;
         default: /* MLX5_WQ_TYPE_LINKED_LIST */
                 priv->params.log_rq_size = MLX5E_PARAMS_DEFAULT_LOG_RQ_SIZE;
         }
 
+        mlx5_core_info(mdev,
+                       "MLX5E: StrdRq(%d) RqSz(%ld) StrdSz(%ld) RxCqeCmprss(%d)\n",
+                       priv->params.rq_wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ,
+                       BIT(priv->params.log_rq_size),
+                       BIT(priv->params.mpwqe_log_stride_sz),
+                       priv->params.rx_cqe_compress_admin);
+
         priv->params.min_rx_wqes = mlx5_min_rx_wqes(priv->params.rq_wq_type,
                                                     BIT(priv->params.log_rq_size));
         priv->params.rx_cq_moderation_usec =
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 23adfe2fcba9..f3456798c596 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -42,6 +42,143 @@ static inline bool mlx5e_rx_hw_stamp(struct mlx5e_tstamp *tstamp)
         return tstamp->hwtstamp_config.rx_filter == HWTSTAMP_FILTER_ALL;
 }
 
+static inline void mlx5e_read_cqe_slot(struct mlx5e_cq *cq, u32 cqcc,
+                                       void *data)
+{
+        u32 ci = cqcc & cq->wq.sz_m1;
+
+        memcpy(data, mlx5_cqwq_get_wqe(&cq->wq, ci), sizeof(struct mlx5_cqe64));
+}
+
+static inline void mlx5e_read_title_slot(struct mlx5e_rq *rq,
+                                         struct mlx5e_cq *cq, u32 cqcc)
+{
+        mlx5e_read_cqe_slot(cq, cqcc, &cq->title);
+        cq->decmprs_left        = be32_to_cpu(cq->title.byte_cnt);
+        cq->decmprs_wqe_counter = be16_to_cpu(cq->title.wqe_counter);
+        rq->stats.cqe_compress_blks++;
+}
+
+static inline void mlx5e_read_mini_arr_slot(struct mlx5e_cq *cq, u32 cqcc)
+{
+        mlx5e_read_cqe_slot(cq, cqcc, cq->mini_arr);
+        cq->mini_arr_idx = 0;
+}
+
+static inline void mlx5e_cqes_update_owner(struct mlx5e_cq *cq, u32 cqcc, int n)
+{
+        u8 op_own = (cqcc >> cq->wq.log_sz) & 1;
+        u32 wq_sz = 1 << cq->wq.log_sz;
+        u32 ci = cqcc & cq->wq.sz_m1;
+        u32 ci_top = min_t(u32, wq_sz, ci + n);
+
+        for (; ci < ci_top; ci++, n--) {
+                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+                cqe->op_own = op_own;
+        }
+
+        if (unlikely(ci == wq_sz)) {
+                op_own = !op_own;
+                for (ci = 0; ci < n; ci++) {
+                        struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, ci);
+
+                        cqe->op_own = op_own;
+                }
+        }
+}
+
+static inline void mlx5e_decompress_cqe(struct mlx5e_rq *rq,
+                                        struct mlx5e_cq *cq, u32 cqcc)
+{
+        u16 wqe_cnt_step;
+
+        cq->title.byte_cnt  = cq->mini_arr[cq->mini_arr_idx].byte_cnt;
+        cq->title.check_sum = cq->mini_arr[cq->mini_arr_idx].checksum;
+        cq->title.op_own   &= 0xf0;
+        cq->title.op_own   |= 0x01 & (cqcc >> cq->wq.log_sz);
+        cq->title.wqe_counter = cpu_to_be16(cq->decmprs_wqe_counter);
+
+        wqe_cnt_step =
+                rq->wq_type == MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ ?
+                mpwrq_get_cqe_consumed_strides(&cq->title) : 1;
+        cq->decmprs_wqe_counter =
+                (cq->decmprs_wqe_counter + wqe_cnt_step) & rq->wq.sz_m1;
+}
+
+static inline void mlx5e_decompress_cqe_no_hash(struct mlx5e_rq *rq,
+                                                struct mlx5e_cq *cq, u32 cqcc)
+{
+        mlx5e_decompress_cqe(rq, cq, cqcc);
+        cq->title.rss_hash_type   = 0;
+        cq->title.rss_hash_result = 0;
+}
+
+static inline u32 mlx5e_decompress_cqes_cont(struct mlx5e_rq *rq,
+                                             struct mlx5e_cq *cq,
+                                             int update_owner_only,
+                                             int budget_rem)
+{
+        u32 cqcc = cq->wq.cc + update_owner_only;
+        u32 cqe_count;
+        u32 i;
+
+        cqe_count = min_t(u32, cq->decmprs_left, budget_rem);
+
+        for (i = update_owner_only; i < cqe_count;
+             i++, cq->mini_arr_idx++, cqcc++) {
+                if (cq->mini_arr_idx == MLX5_MINI_CQE_ARRAY_SIZE)
+                        mlx5e_read_mini_arr_slot(cq, cqcc);
+
+                mlx5e_decompress_cqe_no_hash(rq, cq, cqcc);
+                rq->handle_rx_cqe(rq, &cq->title);
+        }
+        mlx5e_cqes_update_owner(cq, cq->wq.cc, cqcc - cq->wq.cc);
+        cq->wq.cc = cqcc;
+        cq->decmprs_left -= cqe_count;
+        rq->stats.cqe_compress_pkts += cqe_count;
+
+        return cqe_count;
+}
+
+static inline u32 mlx5e_decompress_cqes_start(struct mlx5e_rq *rq,
+                                              struct mlx5e_cq *cq,
+                                              int budget_rem)
+{
+        mlx5e_read_title_slot(rq, cq, cq->wq.cc);
+        mlx5e_read_mini_arr_slot(cq, cq->wq.cc + 1);
+        mlx5e_decompress_cqe(rq, cq, cq->wq.cc);
+        rq->handle_rx_cqe(rq, &cq->title);
+        cq->mini_arr_idx++;
+
+        return mlx5e_decompress_cqes_cont(rq, cq, 1, budget_rem) - 1;
+}
+
+void mlx5e_modify_rx_cqe_compression(struct mlx5e_priv *priv, bool val)
+{
+        bool was_opened;
+
+        if (!MLX5_CAP_GEN(priv->mdev, cqe_compression))
+                return;
+
+        mutex_lock(&priv->state_lock);
+
+        if (priv->params.rx_cqe_compress == val)
+                goto unlock;
+
+        was_opened = test_bit(MLX5E_STATE_OPENED, &priv->state);
+        if (was_opened)
+                mlx5e_close_locked(priv->netdev);
+
+        priv->params.rx_cqe_compress = val;
+
+        if (was_opened)
+                mlx5e_open_locked(priv->netdev);
+
+unlock:
+        mutex_unlock(&priv->state_lock);
+}
+
 int mlx5e_alloc_rx_wqe(struct mlx5e_rq *rq, struct mlx5e_rx_wqe *wqe, u16 ix)
 {
         struct sk_buff *skb;
@@ -75,6 +212,11 @@ err_free_skb:
         return -ENOMEM;
 }
 
+static inline int mlx5e_mpwqe_strides_per_page(struct mlx5e_rq *rq)
+{
+        return rq->mpwqe_num_strides >> MLX5_MPWRQ_WQE_PAGE_ORDER;
+}
+
 static inline void
 mlx5e_dma_pre_sync_linear_mpwqe(struct device *pdev,
                                 struct mlx5e_mpw_info *wi,
@@ -93,13 +235,13 @@ mlx5e_dma_pre_sync_fragmented_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_linear_mpwqe(struct mlx5e_rq *rq,
                                 struct sk_buff *skb,
                                 struct mlx5e_mpw_info *wi,
                                 u32 page_idx, u32 frag_offset,
                                 u32 len)
 {
-        unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+        unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
         wi->skbs_frags[page_idx]++;
         skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags,
@@ -108,15 +250,15 @@ mlx5e_add_skb_frag_linear_mpwqe(struct device *pdev,
 }
 
 static inline void
-mlx5e_add_skb_frag_fragmented_mpwqe(struct device *pdev,
+mlx5e_add_skb_frag_fragmented_mpwqe(struct mlx5e_rq *rq,
                                     struct sk_buff *skb,
                                     struct mlx5e_mpw_info *wi,
                                     u32 page_idx, u32 frag_offset,
                                     u32 len)
 {
-        unsigned int truesize = ALIGN(len, MLX5_MPWRQ_STRIDE_SIZE);
+        unsigned int truesize = ALIGN(len, rq->mpwqe_stride_sz);
 
-        dma_sync_single_for_cpu(pdev,
+        dma_sync_single_for_cpu(rq->pdev,
                                 wi->umr.dma_info[page_idx].addr + frag_offset,
                                 len, DMA_FROM_DEVICE);
         wi->skbs_frags[page_idx]++;
@@ -156,7 +298,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
         skb_copy_to_linear_data_offset(skb, 0,
                                        page_address(dma_info->page) + offset,
                                        len);
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
         if (unlikely(offset + headlen > PAGE_SIZE)) {
                 dma_info++;
                 headlen_pg = len;
@@ -167,7 +308,6 @@ mlx5e_copy_skb_header_fragmented_mpwqe(struct device *pdev,
                                                page_address(dma_info->page),
                                                len);
         }
-#endif
 }
 
 static u16 mlx5e_get_wqe_mtt_offset(u16 rq_ix, u16 wqe_ix)
@@ -293,7 +433,7 @@ static int mlx5e_alloc_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                 if (unlikely(mlx5e_alloc_and_map_page(rq, wi, i)))
                         goto err_unmap;
-                atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+                atomic_add(mlx5e_mpwqe_strides_per_page(rq),
                            &wi->umr.dma_info[i].page->_count);
                 wi->skbs_frags[i] = 0;
         }
@@ -312,7 +452,7 @@ err_unmap:
         while (--i >= 0) {
                 dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
                                PCI_DMA_FROMDEVICE);
-                atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE,
+                atomic_sub(mlx5e_mpwqe_strides_per_page(rq),
                            &wi->umr.dma_info[i].page->_count);
                 put_page(wi->umr.dma_info[i].page);
         }
@@ -337,7 +477,7 @@ void mlx5e_free_rx_fragmented_mpwqe(struct mlx5e_rq *rq,
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
                 dma_unmap_page(rq->pdev, wi->umr.dma_info[i].addr, PAGE_SIZE,
                                PCI_DMA_FROMDEVICE);
-                atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+                atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
                            &wi->umr.dma_info[i].page->_count);
                 put_page(wi->umr.dma_info[i].page);
         }
@@ -387,7 +527,7 @@ static int mlx5e_alloc_rx_linear_mpwqe(struct mlx5e_rq *rq,
          */
         split_page(wi->dma_info.page, MLX5_MPWRQ_WQE_PAGE_ORDER);
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-                atomic_add(MLX5_MPWRQ_STRIDES_PER_PAGE,
+                atomic_add(mlx5e_mpwqe_strides_per_page(rq),
                            &wi->dma_info.page[i]._count);
                 wi->skbs_frags[i] = 0;
         }
@@ -411,7 +551,7 @@ void mlx5e_free_rx_linear_mpwqe(struct mlx5e_rq *rq,
         dma_unmap_page(rq->pdev, wi->dma_info.addr, rq->wqe_sz,
                        PCI_DMA_FROMDEVICE);
         for (i = 0; i < MLX5_MPWRQ_PAGES_PER_WQE; i++) {
-                atomic_sub(MLX5_MPWRQ_STRIDES_PER_PAGE - wi->skbs_frags[i],
+                atomic_sub(mlx5e_mpwqe_strides_per_page(rq) - wi->skbs_frags[i],
                            &wi->dma_info.page[i]._count);
                 put_page(&wi->dma_info.page[i]);
         }
@@ -656,9 +796,9 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
                                            u32 cqe_bcnt,
                                            struct sk_buff *skb)
 {
-        u32 consumed_bytes = ALIGN(cqe_bcnt, MLX5_MPWRQ_STRIDE_SIZE);
+        u32 consumed_bytes = ALIGN(cqe_bcnt, rq->mpwqe_stride_sz);
         u16 stride_ix      = mpwrq_get_cqe_stride_index(cqe);
-        u32 wqe_offset     = stride_ix * MLX5_MPWRQ_STRIDE_SIZE;
+        u32 wqe_offset     = stride_ix * rq->mpwqe_stride_sz;
         u32 head_offset    = wqe_offset & (PAGE_SIZE - 1);
         u32 page_idx       = wqe_offset >> PAGE_SHIFT;
         u32 head_page_idx  = page_idx;
@@ -666,19 +806,17 @@ static inline void mlx5e_mpwqe_fill_rx_skb(struct mlx5e_rq *rq,
         u32 frag_offset    = head_offset + headlen;
         u16 byte_cnt       = cqe_bcnt - headlen;
 
-#if (MLX5_MPWRQ_SMALL_PACKET_THRESHOLD >= MLX5_MPWRQ_STRIDE_SIZE)
         if (unlikely(frag_offset >= PAGE_SIZE)) {
                 page_idx++;
                 frag_offset -= PAGE_SIZE;
         }
-#endif
 
         wi->dma_pre_sync(rq->pdev, wi, wqe_offset, consumed_bytes);
 
         while (byte_cnt) {
                 u32 pg_consumed_bytes = min_t(u32, PAGE_SIZE - frag_offset,
                                               byte_cnt);
-                wi->add_skb_frag(rq->pdev, skb, wi, page_idx, frag_offset,
+                wi->add_skb_frag(rq, skb, wi, page_idx, frag_offset,
                                  pg_consumed_bytes);
                 byte_cnt -= pg_consumed_bytes;
                 frag_offset = 0;
@@ -728,7 +866,7 @@ void mlx5e_handle_rx_cqe_mpwrq(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
         mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
 mpwrq_cqe_out:
-        if (likely(wi->consumed_strides < MLX5_MPWRQ_NUM_STRIDES))
+        if (likely(wi->consumed_strides < rq->mpwqe_num_strides))
                 return;
 
         wi->free_wqe(rq, wi);
@@ -738,14 +876,24 @@ mpwrq_cqe_out:
 int mlx5e_poll_rx_cq(struct mlx5e_cq *cq, int budget)
 {
         struct mlx5e_rq *rq = container_of(cq, struct mlx5e_rq, cq);
-        int work_done;
+        int work_done = 0;
 
-        for (work_done = 0; work_done < budget; work_done++) {
+        if (cq->decmprs_left)
+                work_done += mlx5e_decompress_cqes_cont(rq, cq, 0, budget);
+
+        for (; work_done < budget; work_done++) {
                 struct mlx5_cqe64 *cqe = mlx5e_get_cqe(cq);
 
                 if (!cqe)
                         break;
 
+                if (mlx5_get_cqe_format(cqe) == MLX5_COMPRESSED) {
+                        work_done +=
+                                mlx5e_decompress_cqes_start(rq, cq,
+                                                            budget - work_done);
+                        continue;
+                }
+
                 mlx5_cqwq_pop(&cq->wq);
 
                 rq->handle_rx_cqe(rq, cqe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 115752b53d85..83bc32b25849 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -72,6 +72,8 @@ struct mlx5e_sw_stats {
         u64 rx_mpwqe_filler;
         u64 rx_mpwqe_frag;
         u64 rx_buff_alloc_err;
+        u64 rx_cqe_compress_blks;
+        u64 rx_cqe_compress_pkts;
 
         /* Special handling counters */
         u64 link_down_events;
@@ -101,6 +103,8 @@ static const struct counter_desc sw_stats_desc[] = {
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_filler) },
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_mpwqe_frag) },
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_buff_alloc_err) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_blks) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, rx_cqe_compress_pkts) },
         { MLX5E_DECLARE_STAT(struct mlx5e_sw_stats, link_down_events) },
 };
 
@@ -283,6 +287,8 @@ struct mlx5e_rq_stats {
         u64 mpwqe_filler;
         u64 mpwqe_frag;
         u64 buff_alloc_err;
+        u64 cqe_compress_blks;
+        u64 cqe_compress_pkts;
 };
 
 static const struct counter_desc rq_stats_desc[] = {
@@ -297,6 +303,8 @@ static const struct counter_desc rq_stats_desc[] = {
         { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_filler) },
         { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, mpwqe_frag) },
         { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, buff_alloc_err) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_blks) },
+        { MLX5E_DECLARE_STAT(struct mlx5e_rq_stats, cqe_compress_pkts) },
 };
 
 struct mlx5e_sq_stats {
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index ee0d5a937f02..035abdf62cfe 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -685,6 +685,40 @@ struct mlx5_cqe64 {
         u8              op_own;
 };
 
+struct mlx5_mini_cqe8 {
+        union {
+                __be32 rx_hash_result;
+                struct {
+                        __be16 checksum;
+                        __be16 rsvd;
+                };
+                struct {
+                        __be16 wqe_counter;
+                        u8  s_wqe_opcode;
+                        u8  reserved;
+                } s_wqe_info;
+        };
+        __be32 byte_cnt;
+};
+
+enum {
+        MLX5_NO_INLINE_DATA,
+        MLX5_INLINE_DATA32_SEG,
+        MLX5_INLINE_DATA64_SEG,
+        MLX5_COMPRESSED,
+};
+
+enum {
+        MLX5_CQE_FORMAT_CSUM = 0x1,
+};
+
+#define MLX5_MINI_CQE_ARRAY_SIZE 8
+
+static inline int mlx5_get_cqe_format(struct mlx5_cqe64 *cqe)
+{
+        return (cqe->op_own >> 2) & 0x3;
+}
+
 static inline int get_cqe_lro_tcppsh(struct mlx5_cqe64 *cqe)
 {
         return (cqe->lro_tcppsh_abort_dupack >> 6) & 1;
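
The default-enable decision made in mlx5e_build_netdev_priv() above reduces to simple arithmetic: effective PCI bandwidth in Mb/s is the per-lane rate times the link width (as in mlx5e_get_pci_bw()), and CQE compression is enabled by default only when that figure is below 40000 and below the port's maximum link speed. The standalone sketch below is not part of the patch and is not driver code; it merely mirrors cqe_compress_heuristic() in plain C, and the slot widths and link speeds used are illustrative assumptions.

/*
 * Standalone illustration (not from the patch): mirrors mlx5e_get_pci_bw()
 * and cqe_compress_heuristic() from en_main.c. Example PCIe widths and
 * Ethernet link speeds below are assumptions for demonstration only.
 */
#include <stdbool.h>
#include <stdio.h>

/* Per-lane rate (Mb/s) times lane count, as mlx5e_get_pci_bw() computes it */
static unsigned int pci_bw(unsigned int lane_mbps, unsigned int width)
{
        return lane_mbps * width;
}

/* Same condition as cqe_compress_heuristic() in the patch */
static bool compress_by_default(unsigned int link_speed, unsigned int bw)
{
        return link_speed && bw && bw < 40000 && bw < link_speed;
}

int main(void)
{
        /* Gen3 x16 (8000 Mb/s per lane) under a 100GbE port: 128000 > 40000 -> off */
        printf("Gen3 x16, 100GbE -> %d\n",
               compress_by_default(100000, pci_bw(8000, 16)));

        /* Gen3 x4 under a 50GbE port: 32000 < 40000 and < 50000 -> on */
        printf("Gen3 x4,  50GbE  -> %d\n",
               compress_by_default(50000, pci_bw(8000, 4)));

        return 0;
}

In other words, compression (and the wider 256-byte stride selected via MLX5_MPWRQ_LOG_STRIDE_SIZE_CQE_COMPRESS) is reserved for setups where the PCI link, rather than the wire, is the likely bottleneck; other configurations keep the plain CQE format and the 64-byte stride by default.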