author      David S. Miller <davem@davemloft.net>    2017-01-09 17:09:31 -0500
committer   David S. Miller <davem@davemloft.net>    2017-01-09 17:09:31 -0500
commit      bda65b4255ac983ce36a6c0ea6a7794f8e8fcc86 (patch)
tree        ad280b2ecc1266329416f043dd20cea515b3dc48 /drivers/net/ethernet/mellanox
parent      b369e7fd41f7cdbe2488cb736ef4f958bb94b5e2 (diff)
parent      f502d834950a28e02651bb7e2cc7111ddd352644 (diff)
download    linux-bda65b4255ac983ce36a6c0ea6a7794f8e8fcc86.tar.bz2
Merge tag 'mlx5-4kuar-for-4.11' of git://git.kernel.org/pub/scm/linux/kernel/git/mellanox/linux
Saeed Mahameed says:
====================
mlx5 4K UAR
The following series of patches optimizes usage of the UAR area contained within
BAR 0-1. Previous versions of the firmware and the driver assumed each system page
contains a single UAR. This patch set queries the firmware for a new capability
that, if published, means the firmware supports fixed 4KB UARs regardless of the
system page size. On powerpc, where the page size is 64KB, this means we can fit
16 UARs per system page. Since user-space processes consume eight UARs per context
by default, with this change a single system page satisfies that requirement, and
a process can in fact make use of more UARs, which is better in terms of
performance.
In addition to optimizing user-space processes, we introduce an allocator that
kernel consumers can use to allocate blue-flame registers (areas within a UAR
that are used to write doorbells). This further optimizes use of the UAR area:
the Ethernet driver previously used a single blue-flame register per system page,
and it will now use two blue-flame registers per 4KB.
The series also updates naming conventions so that the terms used in the driver
code match those used in the PRM (Programmer's Reference Manual). Thus, what used
to be called a UUAR (micro UAR) is now called a BFREG (blue-flame register).
In order to support compatibility between different versions of library, driver,
and firmware, the library now has a means to notify the kernel driver that it
supports the new scheme, and the kernel can notify the library whether it supports
this extension. Mixed library versions can therefore run concurrently without any
issues.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
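The arithmetic behind the cover letter's powerpc example can be sketched in a few
lines. This is an illustrative userspace sketch only, not driver code: the 64KB,
4KB and eight-UARs-per-context figures come from the text above, while the
constant and function names here (SYSTEM_PAGE_SIZE, uars_per_system_page, ...)
are stand-ins for the real MLX5_CAP_GEN() capability accessors used in the patch.

#include <stdbool.h>
#include <stdio.h>

#define SYSTEM_PAGE_SIZE  (64 * 1024) /* powerpc page size from the cover letter */
#define MLX5_UAR_4K_SIZE  (4 * 1024)  /* fixed 4K UAR when uar_4k is supported */
#define UARS_PER_CONTEXT  8           /* default user-space consumption per context */

static int uars_per_system_page(bool uar_4k_supported)
{
        /* Without the capability, firmware still assumes one UAR per system page. */
        if (!uar_4k_supported)
                return 1;
        return SYSTEM_PAGE_SIZE / MLX5_UAR_4K_SIZE;
}

int main(void)
{
        int per_page = uars_per_system_page(true);

        printf("UARs per 64K system page: %d\n", per_page);          /* 16 */
        printf("system pages per context: %d\n",
               (UARS_PER_CONTEXT + per_page - 1) / per_page);        /* 1  */
        return 0;
}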
Diffstat (limited to 'drivers/net/ethernet/mellanox')
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/cq.c          |   2
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/en.h          |  11
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/en_common.c   |  12
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/en_main.c     |  21
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/eq.c          |  14
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/main.c        |  26
-rw-r--r--   drivers/net/ethernet/mellanox/mlx5/core/uar.c         | 351
7 files changed, 261 insertions, 176 deletions
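Before the full diff, here is a hedged sketch of how a kernel consumer picks up
the new blue-flame register allocator. It is a fragment that assumes kernel
context, modeled on the en_main.c hunk below; mlx5_alloc_bfreg(), mlx5_free_bfreg()
and struct mlx5_sq_bfreg are the interfaces added by this series, while
my_sq_get_doorbell/my_sq_put_doorbell are illustrative names, not functions from
the patch.

/* Sketch only: acquire a doorbell bfreg the way the Ethernet SQ path does. */
static int my_sq_get_doorbell(struct mlx5_core_dev *mdev,
                              struct mlx5_sq_bfreg *bfreg)
{
        int err;

        /* Request a write-combining mapping when BlueFlame is supported;
         * the allocator falls back to a regular mapping on -EAGAIN.
         */
        err = mlx5_alloc_bfreg(mdev, bfreg, MLX5_CAP_GEN(mdev, bf), false);
        if (err)
                return err;

        /* bfreg->map is the ioremapped doorbell area; bfreg->index is what
         * gets programmed into the wq uar_page field.
         */
        return 0;
}

static void my_sq_put_doorbell(struct mlx5_core_dev *mdev,
                               struct mlx5_sq_bfreg *bfreg)
{
        mlx5_free_bfreg(mdev, bfreg);
}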
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 32d4af9b594d..336d4738b807 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -179,6 +179,8 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
                mlx5_core_dbg(dev, "failed adding CP 0x%x to debug file system\n",
                              cq->cqn);
 
+       cq->uar = dev->priv.uar;
+
        return 0;
 
 err_cmd:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 951dbd58594d..a473cea10c16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -465,7 +465,6 @@ struct mlx5e_sq {
        /* read only */
        struct mlx5_wq_cyc wq;
        u32 dma_fifo_mask;
-       void __iomem *uar_map;
        struct netdev_queue *txq;
        u32 sqn;
        u16 bf_buf_size;
@@ -479,7 +478,7 @@ struct mlx5e_sq {
        /* control path */
        struct mlx5_wq_ctrl wq_ctrl;
-       struct mlx5_uar uar;
+       struct mlx5_sq_bfreg bfreg;
        struct mlx5e_channel *channel;
        int tc;
        u32 rate_limit;
@@ -806,7 +805,7 @@ void mlx5e_set_rx_cq_mode_params(struct mlx5e_params *params,
 static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
                                      struct mlx5_wqe_ctrl_seg *ctrl, int bf_sz)
 {
-       u16 ofst = MLX5_BF_OFFSET + sq->bf_offset;
+       u16 ofst = sq->bf_offset;
 
        /* ensure wqe is visible to device before updating doorbell record */
        dma_wmb();
@@ -818,9 +817,9 @@ static inline void mlx5e_tx_notify_hw(struct mlx5e_sq *sq,
         */
        wmb();
        if (bf_sz)
-               __iowrite64_copy(sq->uar_map + ofst, ctrl, bf_sz);
+               __iowrite64_copy(sq->bfreg.map + ofst, ctrl, bf_sz);
        else
-               mlx5_write64((__be32 *)ctrl, sq->uar_map + ofst, NULL);
+               mlx5_write64((__be32 *)ctrl, sq->bfreg.map + ofst, NULL);
 
        /* flush the write-combining mapped buffer */
        wmb();
@@ -832,7 +831,7 @@ static inline void mlx5e_cq_arm(struct mlx5e_cq *cq)
        struct mlx5_core_cq *mcq;
 
        mcq = &cq->mcq;
-       mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, NULL, cq->wq.cc);
+       mlx5_cq_arm(mcq, MLX5_CQ_DB_REQ_NOT, mcq->uar->map, cq->wq.cc);
 }
 
 static inline u32 mlx5e_get_wqe_mtt_offset(struct mlx5e_rq *rq, u16 wqe_ix)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index f175518ff07a..bd898d8deda0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -89,16 +89,10 @@ int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev)
        struct mlx5e_resources *res = &mdev->mlx5e_res;
        int err;
 
-       err = mlx5_alloc_map_uar(mdev, &res->cq_uar, false);
-       if (err) {
-               mlx5_core_err(mdev, "alloc_map uar failed, %d\n", err);
-               return err;
-       }
-
        err = mlx5_core_alloc_pd(mdev, &res->pdn);
        if (err) {
                mlx5_core_err(mdev, "alloc pd failed, %d\n", err);
-               goto err_unmap_free_uar;
+               return err;
        }
 
        err = mlx5_core_alloc_transport_domain(mdev, &res->td.tdn);
@@ -121,9 +115,6 @@ err_dealloc_transport_domain:
        mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
 err_dealloc_pd:
        mlx5_core_dealloc_pd(mdev, res->pdn);
-err_unmap_free_uar:
-       mlx5_unmap_free_uar(mdev, &res->cq_uar);
-
        return err;
 }
 
@@ -134,7 +125,6 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
        mlx5_core_destroy_mkey(mdev, &res->mkey);
        mlx5_core_dealloc_transport_domain(mdev, res->td.tdn);
        mlx5_core_dealloc_pd(mdev, res->pdn);
-       mlx5_unmap_free_uar(mdev, &res->cq_uar);
 }
 
 int mlx5e_refresh_tirs_self_loopback(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 60e5670452a1..2cc774260903 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -967,7 +967,7 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
        sq->channel = c;
        sq->tc = tc;
 
-       err = mlx5_alloc_map_uar(mdev, &sq->uar, !!MLX5_CAP_GEN(mdev, bf));
+       err = mlx5_alloc_bfreg(mdev, &sq->bfreg, MLX5_CAP_GEN(mdev, bf), false);
        if (err)
                return err;
 
@@ -979,12 +979,9 @@ static int mlx5e_create_sq(struct mlx5e_channel *c,
                goto err_unmap_free_uar;
 
        sq->wq.db = &sq->wq.db[MLX5_SND_DBR];
-       if (sq->uar.bf_map) {
+       if (sq->bfreg.wc)
                set_bit(MLX5E_SQ_STATE_BF_ENABLE, &sq->state);
-               sq->uar_map = sq->uar.bf_map;
-       } else {
-               sq->uar_map = sq->uar.map;
-       }
+
        sq->bf_buf_size = (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) / 2;
        sq->max_inline = param->max_inline;
        sq->min_inline_mode =
@@ -1012,7 +1009,7 @@ err_sq_wq_destroy:
        mlx5_wq_destroy(&sq->wq_ctrl);
 
 err_unmap_free_uar:
-       mlx5_unmap_free_uar(mdev, &sq->uar);
+       mlx5_free_bfreg(mdev, &sq->bfreg);
 
        return err;
 }
@@ -1024,7 +1021,7 @@ static void mlx5e_destroy_sq(struct mlx5e_sq *sq)
 
        mlx5e_free_sq_db(sq);
        mlx5_wq_destroy(&sq->wq_ctrl);
-       mlx5_unmap_free_uar(priv->mdev, &sq->uar);
+       mlx5_free_bfreg(priv->mdev, &sq->bfreg);
 }
 
 static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
@@ -1058,7 +1055,7 @@ static int mlx5e_enable_sq(struct mlx5e_sq *sq, struct mlx5e_sq_param *param)
        MLX5_SET(sqc, sqc, tis_lst_sz, param->type == MLX5E_SQ_ICO ? 0 : 1);
 
        MLX5_SET(wq, wq, wq_type, MLX5_WQ_TYPE_CYCLIC);
-       MLX5_SET(wq, wq, uar_page, sq->uar.index);
+       MLX5_SET(wq, wq, uar_page, sq->bfreg.index);
        MLX5_SET(wq, wq, log_wq_pg_sz, sq->wq_ctrl.buf.page_shift -
                                          MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(wq, wq, dbr_addr, sq->wq_ctrl.db.dma);
@@ -1216,7 +1213,6 @@ static int mlx5e_create_cq(struct mlx5e_channel *c,
        mcq->comp = mlx5e_completion_event;
        mcq->event = mlx5e_cq_error_event;
        mcq->irqn = irqn;
-       mcq->uar = &mdev->mlx5e_res.cq_uar;
 
        for (i = 0; i < mlx5_cqwq_get_size(&cq->wq); i++) {
                struct mlx5_cqe64 *cqe = mlx5_cqwq_get_wqe(&cq->wq, i);
@@ -1265,7 +1261,7 @@ static int mlx5e_enable_cq(struct mlx5e_cq *cq, struct mlx5e_cq_param *param)
        MLX5_SET(cqc, cqc, cq_period_mode, param->cq_period_mode);
 
        MLX5_SET(cqc, cqc, c_eqn, eqn);
-       MLX5_SET(cqc, cqc, uar_page, mcq->uar->index);
+       MLX5_SET(cqc, cqc, uar_page, mdev->priv.uar->index);
        MLX5_SET(cqc, cqc, log_page_size, cq->wq_ctrl.frag_buf.page_shift -
                                            MLX5_ADAPTER_PAGE_SHIFT);
        MLX5_SET64(cqc, cqc, dbr_addr, cq->wq_ctrl.db.dma);
@@ -1677,7 +1673,7 @@ static void mlx5e_build_common_cq_param(struct mlx5e_priv *priv,
 {
        void *cqc = param->cqc;
 
-       MLX5_SET(cqc, cqc, uar_page, priv->mdev->mlx5e_res.cq_uar.index);
+       MLX5_SET(cqc, cqc, uar_page, priv->mdev->priv.uar->index);
 }
 
 static void mlx5e_build_rx_cq_param(struct mlx5e_priv *priv,
@@ -2296,7 +2292,6 @@ static int mlx5e_create_drop_cq(struct mlx5e_priv *priv,
        mcq->comp = mlx5e_completion_event;
        mcq->event = mlx5e_cq_error_event;
        mcq->irqn = irqn;
-       mcq->uar = &mdev->mlx5e_res.cq_uar;
 
        cq->priv = priv;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index 4aff8ac68e14..5130d65dd41a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -512,7 +512,7 @@ static void init_eq_buf(struct mlx5_eq *eq)
 
 int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
                       int nent, u64 mask, const char *name,
-                      struct mlx5_uar *uar, enum mlx5_eq_type type)
+                      enum mlx5_eq_type type)
 {
        u32 out[MLX5_ST_SZ_DW(create_eq_out)] = {0};
        struct mlx5_priv *priv = &dev->priv;
@@ -556,7 +556,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
 
        eqc = MLX5_ADDR_OF(create_eq_in, in, eq_context_entry);
        MLX5_SET(eqc, eqc, log_eq_size, ilog2(eq->nent));
-       MLX5_SET(eqc, eqc, uar_page, uar->index);
+       MLX5_SET(eqc, eqc, uar_page, priv->uar->index);
        MLX5_SET(eqc, eqc, intr, vecidx);
        MLX5_SET(eqc, eqc, log_page_size,
                 eq->buf.page_shift - MLX5_ADAPTER_PAGE_SHIFT);
@@ -571,7 +571,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx,
        eq->eqn = MLX5_GET(create_eq_out, out, eq_number);
        eq->irqn = priv->msix_arr[vecidx].vector;
        eq->dev = dev;
-       eq->doorbell = uar->map + MLX5_EQ_DOORBEL_OFFSET;
+       eq->doorbell = priv->uar->map + MLX5_EQ_DOORBEL_OFFSET;
        err = request_irq(eq->irqn, handler, 0,
                          priv->irq_info[vecidx].name, eq);
        if (err)
@@ -686,8 +686,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 
        err = mlx5_create_map_eq(dev, &table->cmd_eq, MLX5_EQ_VEC_CMD,
                                 MLX5_NUM_CMD_EQE, 1ull << MLX5_EVENT_TYPE_CMD,
-                                "mlx5_cmd_eq", &dev->priv.uuari.uars[0],
-                                MLX5_EQ_TYPE_ASYNC);
+                                "mlx5_cmd_eq", MLX5_EQ_TYPE_ASYNC);
        if (err) {
                mlx5_core_warn(dev, "failed to create cmd EQ %d\n", err);
                return err;
@@ -697,8 +696,7 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
 
        err = mlx5_create_map_eq(dev, &table->async_eq, MLX5_EQ_VEC_ASYNC,
                                 MLX5_NUM_ASYNC_EQE, async_event_mask,
-                                "mlx5_async_eq", &dev->priv.uuari.uars[0],
-                                MLX5_EQ_TYPE_ASYNC);
+                                "mlx5_async_eq", MLX5_EQ_TYPE_ASYNC);
        if (err) {
                mlx5_core_warn(dev, "failed to create async EQ %d\n", err);
                goto err1;
@@ -708,7 +706,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
                                 MLX5_EQ_VEC_PAGES,
                                 /* TODO: sriov max_vf + */ 1,
                                 1 << MLX5_EVENT_TYPE_PAGE_REQUEST, "mlx5_pages_eq",
-                                &dev->priv.uuari.uars[0],
                                 MLX5_EQ_TYPE_ASYNC);
        if (err) {
                mlx5_core_warn(dev, "failed to create pages EQ %d\n", err);
@@ -722,7 +719,6 @@ int mlx5_start_eqs(struct mlx5_core_dev *dev)
                                         MLX5_NUM_ASYNC_EQE,
                                         1 << MLX5_EVENT_TYPE_PAGE_FAULT,
                                         "mlx5_page_fault_eq",
-                                        &dev->priv.uuari.uars[0],
                                         MLX5_EQ_TYPE_PF);
                if (err) {
                        mlx5_core_warn(dev, "failed to create page fault EQ %d\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index f1ed009b871c..cb7708b45bb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -537,6 +537,10 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
        /* disable cmdif checksum */
        MLX5_SET(cmd_hca_cap, set_hca_cap, cmdif_checksum, 0);
 
+       /* If the HCA supports 4K UARs use it */
+       if (MLX5_CAP_GEN_MAX(dev, uar_4k))
+               MLX5_SET(cmd_hca_cap, set_hca_cap, uar_4k, 1);
+
        MLX5_SET(cmd_hca_cap, set_hca_cap, log_uar_page_sz, PAGE_SHIFT - 12);
 
        err = set_caps(dev, set_ctx, set_sz,
@@ -759,8 +763,7 @@ static int alloc_comp_eqs(struct mlx5_core_dev *dev)
                snprintf(name, MLX5_MAX_IRQ_NAME, "mlx5_comp%d", i);
                err = mlx5_create_map_eq(dev, eq,
                                         i + MLX5_EQ_VEC_COMP_BASE, nent, 0,
-                                        name, &dev->priv.uuari.uars[0],
-                                        MLX5_EQ_TYPE_COMP);
+                                        name, MLX5_EQ_TYPE_COMP);
                if (err) {
                        kfree(eq);
                        goto clean;
@@ -920,8 +923,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev, struct mlx5_priv *priv)
                goto out;
        }
 
-       MLX5_INIT_DOORBELL_LOCK(&priv->cq_uar_lock);
-
        err = mlx5_init_cq_table(dev);
        if (err) {
                dev_err(&pdev->dev, "failed to initialize cq table\n");
@@ -1100,8 +1101,8 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
                goto err_cleanup_once;
        }
 
-       err = mlx5_alloc_uuars(dev, &priv->uuari);
-       if (err) {
+       dev->priv.uar = mlx5_get_uars_page(dev);
+       if (!dev->priv.uar) {
                dev_err(&pdev->dev, "Failed allocating uar, aborting\n");
                goto err_disable_msix;
        }
@@ -1109,7 +1110,7 @@ static int mlx5_load_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        err = mlx5_start_eqs(dev);
        if (err) {
                dev_err(&pdev->dev, "Failed to start pages and async EQs\n");
-               goto err_free_uar;
+               goto err_put_uars;
        }
 
        err = alloc_comp_eqs(dev);
@@ -1175,8 +1176,8 @@ err_affinity_hints:
 err_stop_eqs:
        mlx5_stop_eqs(dev);
 
-err_free_uar:
-       mlx5_free_uuars(dev, &priv->uuari);
+err_put_uars:
+       mlx5_put_uars_page(dev, priv->uar);
 
 err_disable_msix:
        mlx5_disable_msix(dev);
@@ -1238,7 +1239,7 @@ static int mlx5_unload_one(struct mlx5_core_dev *dev, struct mlx5_priv *priv,
        mlx5_irq_clear_affinity_hints(dev);
        free_comp_eqs(dev);
        mlx5_stop_eqs(dev);
-       mlx5_free_uuars(dev, &priv->uuari);
+       mlx5_put_uars_page(dev, priv->uar);
        mlx5_disable_msix(dev);
        if (cleanup)
                mlx5_cleanup_once(dev);
@@ -1313,6 +1314,11 @@ static int init_one(struct pci_dev *pdev,
                goto clean_dev;
        }
 #endif
+       mutex_init(&priv->bfregs.reg_head.lock);
+       mutex_init(&priv->bfregs.wc_head.lock);
+       INIT_LIST_HEAD(&priv->bfregs.reg_head.list);
+       INIT_LIST_HEAD(&priv->bfregs.wc_head.list);
+
        err = mlx5_pci_init(dev, priv);
        if (err) {
                dev_err(&pdev->dev, "mlx5_pci_init failed with error code %d\n", err);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index ab0b896621a0..2e6b0f290ddc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -37,11 +37,6 @@
 #include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
-enum {
-       NUM_DRIVER_UARS = 4,
-       NUM_LOW_LAT_UUARS = 4,
-};
-
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
        u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
@@ -67,167 +62,269 @@ int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 }
 EXPORT_SYMBOL(mlx5_cmd_free_uar);
 
-static int need_uuar_lock(int uuarn)
+static int uars_per_sys_page(struct mlx5_core_dev *mdev)
 {
-       int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
-
-       if (uuarn == 0 || tot_uuars - NUM_LOW_LAT_UUARS)
-               return 0;
+       if (MLX5_CAP_GEN(mdev, uar_4k))
+               return MLX5_CAP_GEN(mdev, num_of_uars_per_page);
 
        return 1;
 }
 
-int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+static u64 uar2pfn(struct mlx5_core_dev *mdev, u32 index)
 {
-       int tot_uuars = NUM_DRIVER_UARS * MLX5_BF_REGS_PER_PAGE;
-       struct mlx5_bf *bf;
-       phys_addr_t addr;
-       int err;
+       u32 system_page_index;
+
+       if (MLX5_CAP_GEN(mdev, uar_4k))
+               system_page_index = index >> (PAGE_SHIFT - MLX5_ADAPTER_PAGE_SHIFT);
+       else
+               system_page_index = index;
+
+       return (pci_resource_start(mdev->pdev, 0) >> PAGE_SHIFT) + system_page_index;
+}
+
+static void up_rel_func(struct kref *kref)
+{
+       struct mlx5_uars_page *up = container_of(kref, struct mlx5_uars_page, ref_count);
+
+       list_del(&up->list);
+       if (mlx5_cmd_free_uar(up->mdev, up->index))
+               mlx5_core_warn(up->mdev, "failed to free uar index %d\n", up->index);
+       kfree(up->reg_bitmap);
+       kfree(up->fp_bitmap);
+       kfree(up);
+}
+
+static struct mlx5_uars_page *alloc_uars_page(struct mlx5_core_dev *mdev,
+                                             bool map_wc)
+{
+       struct mlx5_uars_page *up;
+       int err = -ENOMEM;
+       phys_addr_t pfn;
+       int bfregs;
        int i;
 
-       uuari->num_uars = NUM_DRIVER_UARS;
-       uuari->num_low_latency_uuars = NUM_LOW_LAT_UUARS;
+       bfregs = uars_per_sys_page(mdev) * MLX5_BFREGS_PER_UAR;
+       up = kzalloc(sizeof(*up), GFP_KERNEL);
+       if (!up)
+               return ERR_PTR(err);
 
-       mutex_init(&uuari->lock);
-       uuari->uars = kcalloc(uuari->num_uars, sizeof(*uuari->uars), GFP_KERNEL);
-       if (!uuari->uars)
-               return -ENOMEM;
+       up->mdev = mdev;
+       up->reg_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+       if (!up->reg_bitmap)
+               goto error1;
 
-       uuari->bfs = kcalloc(tot_uuars, sizeof(*uuari->bfs), GFP_KERNEL);
-       if (!uuari->bfs) {
-               err = -ENOMEM;
-               goto out_uars;
-       }
+       up->fp_bitmap = kcalloc(BITS_TO_LONGS(bfregs), sizeof(unsigned long), GFP_KERNEL);
+       if (!up->fp_bitmap)
+               goto error1;
 
-       uuari->bitmap = kcalloc(BITS_TO_LONGS(tot_uuars), sizeof(*uuari->bitmap),
-                               GFP_KERNEL);
-       if (!uuari->bitmap) {
-               err = -ENOMEM;
-               goto out_bfs;
-       }
+       for (i = 0; i < bfregs; i++)
+               if ((i % MLX5_BFREGS_PER_UAR) < MLX5_NON_FP_BFREGS_PER_UAR)
+                       set_bit(i, up->reg_bitmap);
+               else
+                       set_bit(i, up->fp_bitmap);
 
-       uuari->count = kcalloc(tot_uuars, sizeof(*uuari->count), GFP_KERNEL);
-       if (!uuari->count) {
-               err = -ENOMEM;
-               goto out_bitmap;
-       }
+       up->bfregs = bfregs;
+       up->fp_avail = bfregs * MLX5_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
+       up->reg_avail = bfregs * MLX5_NON_FP_BFREGS_PER_UAR / MLX5_BFREGS_PER_UAR;
 
-       for (i = 0; i < uuari->num_uars; i++) {
-               err = mlx5_cmd_alloc_uar(dev, &uuari->uars[i].index);
-               if (err)
-                       goto out_count;
+       err = mlx5_cmd_alloc_uar(mdev, &up->index);
+       if (err) {
+               mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
+               goto error1;
+       }
 
-               addr = dev->iseg_base + ((phys_addr_t)(uuari->uars[i].index) << PAGE_SHIFT);
-               uuari->uars[i].map = ioremap(addr, PAGE_SIZE);
-               if (!uuari->uars[i].map) {
-                       mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+       pfn = uar2pfn(mdev, up->index);
+       if (map_wc) {
+               up->map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
+               if (!up->map) {
+                       err = -EAGAIN;
+                       goto error2;
+               }
+       } else {
+               up->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
+               if (!up->map) {
                        err = -ENOMEM;
-                       goto out_count;
+                       goto error2;
                }
-               mlx5_core_dbg(dev, "allocated uar index 0x%x, mmaped at %p\n",
-                             uuari->uars[i].index, uuari->uars[i].map);
-       }
-
-       for (i = 0; i < tot_uuars; i++) {
-               bf = &uuari->bfs[i];
-
-               bf->buf_size = (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) / 2;
-               bf->uar = &uuari->uars[i / MLX5_BF_REGS_PER_PAGE];
-               bf->regreg = uuari->uars[i / MLX5_BF_REGS_PER_PAGE].map;
-               bf->reg = NULL; /* Add WC support */
-               bf->offset = (i % MLX5_BF_REGS_PER_PAGE) *
-                            (1 << MLX5_CAP_GEN(dev, log_bf_reg_size)) +
-                            MLX5_BF_OFFSET;
-               bf->need_lock = need_uuar_lock(i);
-               spin_lock_init(&bf->lock);
-               spin_lock_init(&bf->lock32);
-               bf->uuarn = i;
        }
+       kref_init(&up->ref_count);
+       mlx5_core_dbg(mdev, "allocated UAR page: index %d, total bfregs %d\n",
+                     up->index, up->bfregs);
+       return up;
+
+error2:
+       if (mlx5_cmd_free_uar(mdev, up->index))
+               mlx5_core_warn(mdev, "failed to free uar index %d\n", up->index);
+error1:
+       kfree(up->fp_bitmap);
+       kfree(up->reg_bitmap);
+       kfree(up);
+       return ERR_PTR(err);
+}
 
-       return 0;
-
-out_count:
-       for (i--; i >= 0; i--) {
-               iounmap(uuari->uars[i].map);
-               mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev)
+{
+       struct mlx5_uars_page *ret;
+
+       mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+       if (list_empty(&mdev->priv.bfregs.reg_head.list)) {
+               ret = alloc_uars_page(mdev, false);
+               if (IS_ERR(ret)) {
+                       ret = NULL;
+                       goto out;
+               }
+               list_add(&ret->list, &mdev->priv.bfregs.reg_head.list);
+       } else {
+               ret = list_first_entry(&mdev->priv.bfregs.reg_head.list,
+                                      struct mlx5_uars_page, list);
+               kref_get(&ret->ref_count);
        }
-       kfree(uuari->count);
+out:
+       mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
 
-out_bitmap:
-       kfree(uuari->bitmap);
-
-out_bfs:
-       kfree(uuari->bfs);
+       return ret;
+}
+EXPORT_SYMBOL(mlx5_get_uars_page);
 
-out_uars:
-       kfree(uuari->uars);
-       return err;
+void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up)
+{
+       mutex_lock(&mdev->priv.bfregs.reg_head.lock);
+       kref_put(&up->ref_count, up_rel_func);
+       mutex_unlock(&mdev->priv.bfregs.reg_head.lock);
 }
+EXPORT_SYMBOL(mlx5_put_uars_page);
 
-int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari)
+static unsigned long map_offset(struct mlx5_core_dev *mdev, int dbi)
 {
-       int i = uuari->num_uars;
+       /* return the offset in bytes from the start of the page to the
+        * blue flame area of the UAR
+        */
+       return dbi / MLX5_BFREGS_PER_UAR * MLX5_ADAPTER_PAGE_SIZE +
+              (dbi % MLX5_BFREGS_PER_UAR) *
+              (1 << MLX5_CAP_GEN(mdev, log_bf_reg_size)) + MLX5_BF_OFFSET;
+}
 
-       for (i--; i >= 0; i--) {
-               iounmap(uuari->uars[i].map);
-               mlx5_cmd_free_uar(dev, uuari->uars[i].index);
+static int alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                      bool map_wc, bool fast_path)
+{
+       struct mlx5_bfreg_data *bfregs;
+       struct mlx5_uars_page *up;
+       struct list_head *head;
+       unsigned long *bitmap;
+       unsigned int *avail;
+       struct mutex *lock; /* pointer to right mutex */
+       int dbi;
+
+       bfregs = &mdev->priv.bfregs;
+       if (map_wc) {
+               head = &bfregs->wc_head.list;
+               lock = &bfregs->wc_head.lock;
+       } else {
+               head = &bfregs->reg_head.list;
+               lock = &bfregs->reg_head.lock;
        }
-
-       kfree(uuari->count);
-       kfree(uuari->bitmap);
-       kfree(uuari->bfs);
-       kfree(uuari->uars);
+       mutex_lock(lock);
+       if (list_empty(head)) {
+               up = alloc_uars_page(mdev, map_wc);
+               if (IS_ERR(up)) {
+                       mutex_unlock(lock);
+                       return PTR_ERR(up);
+               }
+               list_add(&up->list, head);
+       } else {
+               up = list_entry(head->next, struct mlx5_uars_page, list);
+               kref_get(&up->ref_count);
+       }
+       if (fast_path) {
+               bitmap = up->fp_bitmap;
+               avail = &up->fp_avail;
+       } else {
+               bitmap = up->reg_bitmap;
+               avail = &up->reg_avail;
+       }
+       dbi = find_first_bit(bitmap, up->bfregs);
+       clear_bit(dbi, bitmap);
+       (*avail)--;
+       if (!(*avail))
+               list_del(&up->list);
+
+       bfreg->map = up->map + map_offset(mdev, dbi);
+       bfreg->up = up;
+       bfreg->wc = map_wc;
+       bfreg->index = up->index + dbi / MLX5_BFREGS_PER_UAR;
+       mutex_unlock(lock);
 
        return 0;
 }
 
-int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar,
-                      bool map_wc)
+int mlx5_alloc_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg,
+                    bool map_wc, bool fast_path)
 {
-       phys_addr_t pfn;
-       phys_addr_t uar_bar_start;
        int err;
 
-       err = mlx5_cmd_alloc_uar(mdev, &uar->index);
-       if (err) {
-               mlx5_core_warn(mdev, "mlx5_cmd_alloc_uar() failed, %d\n", err);
-               return err;
-       }
+       err = alloc_bfreg(mdev, bfreg, map_wc, fast_path);
+       if (!err)
+               return 0;
 
-       uar_bar_start = pci_resource_start(mdev->pdev, 0);
-       pfn = (uar_bar_start >> PAGE_SHIFT) + uar->index;
+       if (err == -EAGAIN && map_wc)
+               return alloc_bfreg(mdev, bfreg, false, fast_path);
 
-       if (map_wc) {
-               uar->bf_map = ioremap_wc(pfn << PAGE_SHIFT, PAGE_SIZE);
-               if (!uar->bf_map) {
-                       mlx5_core_warn(mdev, "ioremap_wc() failed\n");
-                       uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
-                       if (!uar->map)
-                               goto err_free_uar;
-               }
-       } else {
-               uar->map = ioremap(pfn << PAGE_SHIFT, PAGE_SIZE);
-               if (!uar->map)
-                       goto err_free_uar;
-       }
+       return err;
+}
+EXPORT_SYMBOL(mlx5_alloc_bfreg);
 
-       return 0;
+static unsigned int addr_to_dbi_in_syspage(struct mlx5_core_dev *dev,
+                                          struct mlx5_uars_page *up,
+                                          struct mlx5_sq_bfreg *bfreg)
+{
+       unsigned int uar_idx;
+       unsigned int bfreg_idx;
+       unsigned int bf_reg_size;
 
-err_free_uar:
-       mlx5_core_warn(mdev, "ioremap() failed\n");
-       err = -ENOMEM;
-       mlx5_cmd_free_uar(mdev, uar->index);
+       bf_reg_size = 1 << MLX5_CAP_GEN(dev, log_bf_reg_size);
 
-       return err;
+       uar_idx = (bfreg->map - up->map) >> MLX5_ADAPTER_PAGE_SHIFT;
+       bfreg_idx = (((uintptr_t)bfreg->map % MLX5_ADAPTER_PAGE_SIZE) - MLX5_BF_OFFSET) / bf_reg_size;
+
+       return uar_idx * MLX5_BFREGS_PER_UAR + bfreg_idx;
 }
-EXPORT_SYMBOL(mlx5_alloc_map_uar);
 
-void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar)
+void mlx5_free_bfreg(struct mlx5_core_dev *mdev, struct mlx5_sq_bfreg *bfreg)
 {
-       if (uar->map)
-               iounmap(uar->map);
-       else
-               iounmap(uar->bf_map);
-       mlx5_cmd_free_uar(mdev, uar->index);
+       struct mlx5_bfreg_data *bfregs;
+       struct mlx5_uars_page *up;
+       struct mutex *lock; /* pointer to right mutex */
+       unsigned int dbi;
+       bool fp;
+       unsigned int *avail;
+       unsigned long *bitmap;
+       struct list_head *head;
+
+       bfregs = &mdev->priv.bfregs;
+       if (bfreg->wc) {
+               head = &bfregs->wc_head.list;
+               lock = &bfregs->wc_head.lock;
+       } else {
+               head = &bfregs->reg_head.list;
+               lock = &bfregs->reg_head.lock;
+       }
+       up = bfreg->up;
+       dbi = addr_to_dbi_in_syspage(mdev, up, bfreg);
+       fp = (dbi % MLX5_BFREGS_PER_UAR) >= MLX5_NON_FP_BFREGS_PER_UAR;
+       if (fp) {
+               avail = &up->fp_avail;
+               bitmap = up->fp_bitmap;
+       } else {
+               avail = &up->reg_avail;
+               bitmap = up->reg_bitmap;
+       }
+       mutex_lock(lock);
+       (*avail)++;
+       set_bit(dbi, bitmap);
+       if (*avail == 1)
+               list_add_tail(&up->list, head);
+
+       kref_put(&up->ref_count, up_rel_func);
+       mutex_unlock(lock);
 }
-EXPORT_SYMBOL(mlx5_unmap_free_uar);
+EXPORT_SYMBOL(mlx5_free_bfreg);