From f1eaac37da20823816af274d69a9eed7444e9822 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Mon, 26 Oct 2020 15:23:13 +0200 Subject: RDMA/mlx5: Split mlx5_ib_update_xlt() into ODP and non-ODP cases Mixing these together is just a mess, make a dedicated version, mlx5_ib_update_mr_pas(), which directly loads the whole MTT for a non-ODP MR. The split out version can trivially use a simple loop with rdma_for_each_block() which allows using the core code to compute the MR pages and avoids seeking in the SGL list after each chunk as the __mlx5_ib_populate_pas() call required. Significantly speeds loading large MTTs. Link: https://lore.kernel.org/r/20201026132314.1336717-5-leon@kernel.org Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/infiniband/hw/mlx5/mem.c | 64 ----------------- drivers/infiniband/hw/mlx5/mlx5_ib.h | 3 - drivers/infiniband/hw/mlx5/mr.c | 132 ++++++++++++++++++++++++----------- 3 files changed, 90 insertions(+), 109 deletions(-) diff --git a/drivers/infiniband/hw/mlx5/mem.c b/drivers/infiniband/hw/mlx5/mem.c index 779c4a040d8b..92e7621ec858 100644 --- a/drivers/infiniband/hw/mlx5/mem.c +++ b/drivers/infiniband/hw/mlx5/mem.c @@ -91,70 +91,6 @@ void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, *shift = PAGE_SHIFT + m; } -/* - * Populate the given array with bus addresses from the umem. - * - * dev - mlx5_ib device - * umem - umem to use to fill the pages - * page_shift - determines the page size used in the resulting array - * offset - offset into the umem to start from, - * only implemented for ODP umems - * num_pages - total number of pages to fill - * pas - bus addresses array to fill - * access_flags - access flags to set on all present pages. - use enum mlx5_ib_mtt_access_flags for this. - */ -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags) -{ - int shift = page_shift - PAGE_SHIFT; - int mask = (1 << shift) - 1; - int i, k, idx; - u64 cur = 0; - u64 base; - int len; - struct scatterlist *sg; - int entry; - - i = 0; - for_each_sg(umem->sg_head.sgl, sg, umem->nmap, entry) { - len = sg_dma_len(sg) >> PAGE_SHIFT; - base = sg_dma_address(sg); - - /* Skip elements below offset */ - if (i + len < offset << shift) { - i += len; - continue; - } - - /* Skip pages below offset */ - if (i < offset << shift) { - k = (offset << shift) - i; - i = offset << shift; - } else { - k = 0; - } - - for (; k < len; k++) { - if (!(i & mask)) { - cur = base + (k << PAGE_SHIFT); - cur |= access_flags; - idx = (i >> shift) - offset; - - pas[idx] = cpu_to_be64(cur); - mlx5_ib_dbg(dev, "pas[%d] 0x%llx\n", - i >> shift, be64_to_cpu(pas[idx])); - } - i++; - - /* Stop after num_pages reached */ - if (i >> shift >= offset + num_pages) - return; - } - } -} - /* * Fill in a physical address list. ib_umem_num_dma_blocks() entries will be * filled in the pas array. diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h index d92afbd26aa5..aadd43425a58 100644 --- a/drivers/infiniband/hw/mlx5/mlx5_ib.h +++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h @@ -1232,9 +1232,6 @@ int mlx5_ib_query_port(struct ib_device *ibdev, u8 port, void mlx5_ib_cont_pages(struct ib_umem *umem, u64 addr, unsigned long max_page_shift, int *shift); -void __mlx5_ib_populate_pas(struct mlx5_ib_dev *dev, struct ib_umem *umem, - int page_shift, size_t offset, size_t num_pages, - __be64 *pas, int access_flags); void mlx5_ib_populate_pas(struct ib_umem *umem, size_t page_size, __be64 *pas, u64 access_flags); void mlx5_ib_copy_pas(u64 *old, u64 *new, int step, int num); diff --git a/drivers/infiniband/hw/mlx5/mr.c b/drivers/infiniband/hw/mlx5/mr.c index 0fa3899dee7e..3fa3809c2660 100644 --- a/drivers/infiniband/hw/mlx5/mr.c +++ b/drivers/infiniband/hw/mlx5/mr.c @@ -1116,6 +1116,21 @@ static void mlx5_ib_unmap_free_xlt(struct mlx5_ib_dev *dev, void *xlt, mlx5_ib_free_xlt(xlt, sg->length); } +static unsigned int xlt_wr_final_send_flags(unsigned int flags) +{ + unsigned int res = 0; + + if (flags & MLX5_IB_UPD_XLT_ENABLE) + res |= MLX5_IB_SEND_UMR_ENABLE_MR | + MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | + MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + if (flags & MLX5_IB_UPD_XLT_PD || flags & MLX5_IB_UPD_XLT_ACCESS) + res |= MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; + if (flags & MLX5_IB_UPD_XLT_ADDR) + res |= MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; + return res; +} + int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, int page_shift, int flags) { @@ -1140,6 +1155,9 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, !umr_can_use_indirect_mkey(dev)) return -EPERM; + if (WARN_ON(!mr->umem->is_odp)) + return -EINVAL; + /* UMR copies MTTs in units of MLX5_UMR_MTT_ALIGNMENT bytes, * so we need to align the offset and length accordingly */ @@ -1155,13 +1173,11 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, pages_iter = sg.length / desc_size; orig_sg_length = sg.length; - if (mr->umem->is_odp) { - if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { - struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); - size_t max_pages = ib_umem_odp_num_pages(odp) - idx; + if (!(flags & MLX5_IB_UPD_XLT_INDIRECT)) { + struct ib_umem_odp *odp = to_ib_umem_odp(mr->umem); + size_t max_pages = ib_umem_odp_num_pages(odp) - idx; - pages_to_map = min_t(size_t, pages_to_map, max_pages); - } + pages_to_map = min_t(size_t, pages_to_map, max_pages); } wr.page_shift = page_shift; @@ -1173,36 +1189,14 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, size_to_map = npages * desc_size; dma_sync_single_for_cpu(ddev, sg.addr, sg.length, DMA_TO_DEVICE); - if (mr->umem->is_odp) { - mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); - } else { - __mlx5_ib_populate_pas(dev, mr->umem, page_shift, idx, - npages, xlt, - MLX5_IB_MTT_PRESENT); - /* Clear padding after the pages - * brought from the umem. - */ - memset(xlt + size_to_map, 0, sg.length - size_to_map); - } + mlx5_odp_populate_xlt(xlt, idx, npages, mr, flags); dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); sg.length = ALIGN(size_to_map, MLX5_UMR_MTT_ALIGNMENT); - if (pages_mapped + pages_iter >= pages_to_map) { - if (flags & MLX5_IB_UPD_XLT_ENABLE) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_ENABLE_MR | - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS | - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - if (flags & MLX5_IB_UPD_XLT_PD || - flags & MLX5_IB_UPD_XLT_ACCESS) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_PD_ACCESS; - if (flags & MLX5_IB_UPD_XLT_ADDR) - wr.wr.send_flags |= - MLX5_IB_SEND_UMR_UPDATE_TRANSLATION; - } + if (pages_mapped + pages_iter >= pages_to_map) + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); wr.offset = idx * desc_size; wr.xlt_size = sg.length; @@ -1214,6 +1208,69 @@ int mlx5_ib_update_xlt(struct mlx5_ib_mr *mr, u64 idx, int npages, return err; } +/* + * Send the DMA list to the HW for a normal MR using UMR. + */ +static int mlx5_ib_update_mr_pas(struct mlx5_ib_mr *mr, unsigned int flags) +{ + struct mlx5_ib_dev *dev = mr->dev; + struct device *ddev = dev->ib_dev.dev.parent; + struct ib_block_iter biter; + struct mlx5_mtt *cur_mtt; + struct mlx5_umr_wr wr; + size_t orig_sg_length; + struct mlx5_mtt *mtt; + size_t final_size; + struct ib_sge sg; + int err = 0; + + if (WARN_ON(mr->umem->is_odp)) + return -EINVAL; + + mtt = mlx5_ib_create_xlt_wr(mr, &wr, &sg, + ib_umem_num_dma_blocks(mr->umem, + 1 << mr->page_shift), + sizeof(*mtt), flags); + if (!mtt) + return -ENOMEM; + orig_sg_length = sg.length; + + cur_mtt = mtt; + rdma_for_each_block (mr->umem->sg_head.sgl, &biter, mr->umem->nmap, + BIT(mr->page_shift)) { + if (cur_mtt == (void *)mtt + sg.length) { + dma_sync_single_for_device(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + if (err) + goto err; + dma_sync_single_for_cpu(ddev, sg.addr, sg.length, + DMA_TO_DEVICE); + wr.offset += sg.length; + cur_mtt = mtt; + } + + cur_mtt->ptag = + cpu_to_be64(rdma_block_iter_dma_address(&biter) | + MLX5_IB_MTT_PRESENT); + cur_mtt++; + } + + final_size = (void *)cur_mtt - (void *)mtt; + sg.length = ALIGN(final_size, MLX5_UMR_MTT_ALIGNMENT); + memset(cur_mtt, 0, sg.length - final_size); + wr.wr.send_flags |= xlt_wr_final_send_flags(flags); + wr.xlt_size = sg.length; + + dma_sync_single_for_device(ddev, sg.addr, sg.length, DMA_TO_DEVICE); + err = mlx5_ib_post_send_wait(dev, &wr); + +err: + sg.length = orig_sg_length; + mlx5_ib_unmap_free_xlt(dev, mtt, &sg); + return err; +} + /* * If ibmr is NULL it will be allocated by reg_create. * Else, the given ibmr will be used. @@ -1480,12 +1537,7 @@ struct ib_mr *mlx5_ib_reg_user_mr(struct ib_pd *pd, u64 start, u64 length, * configured properly but left disabled. It is safe to go ahead * and configure it again via UMR while enabling it. */ - int update_xlt_flags = MLX5_IB_UPD_XLT_ENABLE; - - err = mlx5_ib_update_xlt( - mr, 0, - ib_umem_num_dma_blocks(umem, 1UL << mr->page_shift), - mr->page_shift, update_xlt_flags); + err = mlx5_ib_update_mr_pas(mr, MLX5_IB_UPD_XLT_ENABLE); if (err) { dereg_mr(dev, mr); return ERR_PTR(err); @@ -1651,11 +1703,7 @@ int mlx5_ib_rereg_user_mr(struct ib_mr *ib_mr, int flags, u64 start, upd_flags |= MLX5_IB_UPD_XLT_PD; if (flags & IB_MR_REREG_ACCESS) upd_flags |= MLX5_IB_UPD_XLT_ACCESS; - err = mlx5_ib_update_xlt( - mr, 0, - ib_umem_num_dma_blocks(mr->umem, - 1UL << mr->page_shift), - mr->page_shift, upd_flags); + err = mlx5_ib_update_mr_pas(mr, upd_flags); } else { err = rereg_umr(pd, mr, access_flags, flags); } -- cgit v1.2.3