summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2019-11-30 09:38:11 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2019-11-30 09:38:11 -0800
commitd5bb349dbbe27537e90a03b9597deeb07723a86d (patch)
treeb5ad5b6996bf1fb51c84fb252f2ddf9316ee7d81
parent81b6b96475ac7a4ebfceae9f16fb3758327adbfe (diff)
parent0a6cad5df541108cfd3fbd79eef48eb824c89bdc (diff)
downloadlinux-d5bb349dbbe27537e90a03b9597deeb07723a86d.tar.bz2
Merge tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/drm/drm
Pull drm coherent memory support for vmwgfx from Dave Airlie: "This is a separate pull for the mm pagewalking + drm/vmwgfx work Thomas did and you were involved in, I've left it separate in case you don't feel as comfortable with it as the other stuff. It has mm acks/r-b in the right places from what I can see" * tag 'drm-vmwgfx-coherent-2019-11-29' of git://anongit.freedesktop.org/drm/drm: drm/vmwgfx: Add surface dirty-tracking callbacks drm/vmwgfx: Implement an infrastructure for read-coherent resources drm/vmwgfx: Use an RBtree instead of linked list for MOB resources drm/vmwgfx: Implement an infrastructure for write-coherent resources mm: Add write-protect and clean utilities for address space ranges mm: Add a walk_page_mapping() function to the pagewalk code mm: pagewalk: Take the pagetable lock in walk_pte_range() mm: Remove BUG_ON mmap_sem not held from xxx_trans_huge_lock() drm/ttm: Convert vm callbacks to helpers drm/ttm: Remove explicit typecasts of vm_private_data
-rw-r--r--drivers/gpu/drm/ttm/ttm_bo_vm.c174
-rw-r--r--drivers/gpu/drm/vmwgfx/Kconfig1
-rw-r--r--drivers/gpu/drm/vmwgfx/Makefile2
-rw-r--r--drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h233
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_bo.c10
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_drv.h44
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c1
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c488
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource.c193
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h13
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_surface.c395
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c15
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.c74
-rw-r--r--drivers/gpu/drm/vmwgfx/vmwgfx_validation.h16
-rw-r--r--include/drm/ttm/ttm_bo_api.h14
-rw-r--r--include/linux/huge_mm.h2
-rw-r--r--include/linux/mm.h13
-rw-r--r--include/linux/pagewalk.h9
-rw-r--r--include/uapi/drm/vmwgfx_drm.h4
-rw-r--r--mm/Kconfig3
-rw-r--r--mm/Makefile1
-rw-r--r--mm/mapping_dirty_helpers.c315
-rw-r--r--mm/pagewalk.c99
23 files changed, 1996 insertions, 123 deletions
diff --git a/drivers/gpu/drm/ttm/ttm_bo_vm.c b/drivers/gpu/drm/ttm/ttm_bo_vm.c
index 4b34a278d65b..11863fbdd5d6 100644
--- a/drivers/gpu/drm/ttm/ttm_bo_vm.c
+++ b/drivers/gpu/drm/ttm/ttm_bo_vm.c
@@ -42,8 +42,6 @@
#include <linux/uaccess.h>
#include <linux/mem_encrypt.h>
-#define TTM_BO_VM_NUM_PREFAULT 16
-
static vm_fault_t ttm_bo_vm_fault_idle(struct ttm_buffer_object *bo,
struct vm_fault *vmf)
{
@@ -106,25 +104,30 @@ static unsigned long ttm_bo_io_mem_pfn(struct ttm_buffer_object *bo,
+ page_offset;
}
-static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+/**
+ * ttm_bo_vm_reserve - Reserve a buffer object in a retryable vm callback
+ * @bo: The buffer object
+ * @vmf: The fault structure handed to the callback
+ *
+ * vm callbacks like fault() and *_mkwrite() allow for the mm_sem to be dropped
+ * during long waits, and after the wait the callback will be restarted. This
+ * is to allow other threads using the same virtual memory space concurrent
+ * access to map(), unmap() completely unrelated buffer objects. TTM buffer
+ * object reservations sometimes wait for GPU and should therefore be
+ * considered long waits. This function reserves the buffer object interruptibly
+ * taking this into account. Starvation is avoided by the vm system not
+ * allowing too many repeated restarts.
+ * This function is intended to be used in customized fault() and _mkwrite()
+ * handlers.
+ *
+ * Return:
+ * 0 on success and the bo was reserved.
+ * VM_FAULT_RETRY if blocking wait.
+ * VM_FAULT_NOPAGE if blocking wait and retrying was not allowed.
+ */
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+ struct vm_fault *vmf)
{
- struct vm_area_struct *vma = vmf->vma;
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
- vma->vm_private_data;
- struct ttm_bo_device *bdev = bo->bdev;
- unsigned long page_offset;
- unsigned long page_last;
- unsigned long pfn;
- struct ttm_tt *ttm = NULL;
- struct page *page;
- int err;
- int i;
- vm_fault_t ret = VM_FAULT_NOPAGE;
- unsigned long address = vmf->address;
- struct ttm_mem_type_manager *man =
- &bdev->man[bo->mem.mem_type];
- struct vm_area_struct cvma;
-
/*
* Work around locking order reversal in fault / nopfn
* between mmap_sem and bo_reserve: Perform a trylock operation
@@ -151,14 +154,54 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
return VM_FAULT_NOPAGE;
}
+ return 0;
+}
+EXPORT_SYMBOL(ttm_bo_vm_reserve);
+
+/**
+ * ttm_bo_vm_fault_reserved - TTM fault helper
+ * @vmf: The struct vm_fault given as argument to the fault callback
+ * @prot: The page protection to be used for this memory area.
+ * @num_prefault: Maximum number of prefault pages. The caller may want to
+ * specify this based on madvice settings and the size of the GPU object
+ * backed by the memory.
+ *
+ * This function inserts one or more page table entries pointing to the
+ * memory backing the buffer object, and then returns a return code
+ * instructing the caller to retry the page access.
+ *
+ * Return:
+ * VM_FAULT_NOPAGE on success or pending signal
+ * VM_FAULT_SIGBUS on unspecified error
+ * VM_FAULT_OOM on out-of-memory
+ * VM_FAULT_RETRY if retryable wait
+ */
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+ pgoff_t num_prefault)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct vm_area_struct cvma = *vma;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ struct ttm_bo_device *bdev = bo->bdev;
+ unsigned long page_offset;
+ unsigned long page_last;
+ unsigned long pfn;
+ struct ttm_tt *ttm = NULL;
+ struct page *page;
+ int err;
+ pgoff_t i;
+ vm_fault_t ret = VM_FAULT_NOPAGE;
+ unsigned long address = vmf->address;
+ struct ttm_mem_type_manager *man =
+ &bdev->man[bo->mem.mem_type];
+
/*
* Refuse to fault imported pages. This should be handled
* (if at all) by redirecting mmap to the exporter.
*/
- if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG)) {
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
- }
+ if (bo->ttm && (bo->ttm->page_flags & TTM_PAGE_FLAG_SG))
+ return VM_FAULT_SIGBUS;
if (bdev->driver->fault_reserve_notify) {
struct dma_fence *moving = dma_fence_get(bo->moving);
@@ -169,11 +212,9 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
break;
case -EBUSY:
case -ERESTARTSYS:
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
+ return VM_FAULT_NOPAGE;
default:
- ret = VM_FAULT_SIGBUS;
- goto out_unlock;
+ return VM_FAULT_SIGBUS;
}
if (bo->moving != moving) {
@@ -189,21 +230,12 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
* move.
*/
ret = ttm_bo_vm_fault_idle(bo, vmf);
- if (unlikely(ret != 0)) {
- if (ret == VM_FAULT_RETRY &&
- !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT)) {
- /* The BO has already been unreserved. */
- return ret;
- }
-
- goto out_unlock;
- }
+ if (unlikely(ret != 0))
+ return ret;
err = ttm_mem_io_lock(man, true);
- if (unlikely(err != 0)) {
- ret = VM_FAULT_NOPAGE;
- goto out_unlock;
- }
+ if (unlikely(err != 0))
+ return VM_FAULT_NOPAGE;
err = ttm_mem_io_reserve_vm(bo);
if (unlikely(err != 0)) {
ret = VM_FAULT_SIGBUS;
@@ -220,18 +252,8 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
goto out_io_unlock;
}
- /*
- * Make a local vma copy to modify the page_prot member
- * and vm_flags if necessary. The vma parameter is protected
- * by mmap_sem in write mode.
- */
- cvma = *vma;
- cvma.vm_page_prot = vm_get_page_prot(cvma.vm_flags);
-
- if (bo->mem.bus.is_iomem) {
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
- } else {
+ cvma.vm_page_prot = ttm_io_prot(bo->mem.placement, prot);
+ if (!bo->mem.bus.is_iomem) {
struct ttm_operation_ctx ctx = {
.interruptible = false,
.no_wait_gpu = false,
@@ -240,24 +262,21 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
};
ttm = bo->ttm;
- cvma.vm_page_prot = ttm_io_prot(bo->mem.placement,
- cvma.vm_page_prot);
-
- /* Allocate all page at once, most common usage */
- if (ttm_tt_populate(ttm, &ctx)) {
+ if (ttm_tt_populate(bo->ttm, &ctx)) {
ret = VM_FAULT_OOM;
goto out_io_unlock;
}
+ } else {
+ /* Iomem should not be marked encrypted */
+ cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
}
/*
* Speculatively prefault a number of pages. Only error on
* first page.
*/
- for (i = 0; i < TTM_BO_VM_NUM_PREFAULT; ++i) {
+ for (i = 0; i < num_prefault; ++i) {
if (bo->mem.bus.is_iomem) {
- /* Iomem should not be marked encrypted */
- cvma.vm_page_prot = pgprot_decrypted(cvma.vm_page_prot);
pfn = ttm_bo_io_mem_pfn(bo, page_offset);
} else {
page = ttm->pages[page_offset];
@@ -293,28 +312,49 @@ static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
ret = VM_FAULT_NOPAGE;
out_io_unlock:
ttm_mem_io_unlock(man);
-out_unlock:
+ return ret;
+}
+EXPORT_SYMBOL(ttm_bo_vm_fault_reserved);
+
+static vm_fault_t ttm_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ pgprot_t prot;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ prot = vm_get_page_prot(vma->vm_flags);
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, TTM_BO_VM_NUM_PREFAULT);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
dma_resv_unlock(bo->base.resv);
+
return ret;
}
-static void ttm_bo_vm_open(struct vm_area_struct *vma)
+void ttm_bo_vm_open(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo =
- (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
WARN_ON(bo->bdev->dev_mapping != vma->vm_file->f_mapping);
ttm_bo_get(bo);
}
+EXPORT_SYMBOL(ttm_bo_vm_open);
-static void ttm_bo_vm_close(struct vm_area_struct *vma)
+void ttm_bo_vm_close(struct vm_area_struct *vma)
{
- struct ttm_buffer_object *bo = (struct ttm_buffer_object *)vma->vm_private_data;
+ struct ttm_buffer_object *bo = vma->vm_private_data;
ttm_bo_put(bo);
vma->vm_private_data = NULL;
}
+EXPORT_SYMBOL(ttm_bo_vm_close);
static int ttm_bo_vm_access_kmap(struct ttm_buffer_object *bo,
unsigned long offset,
diff --git a/drivers/gpu/drm/vmwgfx/Kconfig b/drivers/gpu/drm/vmwgfx/Kconfig
index 6b28a326f8bb..15acdf2a7c0f 100644
--- a/drivers/gpu/drm/vmwgfx/Kconfig
+++ b/drivers/gpu/drm/vmwgfx/Kconfig
@@ -8,6 +8,7 @@ config DRM_VMWGFX
select FB_CFB_IMAGEBLIT
select DRM_TTM
select FB
+ select MAPPING_DIRTY_HELPERS
# Only needed for the transitional use of drm_crtc_init - can be removed
# again once vmwgfx sets up the primary plane itself.
select DRM_KMS_HELPER
diff --git a/drivers/gpu/drm/vmwgfx/Makefile b/drivers/gpu/drm/vmwgfx/Makefile
index 8841bd30e1e5..c877a21a0739 100644
--- a/drivers/gpu/drm/vmwgfx/Makefile
+++ b/drivers/gpu/drm/vmwgfx/Makefile
@@ -8,7 +8,7 @@ vmwgfx-y := vmwgfx_execbuf.o vmwgfx_gmr.o vmwgfx_kms.o vmwgfx_drv.o \
vmwgfx_cmdbuf_res.o vmwgfx_cmdbuf.o vmwgfx_stdu.o \
vmwgfx_cotable.o vmwgfx_so.o vmwgfx_binding.o vmwgfx_msg.o \
vmwgfx_simple_resource.o vmwgfx_va.o vmwgfx_blit.o \
- vmwgfx_validation.o \
+ vmwgfx_validation.o vmwgfx_page_dirty.o \
ttm_object.o ttm_lock.o
obj-$(CONFIG_DRM_VMWGFX) := vmwgfx.o
diff --git a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
index f2bfd3d80598..61414f105c67 100644
--- a/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
+++ b/drivers/gpu/drm/vmwgfx/device_include/svga3d_surfacedefs.h
@@ -1280,7 +1280,6 @@ svga3dsurface_get_pixel_offset(SVGA3dSurfaceFormat format,
return offset;
}
-
static inline u32
svga3dsurface_get_image_offset(SVGA3dSurfaceFormat format,
surf_size_struct baseLevelSize,
@@ -1375,4 +1374,236 @@ svga3dsurface_is_screen_target_format(SVGA3dSurfaceFormat format)
return svga3dsurface_is_dx_screen_target_format(format);
}
+/**
+ * struct svga3dsurface_mip - Mimpmap level information
+ * @bytes: Bytes required in the backing store of this mipmap level.
+ * @img_stride: Byte stride per image.
+ * @row_stride: Byte stride per block row.
+ * @size: The size of the mipmap.
+ */
+struct svga3dsurface_mip {
+ size_t bytes;
+ size_t img_stride;
+ size_t row_stride;
+ struct drm_vmw_size size;
+
+};
+
+/**
+ * struct svga3dsurface_cache - Cached surface information
+ * @desc: Pointer to the surface descriptor
+ * @mip: Array of mipmap level information. Valid size is @num_mip_levels.
+ * @mip_chain_bytes: Bytes required in the backing store for the whole chain
+ * of mip levels.
+ * @sheet_bytes: Bytes required in the backing store for a sheet
+ * representing a single sample.
+ * @num_mip_levels: Valid size of the @mip array. Number of mipmap levels in
+ * a chain.
+ * @num_layers: Number of slices in an array texture or number of faces in
+ * a cubemap texture.
+ */
+struct svga3dsurface_cache {
+ const struct svga3d_surface_desc *desc;
+ struct svga3dsurface_mip mip[DRM_VMW_MAX_MIP_LEVELS];
+ size_t mip_chain_bytes;
+ size_t sheet_bytes;
+ u32 num_mip_levels;
+ u32 num_layers;
+};
+
+/**
+ * struct svga3dsurface_loc - Surface location
+ * @sub_resource: Surface subresource. Defined as layer * num_mip_levels +
+ * mip_level.
+ * @x: X coordinate.
+ * @y: Y coordinate.
+ * @z: Z coordinate.
+ */
+struct svga3dsurface_loc {
+ u32 sub_resource;
+ u32 x, y, z;
+};
+
+/**
+ * svga3dsurface_subres - Compute the subresource from layer and mipmap.
+ * @cache: Surface layout data.
+ * @mip_level: The mipmap level.
+ * @layer: The surface layer (face or array slice).
+ *
+ * Return: The subresource.
+ */
+static inline u32 svga3dsurface_subres(const struct svga3dsurface_cache *cache,
+ u32 mip_level, u32 layer)
+{
+ return cache->num_mip_levels * layer + mip_level;
+}
+
+/**
+ * svga3dsurface_setup_cache - Build a surface cache entry
+ * @size: The surface base level dimensions.
+ * @format: The surface format.
+ * @num_mip_levels: Number of mipmap levels.
+ * @num_layers: Number of layers.
+ * @cache: Pointer to a struct svga3dsurface_cach object to be filled in.
+ *
+ * Return: Zero on success, -EINVAL on invalid surface layout.
+ */
+static inline int svga3dsurface_setup_cache(const struct drm_vmw_size *size,
+ SVGA3dSurfaceFormat format,
+ u32 num_mip_levels,
+ u32 num_layers,
+ u32 num_samples,
+ struct svga3dsurface_cache *cache)
+{
+ const struct svga3d_surface_desc *desc;
+ u32 i;
+
+ memset(cache, 0, sizeof(*cache));
+ cache->desc = desc = svga3dsurface_get_desc(format);
+ cache->num_mip_levels = num_mip_levels;
+ cache->num_layers = num_layers;
+ for (i = 0; i < cache->num_mip_levels; i++) {
+ struct svga3dsurface_mip *mip = &cache->mip[i];
+
+ mip->size = svga3dsurface_get_mip_size(*size, i);
+ mip->bytes = svga3dsurface_get_image_buffer_size
+ (desc, &mip->size, 0);
+ mip->row_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.width,
+ desc->block_size.width) *
+ desc->bytes_per_block * num_samples;
+ if (!mip->row_stride)
+ goto invalid_dim;
+
+ mip->img_stride =
+ __KERNEL_DIV_ROUND_UP(mip->size.height,
+ desc->block_size.height) *
+ mip->row_stride;
+ if (!mip->img_stride)
+ goto invalid_dim;
+
+ cache->mip_chain_bytes += mip->bytes;
+ }
+ cache->sheet_bytes = cache->mip_chain_bytes * num_layers;
+ if (!cache->sheet_bytes)
+ goto invalid_dim;
+
+ return 0;
+
+invalid_dim:
+ VMW_DEBUG_USER("Invalid surface layout for dirty tracking.\n");
+ return -EINVAL;
+}
+
+/**
+ * svga3dsurface_get_loc - Get a surface location from an offset into the
+ * backing store
+ * @cache: Surface layout data.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ * @offset: Offset into the surface backing store.
+ */
+static inline void
+svga3dsurface_get_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc,
+ size_t offset)
+{
+ const struct svga3dsurface_mip *mip = &cache->mip[0];
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 layer;
+ int i;
+
+ if (offset >= cache->sheet_bytes)
+ offset %= cache->sheet_bytes;
+
+ layer = offset / cache->mip_chain_bytes;
+ offset -= layer * cache->mip_chain_bytes;
+ for (i = 0; i < cache->num_mip_levels; ++i, ++mip) {
+ if (mip->bytes > offset)
+ break;
+ offset -= mip->bytes;
+ }
+
+ loc->sub_resource = svga3dsurface_subres(cache, i, layer);
+ loc->z = offset / mip->img_stride;
+ offset -= loc->z * mip->img_stride;
+ loc->z *= desc->block_size.depth;
+ loc->y = offset / mip->row_stride;
+ offset -= loc->y * mip->row_stride;
+ loc->y *= desc->block_size.height;
+ loc->x = offset / desc->bytes_per_block;
+ loc->x *= desc->block_size.width;
+}
+
+/**
+ * svga3dsurface_inc_loc - Clamp increment a surface location with one block
+ * size
+ * in each dimension.
+ * @loc: Pointer to a struct svga3dsurface_loc to be incremented.
+ *
+ * When computing the size of a range as size = end - start, the range does not
+ * include the end element. However a location representing the last byte
+ * of a touched region in the backing store *is* included in the range.
+ * This function modifies such a location to match the end definition
+ * given as start + size which is the one used in a SVGA3dBox.
+ */
+static inline void
+svga3dsurface_inc_loc(const struct svga3dsurface_cache *cache,
+ struct svga3dsurface_loc *loc)
+{
+ const struct svga3d_surface_desc *desc = cache->desc;
+ u32 mip = loc->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+
+ loc->sub_resource++;
+ loc->x += desc->block_size.width;
+ if (loc->x > size->width)
+ loc->x = size->width;
+ loc->y += desc->block_size.height;
+ if (loc->y > size->height)
+ loc->y = size->height;
+ loc->z += desc->block_size.depth;
+ if (loc->z > size->depth)
+ loc->z = size->depth;
+}
+
+/**
+ * svga3dsurface_min_loc - The start location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ */
+static inline void
+svga3dsurface_min_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ loc->sub_resource = sub_resource;
+ loc->x = loc->y = loc->z = 0;
+}
+
+/**
+ * svga3dsurface_min_loc - The end location in a subresource
+ * @cache: Surface layout data.
+ * @sub_resource: The subresource.
+ * @loc: Pointer to a struct svga3dsurface_loc to be filled in.
+ *
+ * Following the end definition given in svga3dsurface_inc_loc(),
+ * Compute the end location of a surface subresource.
+ */
+static inline void
+svga3dsurface_max_loc(const struct svga3dsurface_cache *cache,
+ u32 sub_resource,
+ struct svga3dsurface_loc *loc)
+{
+ const struct drm_vmw_size *size;
+ u32 mip;
+
+ loc->sub_resource = sub_resource + 1;
+ mip = sub_resource % cache->num_mip_levels;
+ size = &cache->mip[mip].size;
+ loc->x = size->width;
+ loc->y = size->height;
+ loc->z = size->depth;
+}
+
#endif /* _SVGA3D_SURFACEDEFS_H_ */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
index 74016a08d118..8b71bf6b58ef 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_bo.c
@@ -462,6 +462,8 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
{
struct vmw_buffer_object *vmw_bo = vmw_buffer_object(bo);
+ WARN_ON(vmw_bo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vmw_bo->res_tree));
vmw_bo_unmap(vmw_bo);
kfree(vmw_bo);
}
@@ -475,8 +477,11 @@ void vmw_bo_bo_free(struct ttm_buffer_object *bo)
static void vmw_user_bo_destroy(struct ttm_buffer_object *bo)
{
struct vmw_user_buffer_object *vmw_user_bo = vmw_user_buffer_object(bo);
+ struct vmw_buffer_object *vbo = &vmw_user_bo->vbo;
- vmw_bo_unmap(&vmw_user_bo->vbo);
+ WARN_ON(vbo->dirty);
+ WARN_ON(!RB_EMPTY_ROOT(&vbo->res_tree));
+ vmw_bo_unmap(vbo);
ttm_prime_object_kfree(vmw_user_bo, prime);
}
@@ -511,8 +516,7 @@ int vmw_bo_init(struct vmw_private *dev_priv,
memset(vmw_bo, 0, sizeof(*vmw_bo));
BUILD_BUG_ON(TTM_MAX_BO_PRIORITY <= 3);
vmw_bo->base.priority = 3;
-
- INIT_LIST_HEAD(&vmw_bo->res_list);
+ vmw_bo->res_tree = RB_ROOT;
ret = ttm_bo_init(bdev, &vmw_bo->base, size,
ttm_bo_type_device, placement,
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
index b18842f73081..a31e726d6d71 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_drv.h
@@ -56,9 +56,9 @@
#define VMWGFX_DRIVER_NAME "vmwgfx"
-#define VMWGFX_DRIVER_DATE "20180704"
+#define VMWGFX_DRIVER_DATE "20190328"
#define VMWGFX_DRIVER_MAJOR 2
-#define VMWGFX_DRIVER_MINOR 15
+#define VMWGFX_DRIVER_MINOR 16
#define VMWGFX_DRIVER_PATCHLEVEL 0
#define VMWGFX_FIFO_STATIC_SIZE (1024*1024)
#define VMWGFX_MAX_RELOCATIONS 2048
@@ -100,17 +100,18 @@ struct vmw_fpriv {
/**
* struct vmw_buffer_object - TTM buffer object with vmwgfx additions
* @base: The TTM buffer object
- * @res_list: List of resources using this buffer object as a backing MOB
+ * @res_tree: RB tree of resources using this buffer object as a backing MOB
* @pin_count: pin depth
* @cpu_writers: Number of synccpu write grabs. Protected by reservation when
* increased. May be decreased without reservation.
* @dx_query_ctx: DX context if this buffer object is used as a DX query MOB
* @map: Kmap object for semi-persistent mappings
* @res_prios: Eviction priority counts for attached resources
+ * @dirty: structure for user-space dirty-tracking
*/
struct vmw_buffer_object {
struct ttm_buffer_object base;
- struct list_head res_list;
+ struct rb_root res_tree;
s32 pin_count;
atomic_t cpu_writers;
/* Not ref-counted. Protected by binding_mutex */
@@ -118,6 +119,7 @@ struct vmw_buffer_object {
/* Protected by reservation */
struct ttm_bo_kmap_obj map;
u32 res_prios[TTM_MAX_BO_PRIORITY];
+ struct vmw_bo_dirty *dirty;
};
/**
@@ -148,7 +150,8 @@ struct vmw_res_func;
* @res_dirty: Resource contains data not yet in the backup buffer. Protected
* by resource reserved.
* @backup_dirty: Backup buffer contains data not yet in the HW resource.
- * Protecte by resource reserved.
+ * Protected by resource reserved.
+ * @coherent: Emulate coherency by tracking vm accesses.
* @backup: The backup buffer if any. Protected by resource reserved.
* @backup_offset: Offset into the backup buffer if any. Protected by resource
* reserved. Note that only a few resource types can have a @backup_offset
@@ -157,29 +160,32 @@ struct vmw_res_func;
* pin-count greater than zero. It is not on the resource LRU lists and its
* backup buffer is pinned. Hence it can't be evicted.
* @func: Method vtable for this resource. Immutable.
+ * @mob_node; Node for the MOB backup rbtree. Protected by @backup reserved.
* @lru_head: List head for the LRU list. Protected by @dev_priv::resource_lock.
- * @mob_head: List head for the MOB backup list. Protected by @backup reserved.
* @binding_head: List head for the context binding list. Protected by
* the @dev_priv::binding_mutex
* @res_free: The resource destructor.
* @hw_destroy: Callback to destroy the resource on the device, as part of
* resource destruction.
*/
+struct vmw_resource_dirty;
struct vmw_resource {
struct kref kref;
struct vmw_private *dev_priv;
int id;
u32 used_prio;
unsigned long backup_size;
- bool res_dirty;
- bool backup_dirty;
+ u32 res_dirty : 1;
+ u32 backup_dirty : 1;
+ u32 coherent : 1;
struct vmw_buffer_object *backup;
unsigned long backup_offset;
unsigned long pin_count;
const struct vmw_res_func *func;
+ struct rb_node mob_node;
struct list_head lru_head;
- struct list_head mob_head;
struct list_head binding_head;
+ struct vmw_resource_dirty *dirty;
void (*res_free) (struct vmw_resource *res);
void (*hw_destroy) (struct vmw_resource *res);
};
@@ -678,7 +684,8 @@ extern void vmw_resource_unreference(struct vmw_resource **p_res);
extern struct vmw_resource *vmw_resource_reference(struct vmw_resource *res);
extern struct vmw_resource *
vmw_resource_reference_unless_doomed(struct vmw_resource *res);
-extern int vmw_resource_validate(struct vmw_resource *res, bool intr);
+extern int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying);
extern int vmw_resource_reserve(struct vmw_resource *res, bool interruptible,
bool no_backup);
extern bool vmw_resource_needs_backup(const struct vmw_resource *res);
@@ -720,6 +727,10 @@ extern void vmw_resource_evict_all(struct vmw_private *dev_priv);
extern void vmw_resource_unbind_list(struct vmw_buffer_object *vbo);
void vmw_resource_mob_attach(struct vmw_resource *res);
void vmw_resource_mob_detach(struct vmw_resource *res);
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end);
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault);
/**
* vmw_resource_mob_attached - Whether a resource currently has a mob attached
@@ -729,7 +740,7 @@ void vmw_resource_mob_detach(struct vmw_resource *res);
*/
static inline bool vmw_resource_mob_attached(const struct vmw_resource *res)
{
- return !list_empty(&res->mob_head);
+ return !RB_EMPTY_NODE(&res->mob_node);
}
/**
@@ -1407,6 +1418,17 @@ int vmw_host_log(const char *log);
#define VMW_DEBUG_USER(fmt, ...) \
DRM_DEBUG_DRIVER(fmt, ##__VA_ARGS__)
+/* Resource dirtying - vmwgfx_page_dirty.c */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo);
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res);
+void vmw_bo_dirty_clear_res(struct vmw_resource *res);
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo);
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end);
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf);
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf);
+
/**
* VMW_DEBUG_KMS - Debug output for kernel mode-setting
*
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
index ff86d49dc5e8..934ad7c0c342 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_execbuf.c
@@ -2560,7 +2560,6 @@ static int vmw_cmd_dx_check_subresource(struct vmw_private *dev_priv,
offsetof(typeof(*cmd), sid));
cmd = container_of(header, typeof(*cmd), header);
-
return vmw_cmd_res_check(dev_priv, sw_context, vmw_res_surface,
VMW_RES_DIRTY_NONE, user_surface_converter,
&cmd->sid, NULL);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
new file mode 100644
index 000000000000..f07aa857587c
--- /dev/null
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_page_dirty.c
@@ -0,0 +1,488 @@
+// SPDX-License-Identifier: GPL-2.0 OR MIT
+/**************************************************************************
+ *
+ * Copyright 2019 VMware, Inc., Palo Alto, CA., USA
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#include "vmwgfx_drv.h"
+
+/*
+ * Different methods for tracking dirty:
+ * VMW_BO_DIRTY_PAGETABLE - Scan the pagetable for hardware dirty bits
+ * VMW_BO_DIRTY_MKWRITE - Write-protect page table entries and record write-
+ * accesses in the VM mkwrite() callback
+ */
+enum vmw_bo_dirty_method {
+ VMW_BO_DIRTY_PAGETABLE,
+ VMW_BO_DIRTY_MKWRITE,
+};
+
+/*
+ * No dirtied pages at scan trigger a transition to the _MKWRITE method,
+ * similarly a certain percentage of dirty pages trigger a transition to
+ * the _PAGETABLE method. How many triggers should we wait for before
+ * changing method?
+ */
+#define VMW_DIRTY_NUM_CHANGE_TRIGGERS 2
+
+/* Percentage to trigger a transition to the _PAGETABLE method */
+#define VMW_DIRTY_PERCENTAGE 10
+
+/**
+ * struct vmw_bo_dirty - Dirty information for buffer objects
+ * @start: First currently dirty bit
+ * @end: Last currently dirty bit + 1
+ * @method: The currently used dirty method
+ * @change_count: Number of consecutive method change triggers
+ * @ref_count: Reference count for this structure
+ * @bitmap_size: The size of the bitmap in bits. Typically equal to the
+ * nuber of pages in the bo.
+ * @size: The accounting size for this struct.
+ * @bitmap: A bitmap where each bit represents a page. A set bit means a
+ * dirty page.
+ */
+struct vmw_bo_dirty {
+ unsigned long start;
+ unsigned long end;
+ enum vmw_bo_dirty_method method;
+ unsigned int change_count;
+ unsigned int ref_count;
+ unsigned long bitmap_size;
+ size_t size;
+ unsigned long bitmap[0];
+};
+
+/**
+ * vmw_bo_dirty_scan_pagetable - Perform a pagetable scan for dirty bits
+ * @vbo: The buffer object to scan
+ *
+ * Scans the pagetable for dirty bits. Clear those bits and modify the
+ * dirty structure with the results. This function may change the
+ * dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_pagetable(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t num_marked;
+
+ num_marked = clean_record_shared_mapping_range
+ (mapping,
+ offset, dirty->bitmap_size,
+ offset, &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ if (num_marked == 0)
+ dirty->change_count++;
+ else
+ dirty->change_count = 0;
+
+ if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+ dirty->change_count = 0;
+ dirty->method = VMW_BO_DIRTY_MKWRITE;
+ wp_shared_mapping_range(mapping,
+ offset, dirty->bitmap_size);
+ clean_record_shared_mapping_range(mapping,
+ offset, dirty->bitmap_size,
+ offset, &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ }
+}
+
+/**
+ * vmw_bo_dirty_scan_mkwrite - Reset the mkwrite dirty-tracking method
+ * @vbo: The buffer object to scan
+ *
+ * Write-protect pages written to so that consecutive write accesses will
+ * trigger a call to mkwrite.
+ *
+ * This function may change the dirty-tracking method.
+ */
+static void vmw_bo_dirty_scan_mkwrite(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t num_marked;
+
+ if (dirty->end <= dirty->start)
+ return;
+
+ num_marked = wp_shared_mapping_range(vbo->base.bdev->dev_mapping,
+ dirty->start + offset,
+ dirty->end - dirty->start);
+
+ if (100UL * num_marked / dirty->bitmap_size >
+ VMW_DIRTY_PERCENTAGE) {
+ dirty->change_count++;
+ } else {
+ dirty->change_count = 0;
+ }
+
+ if (dirty->change_count > VMW_DIRTY_NUM_CHANGE_TRIGGERS) {
+ pgoff_t start = 0;
+ pgoff_t end = dirty->bitmap_size;
+
+ dirty->method = VMW_BO_DIRTY_PAGETABLE;
+ clean_record_shared_mapping_range(mapping, offset, end, offset,
+ &dirty->bitmap[0],
+ &start, &end);
+ bitmap_clear(&dirty->bitmap[0], 0, dirty->bitmap_size);
+ if (dirty->start < dirty->end)
+ bitmap_set(&dirty->bitmap[0], dirty->start,
+ dirty->end - dirty->start);
+ dirty->change_count = 0;
+ }
+}
+
+/**
+ * vmw_bo_dirty_scan - Scan for dirty pages and add them to the dirty
+ * tracking structure
+ * @vbo: The buffer object to scan
+ *
+ * This function may change the dirty tracking method.
+ */
+void vmw_bo_dirty_scan(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ if (dirty->method == VMW_BO_DIRTY_PAGETABLE)
+ vmw_bo_dirty_scan_pagetable(vbo);
+ else
+ vmw_bo_dirty_scan_mkwrite(vbo);
+}
+
+/**
+ * vmw_bo_dirty_pre_unmap - write-protect and pick up dirty pages before
+ * an unmap_mapping_range operation.
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * If we're using the _PAGETABLE scan method, we may leak dirty pages
+ * when calling unmap_mapping_range(). This function makes sure we pick
+ * up all dirty pages.
+ */
+static void vmw_bo_dirty_pre_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+ if (dirty->method != VMW_BO_DIRTY_PAGETABLE || start >= end)
+ return;
+
+ wp_shared_mapping_range(mapping, start + offset, end - start);
+ clean_record_shared_mapping_range(mapping, start + offset,
+ end - start, offset,
+ &dirty->bitmap[0], &dirty->start,
+ &dirty->end);
+}
+
+/**
+ * vmw_bo_dirty_unmap - Clear all ptes pointing to a range within a bo
+ * @vbo: The buffer object,
+ * @start: First page of the range within the buffer object.
+ * @end: Last page of the range within the buffer object + 1.
+ *
+ * This is similar to ttm_bo_unmap_virtual_locked() except it takes a subrange.
+ */
+void vmw_bo_dirty_unmap(struct vmw_buffer_object *vbo,
+ pgoff_t start, pgoff_t end)
+{
+ unsigned long offset = drm_vma_node_start(&vbo->base.base.vma_node);
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+
+ vmw_bo_dirty_pre_unmap(vbo, start, end);
+ unmap_shared_mapping_range(mapping, (offset + start) << PAGE_SHIFT,
+ (loff_t) (end - start) << PAGE_SHIFT);
+}
+
+/**
+ * vmw_bo_dirty_add - Add a dirty-tracking user to a buffer object
+ * @vbo: The buffer object
+ *
+ * This function registers a dirty-tracking user to a buffer object.
+ * A user can be for example a resource or a vma in a special user-space
+ * mapping.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+int vmw_bo_dirty_add(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t num_pages = vbo->base.num_pages;
+ size_t size, acc_size;
+ int ret;
+ static struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false
+ };
+
+ if (dirty) {
+ dirty->ref_count++;
+ return 0;
+ }
+
+ size = sizeof(*dirty) + BITS_TO_LONGS(num_pages) * sizeof(long);
+ acc_size = ttm_round_pot(size);
+ ret = ttm_mem_global_alloc(&ttm_mem_glob, acc_size, &ctx);
+ if (ret) {
+ VMW_DEBUG_USER("Out of graphics memory for buffer object "
+ "dirty tracker.\n");
+ return ret;
+ }
+ dirty = kvzalloc(size, GFP_KERNEL);
+ if (!dirty) {
+ ret = -ENOMEM;
+ goto out_no_dirty;
+ }
+
+ dirty->size = acc_size;
+ dirty->bitmap_size = num_pages;
+ dirty->start = dirty->bitmap_size;
+ dirty->end = 0;
+ dirty->ref_count = 1;
+ if (num_pages < PAGE_SIZE / sizeof(pte_t)) {
+ dirty->method = VMW_BO_DIRTY_PAGETABLE;
+ } else {
+ struct address_space *mapping = vbo->base.bdev->dev_mapping;
+ pgoff_t offset = drm_vma_node_start(&vbo->base.base.vma_node);
+
+ dirty->method = VMW_BO_DIRTY_MKWRITE;
+
+ /* Write-protect and then pick up already dirty bits */
+ wp_shared_mapping_range(mapping, offset, num_pages);
+ clean_record_shared_mapping_range(mapping, offset, num_pages,
+ offset,
+ &dirty->bitmap[0],
+ &dirty->start, &dirty->end);
+ }
+
+ vbo->dirty = dirty;
+
+ return 0;
+
+out_no_dirty:
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
+ return ret;
+}
+
+/**
+ * vmw_bo_dirty_release - Release a dirty-tracking user from a buffer object
+ * @vbo: The buffer object
+ *
+ * This function releases a dirty-tracking user from a buffer object.
+ * If the reference count reaches zero, then the dirty-tracking object is
+ * freed and the pointer to it cleared.
+ *
+ * Return: Zero on success, -ENOMEM on memory allocation failure.
+ */
+void vmw_bo_dirty_release(struct vmw_buffer_object *vbo)
+{
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ if (dirty && --dirty->ref_count == 0) {
+ size_t acc_size = dirty->size;
+
+ kvfree(dirty);
+ ttm_mem_global_free(&ttm_mem_glob, acc_size);
+ vbo->dirty = NULL;
+ }
+}
+
+/**
+ * vmw_bo_dirty_transfer_to_res - Pick up a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will pick up all dirty ranges affecting the resource from
+ * it's backup mob, and call vmw_resource_dirty_update() once for each
+ * range. The transferred ranges will be cleared from the backing mob's
+ * dirty tracking.
+ */
+void vmw_bo_dirty_transfer_to_res(struct vmw_resource *res)
+{
+ struct vmw_buffer_object *vbo = res->backup;
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+ pgoff_t start, cur, end;
+ unsigned long res_start = res->backup_offset;
+ unsigned long res_end = res->backup_offset + res->backup_size;
+
+ WARN_ON_ONCE(res_start & ~PAGE_MASK);
+ res_start >>= PAGE_SHIFT;
+ res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+ if (res_start >= dirty->end || res_end <= dirty->start)
+ return;
+
+ cur = max(res_start, dirty->start);
+ res_end = max(res_end, dirty->end);
+ while (cur < res_end) {
+ unsigned long num;
+
+ start = find_next_bit(&dirty->bitmap[0], res_end, cur);
+ if (start >= res_end)
+ break;
+
+ end = find_next_zero_bit(&dirty->bitmap[0], res_end, start + 1);
+ cur = end + 1;
+ num = end - start;
+ bitmap_clear(&dirty->bitmap[0], start, num);
+ vmw_resource_dirty_update(res, start, end);
+ }
+
+ if (res_start <= dirty->start && res_end > dirty->start)
+ dirty->start = res_end;
+ if (res_start < dirty->end && res_end >= dirty->end)
+ dirty->end = res_start;
+}
+
+/**
+ * vmw_bo_dirty_clear_res - Clear a resource's dirty region from
+ * its backing mob.
+ * @res: The resource
+ *
+ * This function will clear all dirty ranges affecting the resource from
+ * it's backup mob's dirty tracking.
+ */
+void vmw_bo_dirty_clear_res(struct vmw_resource *res)
+{
+ unsigned long res_start = res->backup_offset;
+ unsigned long res_end = res->backup_offset + res->backup_size;
+ struct vmw_buffer_object *vbo = res->backup;
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ res_start >>= PAGE_SHIFT;
+ res_end = DIV_ROUND_UP(res_end, PAGE_SIZE);
+
+ if (res_start >= dirty->end || res_end <= dirty->start)
+ return;
+
+ res_start = max(res_start, dirty->start);
+ res_end = min(res_end, dirty->end);
+ bitmap_clear(&dirty->bitmap[0], res_start, res_end - res_start);
+
+ if (res_start <= dirty->start && res_end > dirty->start)
+ dirty->start = res_end;
+ if (res_start < dirty->end && res_end >= dirty->end)
+ dirty->end = res_start;
+}
+
+vm_fault_t vmw_bo_vm_mkwrite(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ vm_fault_t ret;
+ unsigned long page_offset;
+ unsigned int save_flags;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, typeof(*vbo), base);
+
+ /*
+ * mkwrite() doesn't handle the VM_FAULT_RETRY return value correctly.
+ * So make sure the TTM helpers are aware.
+ */
+ save_flags = vmf->flags;
+ vmf->flags &= ~FAULT_FLAG_ALLOW_RETRY;
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ vmf->flags = save_flags;
+ if (ret)
+ return ret;
+
+ page_offset = vmf->pgoff - drm_vma_node_start(&bo->base.vma_node);
+ if (unlikely(page_offset >= bo->num_pages)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE &&
+ !test_bit(page_offset, &vbo->dirty->bitmap[0])) {
+ struct vmw_bo_dirty *dirty = vbo->dirty;
+
+ __set_bit(page_offset, &dirty->bitmap[0]);
+ dirty->start = min(dirty->start, page_offset);
+ dirty->end = max(dirty->end, page_offset + 1);
+ }
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+ return ret;
+}
+
+vm_fault_t vmw_bo_vm_fault(struct vm_fault *vmf)
+{
+ struct vm_area_struct *vma = vmf->vma;
+ struct ttm_buffer_object *bo = (struct ttm_buffer_object *)
+ vma->vm_private_data;
+ struct vmw_buffer_object *vbo =
+ container_of(bo, struct vmw_buffer_object, base);
+ pgoff_t num_prefault;
+ pgprot_t prot;
+ vm_fault_t ret;
+
+ ret = ttm_bo_vm_reserve(bo, vmf);
+ if (ret)
+ return ret;
+
+ num_prefault = (vma->vm_flags & VM_RAND_READ) ? 1 :
+ TTM_BO_VM_NUM_PREFAULT;
+
+ if (vbo->dirty) {
+ pgoff_t allowed_prefault;
+ unsigned long page_offset;
+
+ page_offset = vmf->pgoff -
+ drm_vma_node_start(&bo->base.vma_node);
+ if (page_offset >= bo->num_pages ||
+ vmw_resources_clean(vbo, page_offset,
+ page_offset + PAGE_SIZE,
+ &allowed_prefault)) {
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ num_prefault = min(num_prefault, allowed_prefault);
+ }
+
+ /*
+ * If we don't track dirty using the MKWRITE method, make sure
+ * sure the page protection is write-enabled so we don't get
+ * a lot of unnecessary write faults.
+ */
+ if (vbo->dirty && vbo->dirty->method == VMW_BO_DIRTY_MKWRITE)
+ prot = vma->vm_page_prot;
+ else
+ prot = vm_get_page_prot(vma->vm_flags);
+
+ ret = ttm_bo_vm_fault_reserved(vmf, prot, num_prefault);
+ if (ret == VM_FAULT_RETRY && !(vmf->flags & FAULT_FLAG_RETRY_NOWAIT))
+ return ret;
+
+out_unlock:
+ dma_resv_unlock(bo->base.resv);
+
+ return ret;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
index 6dfe36fb817c..c8441030637a 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource.c
@@ -40,11 +40,24 @@
void vmw_resource_mob_attach(struct vmw_resource *res)
{
struct vmw_buffer_object *backup = res->backup;
+ struct rb_node **new = &backup->res_tree.rb_node, *parent = NULL;
dma_resv_assert_held(res->backup->base.base.resv);
res->used_prio = (res->res_dirty) ? res->func->dirty_prio :
res->func->prio;
- list_add_tail(&res->mob_head, &backup->res_list);
+
+ while (*new) {
+ struct vmw_resource *this =
+ container_of(*new, struct vmw_resource, mob_node);
+
+ parent = *new;
+ new = (res->backup_offset < this->backup_offset) ?
+ &((*new)->rb_left) : &((*new)->rb_right);
+ }
+
+ rb_link_node(&res->mob_node, parent, new);
+ rb_insert_color(&res->mob_node, &backup->res_tree);
+
vmw_bo_prio_add(backup, res->used_prio);
}
@@ -58,7 +71,8 @@ void vmw_resource_mob_detach(struct vmw_resource *res)
dma_resv_assert_held(backup->base.base.resv);
if (vmw_resource_mob_attached(res)) {
- list_del_init(&res->mob_head);
+ rb_erase(&res->mob_node, &backup->res_tree);
+ RB_CLEAR_NODE(&res->mob_node);
vmw_bo_prio_del(backup, res->used_prio);
}
}
@@ -119,6 +133,10 @@ static void vmw_resource_release(struct kref *kref)
}
res->backup_dirty = false;
vmw_resource_mob_detach(res);
+ if (res->dirty)
+ res->func->dirty_free(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->backup);
ttm_bo_unreserve(bo);
vmw_bo_unreference(&res->backup);
}
@@ -200,15 +218,17 @@ int vmw_resource_init(struct vmw_private *dev_priv, struct vmw_resource *res,
res->res_free = res_free;
res->dev_priv = dev_priv;
res->func = func;
+ RB_CLEAR_NODE(&res->mob_node);
INIT_LIST_HEAD(&res->lru_head);
- INIT_LIST_HEAD(&res->mob_head);
INIT_LIST_HEAD(&res->binding_head);
res->id = -1;
res->backup = NULL;
res->backup_offset = 0;
res->backup_dirty = false;
res->res_dirty = false;
+ res->coherent = false;
res->used_prio = 3;
+ res->dirty = NULL;
if (delay_id)
return 0;
else
@@ -373,7 +393,8 @@ out_no_bo:
* should be retried once resources have been freed up.
*/
static int vmw_resource_do_validate(struct vmw_resource *res,
- struct ttm_validate_buffer *val_buf)
+ struct ttm_validate_buffer *val_buf,
+ bool dirtying)
{
int ret = 0;
const struct vmw_res_func *func = res->func;
@@ -395,6 +416,39 @@ static int vmw_resource_do_validate(struct vmw_resource *res,
vmw_resource_mob_attach(res);
}
+ /*
+ * Handle the case where the backup mob is marked coherent but
+ * the resource isn't.
+ */
+ if (func->dirty_alloc && vmw_resource_mob_attached(res) &&
+ !res->coherent) {
+ if (res->backup->dirty && !res->dirty) {
+ ret = func->dirty_alloc(res);
+ if (ret)
+ return ret;
+ } else if (!res->backup->dirty && res->dirty) {
+ func->dirty_free(res);
+ }
+ }
+
+ /*
+ * Transfer the dirty regions to the resource and update
+ * the resource.
+ */
+ if (res->dirty) {
+ if (dirtying && !res->res_dirty) {
+ pgoff_t start = res->backup_offset >> PAGE_SHIFT;
+ pgoff_t end = __KERNEL_DIV_ROUND_UP
+ (res->backup_offset + res->backup_size,
+ PAGE_SIZE);
+
+ vmw_bo_dirty_unmap(res->backup, start, end);
+ }
+
+ vmw_bo_dirty_transfer_to_res(res);
+ return func->dirty_sync(res);
+ }
+
return 0;
out_bind_failed:
@@ -433,16 +487,28 @@ void vmw_resource_unreserve(struct vmw_resource *res,
if (switch_backup && new_backup != res->backup) {
if (res->backup) {
vmw_resource_mob_detach(res);
+ if (res->coherent)
+ vmw_bo_dirty_release(res->backup);
vmw_bo_unreference(&res->backup);
}
if (new_backup) {
res->backup = vmw_bo_reference(new_backup);
+
+ /*
+ * The validation code should already have added a
+ * dirty tracker here.
+ */
+ WARN_ON(res->coherent && !new_backup->dirty);
+
vmw_resource_mob_attach(res);
} else {
res->backup = NULL;
}
+ } else if (switch_backup && res->coherent) {
+ vmw_bo_dirty_release(res->backup);
}
+
if (switch_backup)
res->backup_offset = new_backup_offset;
@@ -622,6 +688,7 @@ out_no_unbind:
* to the device.
* @res: The resource to make visible to the device.
* @intr: Perform waits interruptible if possible.
+ * @dirtying: Pending GPU operation will dirty the resource
*
* On succesful return, any backup DMA buffer pointed to by @res->backup will
* be reserved and validated.
@@ -631,7 +698,8 @@ out_no_unbind:
* Return: Zero on success, -ERESTARTSYS if interrupted, negative error code
* on failure.
*/
-int vmw_resource_validate(struct vmw_resource *res, bool intr)
+int vmw_resource_validate(struct vmw_resource *res, bool intr,
+ bool dirtying)
{
int ret;
struct vmw_resource *evict_res;
@@ -648,7 +716,7 @@ int vmw_resource_validate(struct vmw_resource *res, bool intr)
if (res->backup)
val_buf.bo = &res->backup->base;
do {
- ret = vmw_resource_do_validate(res, &val_buf);
+ ret = vmw_resource_do_validate(res, &val_buf, dirtying);
if (likely(ret != -EBUSY))
break;
@@ -711,19 +779,20 @@ out_no_validate:
*/
void vmw_resource_unbind_list(struct vmw_buffer_object *vbo)
{
-
- struct vmw_resource *res, *next;
struct ttm_validate_buffer val_buf = {
.bo = &vbo->base,
.num_shared = 0
};
dma_resv_assert_held(vbo->base.base.resv);
- list_for_each_entry_safe(res, next, &vbo->res_list, mob_head) {
- if (!res->func->unbind)
- continue;
+ while (!RB_EMPTY_ROOT(&vbo->res_tree)) {
+ struct rb_node *node = vbo->res_tree.rb_node;
+ struct vmw_resource *res =
+ container_of(node, struct vmw_resource, mob_node);
+
+ if (!WARN_ON_ONCE(!res->func->unbind))
+ (void) res->func->unbind(res, res->res_dirty, &val_buf);
- (void) res->func->unbind(res, res->res_dirty, &val_buf);
res->backup_dirty = true;
res->res_dirty = false;
vmw_resource_mob_detach(res);
@@ -947,7 +1016,7 @@ int vmw_resource_pin(struct vmw_resource *res, bool interruptible)
/* Do we really need to pin the MOB as well? */
vmw_bo_pin_reserved(vbo, true);
}
- ret = vmw_resource_validate(res, interruptible);
+ ret = vmw_resource_validate(res, interruptible, true);
if (vbo)
ttm_bo_unreserve(&vbo->base);
if (ret)
@@ -1007,3 +1076,101 @@ enum vmw_res_type vmw_res_type(const struct vmw_resource *res)
{
return res->func->res_type;
}
+
+/**
+ * vmw_resource_update_dirty - Update a resource's dirty tracker with a
+ * sequential range of touched backing store memory.
+ * @res: The resource.
+ * @start: The first page touched.
+ * @end: The last page touched + 1.
+ */
+void vmw_resource_dirty_update(struct vmw_resource *res, pgoff_t start,
+ pgoff_t end)
+{
+ if (res->dirty)
+ res->func->dirty_range_add(res, start << PAGE_SHIFT,
+ end << PAGE_SHIFT);
+}
+
+/**
+ * vmw_resources_clean - Clean resources intersecting a mob range
+ * @vbo: The mob buffer object
+ * @start: The mob page offset starting the range
+ * @end: The mob page offset ending the range
+ * @num_prefault: Returns how many pages including the first have been
+ * cleaned and are ok to prefault
+ */
+int vmw_resources_clean(struct vmw_buffer_object *vbo, pgoff_t start,
+ pgoff_t end, pgoff_t *num_prefault)
+{
+ struct rb_node *cur = vbo->res_tree.rb_node;
+ struct vmw_resource *found = NULL;
+ unsigned long res_start = start << PAGE_SHIFT;
+ unsigned long res_end = end << PAGE_SHIFT;
+ unsigned long last_cleaned = 0;
+
+ /*
+ * Find the resource with lowest backup_offset that intersects the
+ * range.
+ */
+ while (cur) {
+ struct vmw_resource *cur_res =
+ container_of(cur, struct vmw_resource, mob_node);
+
+ if (cur_res->backup_offset >= res_end) {
+ cur = cur->rb_left;
+ } else if (cur_res->backup_offset + cur_res->backup_size <=
+ res_start) {
+ cur = cur->rb_right;
+ } else {
+ found = cur_res;
+ cur = cur->rb_left;
+ /* Continue to look for resources with lower offsets */
+ }
+ }
+
+ /*
+ * In order of increasing backup_offset, clean dirty resorces
+ * intersecting the range.
+ */
+ while (found) {
+ if (found->res_dirty) {
+ int ret;
+
+ if (!found->func->clean)
+ return -EINVAL;
+
+ ret = found->func->clean(found);
+ if (ret)
+ return ret;
+
+ found->res_dirty = false;
+ }
+ last_cleaned = found->backup_offset + found->backup_size;
+ cur = rb_next(&found->mob_node);
+ if (!cur)
+ break;
+
+ found = container_of(cur, struct vmw_resource, mob_node);
+ if (found->backup_offset >= res_end)
+ break;
+ }
+
+ /*
+ * Set number of pages allowed prefaulting and fence the buffer object
+ */
+ *num_prefault = 1;
+ if (last_cleaned > res_start) {
+ struct ttm_buffer_object *bo = &vbo->base;
+
+ *num_prefault = __KERNEL_DIV_ROUND_UP(last_cleaned - res_start,
+ PAGE_SIZE);
+ vmw_bo_fence_single(bo, NULL);
+ if (bo->moving)
+ dma_fence_put(bo->moving);
+ bo->moving = dma_fence_get
+ (dma_resv_get_excl(bo->base.resv));
+ }
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
index 984e588c62ca..3b7438b2d289 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_resource_priv.h
@@ -71,6 +71,13 @@ struct vmw_user_resource_conv {
* @commit_notify: If the resource is a command buffer managed resource,
* callback to notify that a define or remove command
* has been committed to the device.
+ * @dirty_alloc: Allocate a dirty tracker. NULL if dirty-tracking is not
+ * supported.
+ * @dirty_free: Free the dirty tracker.
+ * @dirty_sync: Upload the dirty mob contents to the resource.
+ * @dirty_add_range: Add a sequential dirty range to the resource
+ * dirty tracker.
+ * @clean: Clean the resource.
*/
struct vmw_res_func {
enum vmw_res_type res_type;
@@ -90,6 +97,12 @@ struct vmw_res_func {
struct ttm_validate_buffer *val_buf);
void (*commit_notify)(struct vmw_resource *res,
enum vmw_cmdbuf_res_state state);
+ int (*dirty_alloc)(struct vmw_resource *res);
+ void (*dirty_free)(struct vmw_resource *res);
+ int (*dirty_sync)(struct vmw_resource *res);
+ void (*dirty_range_add)(struct vmw_resource *res, size_t start,
+ size_t end);
+ int (*clean)(struct vmw_resource *res);
};
/**
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
index de0530b4dc1b..32b9131b2bae 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_surface.c
@@ -68,6 +68,20 @@ struct vmw_surface_offset {
uint32_t bo_offset;
};
+/**
+ * vmw_surface_dirty - Surface dirty-tracker
+ * @cache: Cached layout information of the surface.
+ * @size: Accounting size for the struct vmw_surface_dirty.
+ * @num_subres: Number of subresources.
+ * @boxes: Array of SVGA3dBoxes indicating dirty regions. One per subresource.
+ */
+struct vmw_surface_dirty {
+ struct svga3dsurface_cache cache;
+ size_t size;
+ u32 num_subres;
+ SVGA3dBox boxes[0];
+};
+
static void vmw_user_surface_free(struct vmw_resource *res);
static struct vmw_resource *
vmw_user_surface_base_to_res(struct ttm_base_object *base);
@@ -96,6 +110,13 @@ vmw_gb_surface_reference_internal(struct drm_device *dev,
struct drm_vmw_gb_surface_ref_ext_rep *rep,
struct drm_file *file_priv);
+static void vmw_surface_dirty_free(struct vmw_resource *res);
+static int vmw_surface_dirty_alloc(struct vmw_resource *res);
+static int vmw_surface_dirty_sync(struct vmw_resource *res);
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+ size_t end);
+static int vmw_surface_clean(struct vmw_resource *res);
+
static const struct vmw_user_resource_conv user_surface_conv = {
.object_type = VMW_RES_SURFACE,
.base_obj_to_res = vmw_user_surface_base_to_res,
@@ -133,7 +154,12 @@ static const struct vmw_res_func vmw_gb_surface_func = {
.create = vmw_gb_surface_create,
.destroy = vmw_gb_surface_destroy,
.bind = vmw_gb_surface_bind,
- .unbind = vmw_gb_surface_unbind
+ .unbind = vmw_gb_surface_unbind,
+ .dirty_alloc = vmw_surface_dirty_alloc,
+ .dirty_free = vmw_surface_dirty_free,
+ .dirty_sync = vmw_surface_dirty_sync,
+ .dirty_range_add = vmw_surface_dirty_range_add,
+ .clean = vmw_surface_clean,
};
/**
@@ -639,6 +665,7 @@ static void vmw_user_surface_free(struct vmw_resource *res)
struct vmw_private *dev_priv = srf->res.dev_priv;
uint32_t size = user_srf->size;
+ WARN_ON_ONCE(res->dirty);
if (user_srf->master)
drm_master_put(&user_srf->master);
kfree(srf->offsets);
@@ -1166,10 +1193,16 @@ static int vmw_gb_surface_bind(struct vmw_resource *res,
cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_SURFACE;
cmd2->header.size = sizeof(cmd2->body);
cmd2->body.sid = res->id;
- res->backup_dirty = false;
}
vmw_fifo_commit(dev_priv, submit_size);
+ if (res->backup->dirty && res->backup_dirty) {
+ /* We've just made a full upload. Cear dirty regions. */
+ vmw_bo_dirty_clear_res(res);
+ }
+
+ res->backup_dirty = false;
+
return 0;
}
@@ -1634,7 +1667,8 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
}
}
} else if (req->base.drm_surface_flags &
- drm_vmw_surface_flag_create_buffer)
+ (drm_vmw_surface_flag_create_buffer |
+ drm_vmw_surface_flag_coherent))
ret = vmw_user_bo_alloc(dev_priv, tfile,
res->backup_size,
req->base.drm_surface_flags &
@@ -1648,6 +1682,26 @@ vmw_gb_surface_define_internal(struct drm_device *dev,
goto out_unlock;
}
+ if (req->base.drm_surface_flags & drm_vmw_surface_flag_coherent) {
+ struct vmw_buffer_object *backup = res->backup;
+
+ ttm_bo_reserve(&backup->base, false, false, NULL);
+ if (!res->func->dirty_alloc)
+ ret = -EINVAL;
+ if (!ret)
+ ret = vmw_bo_dirty_add(backup);
+ if (!ret) {
+ res->coherent = true;
+ ret = res->func->dirty_alloc(res);
+ }
+ ttm_bo_unreserve(&backup->base);
+ if (ret) {
+ vmw_resource_unreference(&res);
+ goto out_unlock;
+ }
+
+ }
+
tmp = vmw_resource_reference(res);
ret = ttm_prime_object_init(tfile, res->backup_size, &user_srf->prime,
req->base.drm_surface_flags &
@@ -1756,3 +1810,338 @@ out_bad_resource:
return ret;
}
+
+/**
+ * vmw_subres_dirty_add - Add a dirty region to a subresource
+ * @dirty: The surfaces's dirty tracker.
+ * @loc_start: The location corresponding to the start of the region.
+ * @loc_end: The location corresponding to the end of the region.
+ *
+ * As we are assuming that @loc_start and @loc_end represent a sequential
+ * range of backing store memory, if the region spans multiple lines then
+ * regardless of the x coordinate, the full lines are dirtied.
+ * Correspondingly if the region spans multiple z slices, then full rather
+ * than partial z slices are dirtied.
+ */
+static void vmw_subres_dirty_add(struct vmw_surface_dirty *dirty,
+ const struct svga3dsurface_loc *loc_start,
+ const struct svga3dsurface_loc *loc_end)
+{
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ SVGA3dBox *box = &dirty->boxes[loc_start->sub_resource];
+ u32 mip = loc_start->sub_resource % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+ u32 box_c2 = box->z + box->d;
+
+ if (WARN_ON(loc_start->sub_resource >= dirty->num_subres))
+ return;
+
+ if (box->d == 0 || box->z > loc_start->z)
+ box->z = loc_start->z;
+ if (box_c2 < loc_end->z)
+ box->d = loc_end->z - box->z;
+
+ if (loc_start->z + 1 == loc_end->z) {
+ box_c2 = box->y + box->h;
+ if (box->h == 0 || box->y > loc_start->y)
+ box->y = loc_start->y;
+ if (box_c2 < loc_end->y)
+ box->h = loc_end->y - box->y;
+
+ if (loc_start->y + 1 == loc_end->y) {
+ box_c2 = box->x + box->w;
+ if (box->w == 0 || box->x > loc_start->x)
+ box->x = loc_start->x;
+ if (box_c2 < loc_end->x)
+ box->w = loc_end->x - box->x;
+ } else {
+ box->x = 0;
+ box->w = size->width;
+ }
+ } else {
+ box->y = 0;
+ box->h = size->height;
+ box->x = 0;
+ box->w = size->width;
+ }
+}
+
+/**
+ * vmw_subres_dirty_full - Mark a full subresource as dirty
+ * @dirty: The surface's dirty tracker.
+ * @subres: The subresource
+ */
+static void vmw_subres_dirty_full(struct vmw_surface_dirty *dirty, u32 subres)
+{
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ u32 mip = subres % cache->num_mip_levels;
+ const struct drm_vmw_size *size = &cache->mip[mip].size;
+ SVGA3dBox *box = &dirty->boxes[subres];
+
+ box->x = 0;
+ box->y = 0;
+ box->z = 0;
+ box->w = size->width;
+ box->h = size->height;
+ box->d = size->depth;
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for texture
+ * surfaces.
+ */
+static void vmw_surface_tex_dirty_range_add(struct vmw_resource *res,
+ size_t start, size_t end)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t backup_end = res->backup_offset + res->backup_size;
+ struct svga3dsurface_loc loc1, loc2;
+ const struct svga3dsurface_cache *cache;
+
+ start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+ end = min(end, backup_end) - res->backup_offset;
+ cache = &dirty->cache;
+ svga3dsurface_get_loc(cache, &loc1, start);
+ svga3dsurface_get_loc(cache, &loc2, end - 1);
+ svga3dsurface_inc_loc(cache, &loc2);
+
+ if (loc1.sub_resource + 1 == loc2.sub_resource) {
+ /* Dirty range covers a single sub-resource */
+ vmw_subres_dirty_add(dirty, &loc1, &loc2);
+ } else {
+ /* Dirty range covers multiple sub-resources */
+ struct svga3dsurface_loc loc_min, loc_max;
+ u32 sub_res;
+
+ svga3dsurface_max_loc(cache, loc1.sub_resource, &loc_max);
+ vmw_subres_dirty_add(dirty, &loc1, &loc_max);
+ svga3dsurface_min_loc(cache, loc2.sub_resource - 1, &loc_min);
+ vmw_subres_dirty_add(dirty, &loc_min, &loc2);
+ for (sub_res = loc1.sub_resource + 1;
+ sub_res < loc2.sub_resource - 1; ++sub_res)
+ vmw_subres_dirty_full(dirty, sub_res);
+ }
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for buffer
+ * surfaces.
+ */
+static void vmw_surface_buf_dirty_range_add(struct vmw_resource *res,
+ size_t start, size_t end)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ size_t backup_end = res->backup_offset + cache->mip_chain_bytes;
+ SVGA3dBox *box = &dirty->boxes[0];
+ u32 box_c2;
+
+ box->h = box->d = 1;
+ start = max_t(size_t, start, res->backup_offset) - res->backup_offset;
+ end = min(end, backup_end) - res->backup_offset;
+ box_c2 = box->x + box->w;
+ if (box->w == 0 || box->x > start)
+ box->x = start;
+ if (box_c2 < end)
+ box->w = end - box->x;
+}
+
+/*
+ * vmw_surface_tex_dirty_add_range - The dirty_add_range callback for surfaces
+ */
+static void vmw_surface_dirty_range_add(struct vmw_resource *res, size_t start,
+ size_t end)
+{
+ struct vmw_surface *srf = vmw_res_to_srf(res);
+
+ if (WARN_ON(end <= res->backup_offset ||
+ start >= res->backup_offset + res->backup_size))
+ return;
+
+ if (srf->format == SVGA3D_BUFFER)
+ vmw_surface_buf_dirty_range_add(res, start, end);
+ else
+ vmw_surface_tex_dirty_range_add(res, start, end);
+}
+
+/*
+ * vmw_surface_dirty_sync - The surface's dirty_sync callback.
+ */
+static int vmw_surface_dirty_sync(struct vmw_resource *res)
+{
+ struct vmw_private *dev_priv = res->dev_priv;
+ bool has_dx = 0;
+ u32 i, num_dirty;
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t alloc_size;
+ const struct svga3dsurface_cache *cache = &dirty->cache;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdDXUpdateSubResource body;
+ } *cmd1;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdUpdateGBImage body;
+ } *cmd2;
+ void *cmd;
+
+ num_dirty = 0;
+ for (i = 0; i < dirty->num_subres; ++i) {
+ const SVGA3dBox *box = &dirty->boxes[i];
+
+ if (box->d)
+ num_dirty++;
+ }
+
+ if (!num_dirty)
+ goto out;
+
+ alloc_size = num_dirty * ((has_dx) ? sizeof(*cmd1) : sizeof(*cmd2));
+ cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd1 = cmd;
+ cmd2 = cmd;
+
+ for (i = 0; i < dirty->num_subres; ++i) {
+ const SVGA3dBox *box = &dirty->boxes[i];
+
+ if (!box->d)
+ continue;
+
+ /*
+ * DX_UPDATE_SUBRESOURCE is aware of array surfaces.
+ * UPDATE_GB_IMAGE is not.
+ */
+ if (has_dx) {
+ cmd1->header.id = SVGA_3D_CMD_DX_UPDATE_SUBRESOURCE;
+ cmd1->header.size = sizeof(cmd1->body);
+ cmd1->body.sid = res->id;
+ cmd1->body.subResource = i;
+ cmd1->body.box = *box;
+ cmd1++;
+ } else {
+ cmd2->header.id = SVGA_3D_CMD_UPDATE_GB_IMAGE;
+ cmd2->header.size = sizeof(cmd2->body);
+ cmd2->body.image.sid = res->id;
+ cmd2->body.image.face = i / cache->num_mip_levels;
+ cmd2->body.image.mipmap = i -
+ (cache->num_mip_levels * cmd2->body.image.face);
+ cmd2->body.box = *box;
+ cmd2++;
+ }
+
+ }
+ vmw_fifo_commit(dev_priv, alloc_size);
+ out:
+ memset(&dirty->boxes[0], 0, sizeof(dirty->boxes[0]) *
+ dirty->num_subres);
+
+ return 0;
+}
+
+/*
+ * vmw_surface_dirty_alloc - The surface's dirty_alloc callback.
+ */
+static int vmw_surface_dirty_alloc(struct vmw_resource *res)
+{
+ struct vmw_surface *srf = vmw_res_to_srf(res);
+ struct vmw_surface_dirty *dirty;
+ u32 num_layers = 1;
+ u32 num_mip;
+ u32 num_subres;
+ u32 num_samples;
+ size_t dirty_size, acc_size;
+ static struct ttm_operation_ctx ctx = {
+ .interruptible = false,
+ .no_wait_gpu = false
+ };
+ int ret;
+
+ if (srf->array_size)
+ num_layers = srf->array_size;
+ else if (srf->flags & SVGA3D_SURFACE_CUBEMAP)
+ num_layers *= SVGA3D_MAX_SURFACE_FACES;
+
+ num_mip = srf->mip_levels[0];
+ if (!num_mip)
+ num_mip = 1;
+
+ num_subres = num_layers * num_mip;
+ dirty_size = sizeof(*dirty) + num_subres * sizeof(dirty->boxes[0]);
+ acc_size = ttm_round_pot(dirty_size);
+ ret = ttm_mem_global_alloc(vmw_mem_glob(res->dev_priv),
+ acc_size, &ctx);
+ if (ret) {
+ VMW_DEBUG_USER("Out of graphics memory for surface "
+ "dirty tracker.\n");
+ return ret;
+ }
+
+ dirty = kvzalloc(dirty_size, GFP_KERNEL);
+ if (!dirty) {
+ ret = -ENOMEM;
+ goto out_no_dirty;
+ }
+
+ num_samples = max_t(u32, 1, srf->multisample_count);
+ ret = svga3dsurface_setup_cache(&srf->base_size, srf->format, num_mip,
+ num_layers, num_samples, &dirty->cache);
+ if (ret)
+ goto out_no_cache;
+
+ dirty->num_subres = num_subres;
+ dirty->size = acc_size;
+ res->dirty = (struct vmw_resource_dirty *) dirty;
+
+ return 0;
+
+out_no_cache:
+ kvfree(dirty);
+out_no_dirty:
+ ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+ return ret;
+}
+
+/*
+ * vmw_surface_dirty_free - The surface's dirty_free callback
+ */
+static void vmw_surface_dirty_free(struct vmw_resource *res)
+{
+ struct vmw_surface_dirty *dirty =
+ (struct vmw_surface_dirty *) res->dirty;
+ size_t acc_size = dirty->size;
+
+ kvfree(dirty);
+ ttm_mem_global_free(vmw_mem_glob(res->dev_priv), acc_size);
+ res->dirty = NULL;
+}
+
+/*
+ * vmw_surface_clean - The surface's clean callback
+ */
+static int vmw_surface_clean(struct vmw_resource *res)
+{
+ struct vmw_private *dev_priv = res->dev_priv;
+ size_t alloc_size;
+ struct {
+ SVGA3dCmdHeader header;
+ SVGA3dCmdReadbackGBSurface body;
+ } *cmd;
+
+ alloc_size = sizeof(*cmd);
+ cmd = VMW_FIFO_RESERVE(dev_priv, alloc_size);
+ if (!cmd)
+ return -ENOMEM;
+
+ cmd->header.id = SVGA_3D_CMD_READBACK_GB_SURFACE;
+ cmd->header.size = sizeof(cmd->body);
+ cmd->body.sid = res->id;
+ vmw_fifo_commit(dev_priv, alloc_size);
+
+ return 0;
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
index 5a7b8bb420de..ce288756531b 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_glue.c
@@ -29,10 +29,23 @@
int vmw_mmap(struct file *filp, struct vm_area_struct *vma)
{
+ static const struct vm_operations_struct vmw_vm_ops = {
+ .pfn_mkwrite = vmw_bo_vm_mkwrite,
+ .page_mkwrite = vmw_bo_vm_mkwrite,
+ .fault = vmw_bo_vm_fault,
+ .open = ttm_bo_vm_open,
+ .close = ttm_bo_vm_close
+ };
struct drm_file *file_priv = filp->private_data;
struct vmw_private *dev_priv = vmw_priv(file_priv->minor->dev);
+ int ret = ttm_bo_mmap(filp, vma, &dev_priv->bdev);
- return ttm_bo_mmap(filp, vma, &dev_priv->bdev);
+ if (ret)
+ return ret;
+
+ vma->vm_ops = &vmw_vm_ops;
+
+ return 0;
}
/* struct vmw_validation_mem callback */
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
index 7bff3628fc54..e69bc373ae2e 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.c
@@ -33,6 +33,8 @@
* struct vmw_validation_bo_node - Buffer object validation metadata.
* @base: Metadata used for TTM reservation- and validation.
* @hash: A hash entry used for the duplicate detection hash table.
+ * @coherent_count: If switching backup buffers, number of new coherent
+ * resources that will have this buffer as a backup buffer.
* @as_mob: Validate as mob.
* @cpu_blit: Validate for cpu blit access.
*
@@ -42,6 +44,7 @@
struct vmw_validation_bo_node {
struct ttm_validate_buffer base;
struct drm_hash_item hash;
+ unsigned int coherent_count;
u32 as_mob : 1;
u32 cpu_blit : 1;
};
@@ -459,6 +462,19 @@ int vmw_validation_res_reserve(struct vmw_validation_context *ctx,
if (ret)
goto out_unreserve;
}
+
+ if (val->switching_backup && val->new_backup &&
+ res->coherent) {
+ struct vmw_validation_bo_node *bo_node =
+ vmw_validation_find_bo_dup(ctx,
+ val->new_backup);
+
+ if (WARN_ON(!bo_node)) {
+ ret = -EINVAL;
+ goto out_unreserve;
+ }
+ bo_node->coherent_count++;
+ }
}
return 0;
@@ -565,6 +581,9 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
int ret;
list_for_each_entry(entry, &ctx->bo_list, base.head) {
+ struct vmw_buffer_object *vbo =
+ container_of(entry->base.bo, typeof(*vbo), base);
+
if (entry->cpu_blit) {
struct ttm_operation_ctx ctx = {
.interruptible = intr,
@@ -579,6 +598,27 @@ int vmw_validation_bo_validate(struct vmw_validation_context *ctx, bool intr)
}
if (ret)
return ret;
+
+ /*
+ * Rather than having the resource code allocating the bo
+ * dirty tracker in resource_unreserve() where we can't fail,
+ * Do it here when validating the buffer object.
+ */
+ if (entry->coherent_count) {
+ unsigned int coherent_count = entry->coherent_count;
+
+ while (coherent_count) {
+ ret = vmw_bo_dirty_add(vbo);
+ if (ret)
+ return ret;
+
+ coherent_count--;
+ }
+ entry->coherent_count -= coherent_count;
+ }
+
+ if (vbo->dirty)
+ vmw_bo_dirty_scan(vbo);
}
return 0;
}
@@ -604,7 +644,8 @@ int vmw_validation_res_validate(struct vmw_validation_context *ctx, bool intr)
struct vmw_resource *res = val->res;
struct vmw_buffer_object *backup = res->backup;
- ret = vmw_resource_validate(res, intr);
+ ret = vmw_resource_validate(res, intr, val->dirty_set &&
+ val->dirty);
if (ret) {
if (ret != -ERESTARTSYS)
DRM_ERROR("Failed to validate resource.\n");
@@ -831,3 +872,34 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
ctx->mem_size_left += size;
return 0;
}
+
+/**
+ * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
+ * validation context
+ * @ctx: The validation context
+ *
+ * This function unreserves the buffer objects previously reserved using
+ * vmw_validation_bo_reserve. It's typically used as part of an error path
+ */
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
+{
+ struct vmw_validation_bo_node *entry;
+
+ /*
+ * Switching coherent resource backup buffers failed.
+ * Release corresponding buffer object dirty trackers.
+ */
+ list_for_each_entry(entry, &ctx->bo_list, base.head) {
+ if (entry->coherent_count) {
+ unsigned int coherent_count = entry->coherent_count;
+ struct vmw_buffer_object *vbo =
+ container_of(entry->base.bo, typeof(*vbo),
+ base);
+
+ while (coherent_count--)
+ vmw_bo_dirty_release(vbo);
+ }
+ }
+
+ ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
+}
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
index 71ce4b318850..739906d1b3eb 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_validation.h
@@ -174,20 +174,6 @@ vmw_validation_bo_reserve(struct vmw_validation_context *ctx,
}
/**
- * vmw_validation_bo_backoff - Unreserve buffer objects registered with a
- * validation context
- * @ctx: The validation context
- *
- * This function unreserves the buffer objects previously reserved using
- * vmw_validation_bo_reserve. It's typically used as part of an error path
- */
-static inline void
-vmw_validation_bo_backoff(struct vmw_validation_context *ctx)
-{
- ttm_eu_backoff_reservation(&ctx->ticket, &ctx->bo_list);
-}
-
-/**
* vmw_validation_bo_fence - Unreserve and fence buffer objects registered
* with a validation context
* @ctx: The validation context
@@ -269,4 +255,6 @@ int vmw_validation_preload_res(struct vmw_validation_context *ctx,
unsigned int size);
void vmw_validation_res_set_dirty(struct vmw_validation_context *ctx,
void *val_private, u32 dirty);
+void vmw_validation_bo_backoff(struct vmw_validation_context *ctx);
+
#endif
diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h
index 54fa457b26ab..65e399d280f7 100644
--- a/include/drm/ttm/ttm_bo_api.h
+++ b/include/drm/ttm/ttm_bo_api.h
@@ -727,4 +727,18 @@ static inline bool ttm_bo_uses_embedded_gem_object(struct ttm_buffer_object *bo)
{
return bo->base.dev != NULL;
}
+
+/* Default number of pre-faulted pages in the TTM fault handler */
+#define TTM_BO_VM_NUM_PREFAULT 16
+
+vm_fault_t ttm_bo_vm_reserve(struct ttm_buffer_object *bo,
+ struct vm_fault *vmf);
+
+vm_fault_t ttm_bo_vm_fault_reserved(struct vm_fault *vmf,
+ pgprot_t prot,
+ pgoff_t num_prefault);
+
+void ttm_bo_vm_open(struct vm_area_struct *vma);
+
+void ttm_bo_vm_close(struct vm_area_struct *vma);
#endif
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 93d5cf0bc716..0b84e13e88e2 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -216,7 +216,6 @@ static inline int is_swap_pmd(pmd_t pmd)
static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
struct vm_area_struct *vma)
{
- VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (is_swap_pmd(*pmd) || pmd_trans_huge(*pmd) || pmd_devmap(*pmd))
return __pmd_trans_huge_lock(pmd, vma);
else
@@ -225,7 +224,6 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd,
static inline spinlock_t *pud_trans_huge_lock(pud_t *pud,
struct vm_area_struct *vma)
{
- VM_BUG_ON_VMA(!rwsem_is_locked(&vma->vm_mm->mmap_sem), vma);
if (pud_trans_huge(*pud) || pud_devmap(*pud))
return __pud_trans_huge_lock(pud, vma);
else
diff --git a/include/linux/mm.h b/include/linux/mm.h
index a2adf95b3f9c..f6fb714fa851 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -2632,7 +2632,6 @@ typedef int (*pte_fn_t)(pte_t *pte, unsigned long addr, void *data);
extern int apply_to_page_range(struct mm_struct *mm, unsigned long address,
unsigned long size, pte_fn_t fn, void *data);
-
#ifdef CONFIG_PAGE_POISONING
extern bool page_poisoning_enabled(void);
extern void kernel_poison_pages(struct page *page, int numpages, int enable);
@@ -2873,5 +2872,17 @@ static inline int pages_identical(struct page *page1, struct page *page2)
return !memcmp_pages(page1, page2);
}
+#ifdef CONFIG_MAPPING_DIRTY_HELPERS
+unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr,
+ pgoff_t bitmap_pgoff,
+ unsigned long *bitmap,
+ pgoff_t *start,
+ pgoff_t *end);
+
+unsigned long wp_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr);
+#endif
+
#endif /* __KERNEL__ */
#endif /* _LINUX_MM_H */
diff --git a/include/linux/pagewalk.h b/include/linux/pagewalk.h
index bddd9759bab9..6ec82e92c87f 100644
--- a/include/linux/pagewalk.h
+++ b/include/linux/pagewalk.h
@@ -24,6 +24,9 @@ struct mm_walk;
* "do page table walk over the current vma", returning
* a negative value means "abort current page table walk
* right now" and returning 1 means "skip the current vma"
+ * @pre_vma: if set, called before starting walk on a non-null vma.
+ * @post_vma: if set, called after a walk on a non-null vma, provided
+ * that @pre_vma and the vma walk succeeded.
*/
struct mm_walk_ops {
int (*pud_entry)(pud_t *pud, unsigned long addr,
@@ -39,6 +42,9 @@ struct mm_walk_ops {
struct mm_walk *walk);
int (*test_walk)(unsigned long addr, unsigned long next,
struct mm_walk *walk);
+ int (*pre_vma)(unsigned long start, unsigned long end,
+ struct mm_walk *walk);
+ void (*post_vma)(struct mm_walk *walk);
};
/**
@@ -62,5 +68,8 @@ int walk_page_range(struct mm_struct *mm, unsigned long start,
void *private);
int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
void *private);
+int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
+ pgoff_t nr, const struct mm_walk_ops *ops,
+ void *private);
#endif /* _LINUX_PAGEWALK_H */
diff --git a/include/uapi/drm/vmwgfx_drm.h b/include/uapi/drm/vmwgfx_drm.h
index 399f58317cff..02cab33f2f25 100644
--- a/include/uapi/drm/vmwgfx_drm.h
+++ b/include/uapi/drm/vmwgfx_drm.h
@@ -891,11 +891,13 @@ struct drm_vmw_shader_arg {
* surface.
* @drm_vmw_surface_flag_create_buffer: Create a backup buffer if none is
* given.
+ * @drm_vmw_surface_flag_coherent: Back surface with coherent memory.
*/
enum drm_vmw_surface_flags {
drm_vmw_surface_flag_shareable = (1 << 0),
drm_vmw_surface_flag_scanout = (1 << 1),
- drm_vmw_surface_flag_create_buffer = (1 << 2)
+ drm_vmw_surface_flag_create_buffer = (1 << 2),
+ drm_vmw_surface_flag_coherent = (1 << 3),
};
/**
diff --git a/mm/Kconfig b/mm/Kconfig
index a5dae9a7eb51..550f7aceb679 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -736,4 +736,7 @@ config ARCH_HAS_PTE_SPECIAL
config ARCH_HAS_HUGEPD
bool
+config MAPPING_DIRTY_HELPERS
+ bool
+
endmenu
diff --git a/mm/Makefile b/mm/Makefile
index d996846697ef..1937cc251883 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -107,3 +107,4 @@ obj-$(CONFIG_PERCPU_STATS) += percpu-stats.o
obj-$(CONFIG_ZONE_DEVICE) += memremap.o
obj-$(CONFIG_HMM_MIRROR) += hmm.o
obj-$(CONFIG_MEMFD_CREATE) += memfd.o
+obj-$(CONFIG_MAPPING_DIRTY_HELPERS) += mapping_dirty_helpers.o
diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c
new file mode 100644
index 000000000000..71070dda9643
--- /dev/null
+++ b/mm/mapping_dirty_helpers.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/pagewalk.h>
+#include <linux/hugetlb.h>
+#include <linux/bitops.h>
+#include <linux/mmu_notifier.h>
+#include <asm/cacheflush.h>
+#include <asm/tlbflush.h>
+
+/**
+ * struct wp_walk - Private struct for pagetable walk callbacks
+ * @range: Range for mmu notifiers
+ * @tlbflush_start: Address of first modified pte
+ * @tlbflush_end: Address of last modified pte + 1
+ * @total: Total number of modified ptes
+ */
+struct wp_walk {
+ struct mmu_notifier_range range;
+ unsigned long tlbflush_start;
+ unsigned long tlbflush_end;
+ unsigned long total;
+};
+
+/**
+ * wp_pte - Write-protect a pte
+ * @pte: Pointer to the pte
+ * @addr: The virtual page address
+ * @walk: pagetable walk callback argument
+ *
+ * The function write-protects a pte and records the range in
+ * virtual address space of touched ptes for efficient range TLB flushes.
+ */
+static int wp_pte(pte_t *pte, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct wp_walk *wpwalk = walk->private;
+ pte_t ptent = *pte;
+
+ if (pte_write(ptent)) {
+ pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
+
+ ptent = pte_wrprotect(old_pte);
+ ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
+ wpwalk->total++;
+ wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
+ wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
+ addr + PAGE_SIZE);
+ }
+
+ return 0;
+}
+
+/**
+ * struct clean_walk - Private struct for the clean_record_pte function.
+ * @base: struct wp_walk we derive from
+ * @bitmap_pgoff: Address_space Page offset of the first bit in @bitmap
+ * @bitmap: Bitmap with one bit for each page offset in the address_space range
+ * covered.
+ * @start: Address_space page offset of first modified pte relative
+ * to @bitmap_pgoff
+ * @end: Address_space page offset of last modified pte relative
+ * to @bitmap_pgoff
+ */
+struct clean_walk {
+ struct wp_walk base;
+ pgoff_t bitmap_pgoff;
+ unsigned long *bitmap;
+ pgoff_t start;
+ pgoff_t end;
+};
+
+#define to_clean_walk(_wpwalk) container_of(_wpwalk, struct clean_walk, base)
+
+/**
+ * clean_record_pte - Clean a pte and record its address space offset in a
+ * bitmap
+ * @pte: Pointer to the pte
+ * @addr: The virtual page address
+ * @walk: pagetable walk callback argument
+ *
+ * The function cleans a pte and records the range in
+ * virtual address space of touched ptes for efficient TLB flushes.
+ * It also records dirty ptes in a bitmap representing page offsets
+ * in the address_space, as well as the first and last of the bits
+ * touched.
+ */
+static int clean_record_pte(pte_t *pte, unsigned long addr,
+ unsigned long end, struct mm_walk *walk)
+{
+ struct wp_walk *wpwalk = walk->private;
+ struct clean_walk *cwalk = to_clean_walk(wpwalk);
+ pte_t ptent = *pte;
+
+ if (pte_dirty(ptent)) {
+ pgoff_t pgoff = ((addr - walk->vma->vm_start) >> PAGE_SHIFT) +
+ walk->vma->vm_pgoff - cwalk->bitmap_pgoff;
+ pte_t old_pte = ptep_modify_prot_start(walk->vma, addr, pte);
+
+ ptent = pte_mkclean(old_pte);
+ ptep_modify_prot_commit(walk->vma, addr, pte, old_pte, ptent);
+
+ wpwalk->total++;
+ wpwalk->tlbflush_start = min(wpwalk->tlbflush_start, addr);
+ wpwalk->tlbflush_end = max(wpwalk->tlbflush_end,
+ addr + PAGE_SIZE);
+
+ __set_bit(pgoff, cwalk->bitmap);
+ cwalk->start = min(cwalk->start, pgoff);
+ cwalk->end = max(cwalk->end, pgoff + 1);
+ }
+
+ return 0;
+}
+
+/* wp_clean_pmd_entry - The pagewalk pmd callback. */
+static int wp_clean_pmd_entry(pmd_t *pmd, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ /* Dirty-tracking should be handled on the pte level */
+ pmd_t pmdval = pmd_read_atomic(pmd);
+
+ if (pmd_trans_huge(pmdval) || pmd_devmap(pmdval))
+ WARN_ON(pmd_write(pmdval) || pmd_dirty(pmdval));
+
+ return 0;
+}
+
+/* wp_clean_pud_entry - The pagewalk pud callback. */
+static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end,
+ struct mm_walk *walk)
+{
+ /* Dirty-tracking should be handled on the pte level */
+ pud_t pudval = READ_ONCE(*pud);
+
+ if (pud_trans_huge(pudval) || pud_devmap(pudval))
+ WARN_ON(pud_write(pudval) || pud_dirty(pudval));
+
+ return 0;
+}
+
+/*
+ * wp_clean_pre_vma - The pagewalk pre_vma callback.
+ *
+ * The pre_vma callback performs the cache flush, stages the tlb flush
+ * and calls the necessary mmu notifiers.
+ */
+static int wp_clean_pre_vma(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ struct wp_walk *wpwalk = walk->private;
+
+ wpwalk->tlbflush_start = end;
+ wpwalk->tlbflush_end = start;
+
+ mmu_notifier_range_init(&wpwalk->range, MMU_NOTIFY_PROTECTION_PAGE, 0,
+ walk->vma, walk->mm, start, end);
+ mmu_notifier_invalidate_range_start(&wpwalk->range);
+ flush_cache_range(walk->vma, start, end);
+
+ /*
+ * We're not using tlb_gather_mmu() since typically
+ * only a small subrange of PTEs are affected, whereas
+ * tlb_gather_mmu() records the full range.
+ */
+ inc_tlb_flush_pending(walk->mm);
+
+ return 0;
+}
+
+/*
+ * wp_clean_post_vma - The pagewalk post_vma callback.
+ *
+ * The post_vma callback performs the tlb flush and calls necessary mmu
+ * notifiers.
+ */
+static void wp_clean_post_vma(struct mm_walk *walk)
+{
+ struct wp_walk *wpwalk = walk->private;
+
+ if (mm_tlb_flush_nested(walk->mm))
+ flush_tlb_range(walk->vma, wpwalk->range.start,
+ wpwalk->range.end);
+ else if (wpwalk->tlbflush_end > wpwalk->tlbflush_start)
+ flush_tlb_range(walk->vma, wpwalk->tlbflush_start,
+ wpwalk->tlbflush_end);
+
+ mmu_notifier_invalidate_range_end(&wpwalk->range);
+ dec_tlb_flush_pending(walk->mm);
+}
+
+/*
+ * wp_clean_test_walk - The pagewalk test_walk callback.
+ *
+ * Won't perform dirty-tracking on COW, read-only or HUGETLB vmas.
+ */
+static int wp_clean_test_walk(unsigned long start, unsigned long end,
+ struct mm_walk *walk)
+{
+ unsigned long vm_flags = READ_ONCE(walk->vma->vm_flags);
+
+ /* Skip non-applicable VMAs */
+ if ((vm_flags & (VM_SHARED | VM_MAYWRITE | VM_HUGETLB)) !=
+ (VM_SHARED | VM_MAYWRITE))
+ return 1;
+
+ return 0;
+}
+
+static const struct mm_walk_ops clean_walk_ops = {
+ .pte_entry = clean_record_pte,
+ .pmd_entry = wp_clean_pmd_entry,
+ .pud_entry = wp_clean_pud_entry,
+ .test_walk = wp_clean_test_walk,
+ .pre_vma = wp_clean_pre_vma,
+ .post_vma = wp_clean_post_vma
+};
+
+static const struct mm_walk_ops wp_walk_ops = {
+ .pte_entry = wp_pte,
+ .pmd_entry = wp_clean_pmd_entry,
+ .pud_entry = wp_clean_pud_entry,
+ .test_walk = wp_clean_test_walk,
+ .pre_vma = wp_clean_pre_vma,
+ .post_vma = wp_clean_post_vma
+};
+
+/**
+ * wp_shared_mapping_range - Write-protect all ptes in an address space range
+ * @mapping: The address_space we want to write protect
+ * @first_index: The first page offset in the range
+ * @nr: Number of incremental page offsets to cover
+ *
+ * Note: This function currently skips transhuge page-table entries, since
+ * it's intended for dirty-tracking on the PTE level. It will warn on
+ * encountering transhuge write-enabled entries, though, and can easily be
+ * extended to handle them as well.
+ *
+ * Return: The number of ptes actually write-protected. Note that
+ * already write-protected ptes are not counted.
+ */
+unsigned long wp_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr)
+{
+ struct wp_walk wpwalk = { .total = 0 };
+
+ i_mmap_lock_read(mapping);
+ WARN_ON(walk_page_mapping(mapping, first_index, nr, &wp_walk_ops,
+ &wpwalk));
+ i_mmap_unlock_read(mapping);
+
+ return wpwalk.total;
+}
+EXPORT_SYMBOL_GPL(wp_shared_mapping_range);
+
+/**
+ * clean_record_shared_mapping_range - Clean and record all ptes in an
+ * address space range
+ * @mapping: The address_space we want to clean
+ * @first_index: The first page offset in the range
+ * @nr: Number of incremental page offsets to cover
+ * @bitmap_pgoff: The page offset of the first bit in @bitmap
+ * @bitmap: Pointer to a bitmap of at least @nr bits. The bitmap needs to
+ * cover the whole range @first_index..@first_index + @nr.
+ * @start: Pointer to number of the first set bit in @bitmap.
+ * is modified as new bits are set by the function.
+ * @end: Pointer to the number of the last set bit in @bitmap.
+ * none set. The value is modified as new bits are set by the function.
+ *
+ * Note: When this function returns there is no guarantee that a CPU has
+ * not already dirtied new ptes. However it will not clean any ptes not
+ * reported in the bitmap. The guarantees are as follows:
+ * a) All ptes dirty when the function starts executing will end up recorded
+ * in the bitmap.
+ * b) All ptes dirtied after that will either remain dirty, be recorded in the
+ * bitmap or both.
+ *
+ * If a caller needs to make sure all dirty ptes are picked up and none
+ * additional are added, it first needs to write-protect the address-space
+ * range and make sure new writers are blocked in page_mkwrite() or
+ * pfn_mkwrite(). And then after a TLB flush following the write-protection
+ * pick up all dirty bits.
+ *
+ * Note: This function currently skips transhuge page-table entries, since
+ * it's intended for dirty-tracking on the PTE level. It will warn on
+ * encountering transhuge dirty entries, though, and can easily be extended
+ * to handle them as well.
+ *
+ * Return: The number of dirty ptes actually cleaned.
+ */
+unsigned long clean_record_shared_mapping_range(struct address_space *mapping,
+ pgoff_t first_index, pgoff_t nr,
+ pgoff_t bitmap_pgoff,
+ unsigned long *bitmap,
+ pgoff_t *start,
+ pgoff_t *end)
+{
+ bool none_set = (*start >= *end);
+ struct clean_walk cwalk = {
+ .base = { .total = 0 },
+ .bitmap_pgoff = bitmap_pgoff,
+ .bitmap = bitmap,
+ .start = none_set ? nr : *start,
+ .end = none_set ? 0 : *end,
+ };
+
+ i_mmap_lock_read(mapping);
+ WARN_ON(walk_page_mapping(mapping, first_index, nr, &clean_walk_ops,
+ &cwalk.base));
+ i_mmap_unlock_read(mapping);
+
+ *start = cwalk.start;
+ *end = cwalk.end;
+
+ return cwalk.base.total;
+}
+EXPORT_SYMBOL_GPL(clean_record_shared_mapping_range);
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index d48c2a986ea3..ea0b9e606ad1 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -10,8 +10,9 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte_t *pte;
int err = 0;
const struct mm_walk_ops *ops = walk->ops;
+ spinlock_t *ptl;
- pte = pte_offset_map(pmd, addr);
+ pte = pte_offset_map_lock(walk->mm, pmd, addr, &ptl);
for (;;) {
err = ops->pte_entry(pte, addr, addr + PAGE_SIZE, walk);
if (err)
@@ -22,7 +23,7 @@ static int walk_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end,
pte++;
}
- pte_unmap(pte);
+ pte_unmap_unlock(pte, ptl);
return err;
}
@@ -253,13 +254,23 @@ static int __walk_page_range(unsigned long start, unsigned long end,
{
int err = 0;
struct vm_area_struct *vma = walk->vma;
+ const struct mm_walk_ops *ops = walk->ops;
+
+ if (vma && ops->pre_vma) {
+ err = ops->pre_vma(start, end, walk);
+ if (err)
+ return err;
+ }
if (vma && is_vm_hugetlb_page(vma)) {
- if (walk->ops->hugetlb_entry)
+ if (ops->hugetlb_entry)
err = walk_hugetlb_range(start, end, walk);
} else
err = walk_pgd_range(start, end, walk);
+ if (vma && ops->post_vma)
+ ops->post_vma(walk);
+
return err;
}
@@ -290,6 +301,11 @@ static int __walk_page_range(unsigned long start, unsigned long end,
* its vm_flags. walk_page_test() and @ops->test_walk() are used for this
* purpose.
*
+ * If operations need to be staged before and committed after a vma is walked,
+ * there are two callbacks, pre_vma() and post_vma(). Note that post_vma(),
+ * since it is intended to handle commit-type operations, can't return any
+ * errors.
+ *
* struct mm_walk keeps current values of some common data like vma and pmd,
* which are useful for the access from callbacks. If you want to pass some
* caller-specific data to callbacks, @private should be helpful.
@@ -376,3 +392,80 @@ int walk_page_vma(struct vm_area_struct *vma, const struct mm_walk_ops *ops,
return err;
return __walk_page_range(vma->vm_start, vma->vm_end, &walk);
}
+
+/**
+ * walk_page_mapping - walk all memory areas mapped into a struct address_space.
+ * @mapping: Pointer to the struct address_space
+ * @first_index: First page offset in the address_space
+ * @nr: Number of incremental page offsets to cover
+ * @ops: operation to call during the walk
+ * @private: private data for callbacks' usage
+ *
+ * This function walks all memory areas mapped into a struct address_space.
+ * The walk is limited to only the given page-size index range, but if
+ * the index boundaries cross a huge page-table entry, that entry will be
+ * included.
+ *
+ * Also see walk_page_range() for additional information.
+ *
+ * Locking:
+ * This function can't require that the struct mm_struct::mmap_sem is held,
+ * since @mapping may be mapped by multiple processes. Instead
+ * @mapping->i_mmap_rwsem must be held. This might have implications in the
+ * callbacks, and it's up tho the caller to ensure that the
+ * struct mm_struct::mmap_sem is not needed.
+ *
+ * Also this means that a caller can't rely on the struct
+ * vm_area_struct::vm_flags to be constant across a call,
+ * except for immutable flags. Callers requiring this shouldn't use
+ * this function.
+ *
+ * Return: 0 on success, negative error code on failure, positive number on
+ * caller defined premature termination.
+ */
+int walk_page_mapping(struct address_space *mapping, pgoff_t first_index,
+ pgoff_t nr, const struct mm_walk_ops *ops,
+ void *private)
+{
+ struct mm_walk walk = {
+ .ops = ops,
+ .private = private,
+ };
+ struct vm_area_struct *vma;
+ pgoff_t vba, vea, cba, cea;
+ unsigned long start_addr, end_addr;
+ int err = 0;
+
+ lockdep_assert_held(&mapping->i_mmap_rwsem);
+ vma_interval_tree_foreach(vma, &mapping->i_mmap, first_index,
+ first_index + nr - 1) {
+ /* Clip to the vma */
+ vba = vma->vm_pgoff;
+ vea = vba + vma_pages(vma);
+ cba = first_index;
+ cba = max(cba, vba);
+ cea = first_index + nr;
+ cea = min(cea, vea);
+
+ start_addr = ((cba - vba) << PAGE_SHIFT) + vma->vm_start;
+ end_addr = ((cea - vba) << PAGE_SHIFT) + vma->vm_start;
+ if (start_addr >= end_addr)
+ continue;
+
+ walk.vma = vma;
+ walk.mm = vma->vm_mm;
+
+ err = walk_page_test(vma->vm_start, vma->vm_end, &walk);
+ if (err > 0) {
+ err = 0;
+ break;
+ } else if (err < 0)
+ break;
+
+ err = __walk_page_range(start_addr, end_addr, &walk);
+ if (err)
+ break;
+ }
+
+ return err;
+}