diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 09:15:43 -0800 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-14 09:15:43 -0800 |
commit | 08cdc2157966c07d3f986a097ddaa74cee312751 (patch) | |
tree | dad2562768b49876c642c2505813e90a467ae40a /lib | |
parent | aa5ad10f6cca6d42f3fef6cb862e03b220ea19a6 (diff) | |
parent | d6c55c0a20e5059abdde81713ddf6324a946eb3c (diff) | |
download | linux-08cdc2157966c07d3f986a097ddaa74cee312751.tar.bz2 |
Merge tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd
Pull iommufd implementation from Jason Gunthorpe:
"iommufd is the user API to control the IOMMU subsystem as it relates
to managing IO page tables that point at user space memory.
It takes over from drivers/vfio/vfio_iommu_type1.c (aka the VFIO
container) which is the VFIO specific interface for a similar idea.
We see a broad need for extended features, some being highly IOMMU
device specific:
- Binding iommu_domain's to PASID/SSID
- Userspace IO page tables, for ARM, x86 and S390
- Kernel bypassed invalidation of user page tables
- Re-use of the KVM page table in the IOMMU
- Dirty page tracking in the IOMMU
- Runtime Increase/Decrease of IOPTE size
- PRI support with faults resolved in userspace
Many of these HW features exist to support VM use cases - for instance
the combination of PASID, PRI and Userspace IO Page Tables allows an
implementation of DMA Shared Virtual Addressing (vSVA) within a guest.
Dirty tracking enables VM live migration with SRIOV devices and PASID
support allow creating "scalable IOV" devices, among other things.
As these features are fundamental to a VM platform they need to be
uniformly exposed to all the driver families that do DMA into VMs,
which is currently VFIO and VDPA"
For more background, see the extended explanations in Jason's pull request:
https://lore.kernel.org/lkml/Y5dzTU8dlmXTbzoJ@nvidia.com/
* tag 'for-linus-iommufd' of git://git.kernel.org/pub/scm/linux/kernel/git/jgg/iommufd: (62 commits)
iommufd: Change the order of MSI setup
iommufd: Improve a few unclear bits of code
iommufd: Fix comment typos
vfio: Move vfio group specific code into group.c
vfio: Refactor dma APIs for emulated devices
vfio: Wrap vfio group module init/clean code into helpers
vfio: Refactor vfio_device open and close
vfio: Make vfio_device_open() truly device specific
vfio: Swap order of vfio_device_container_register() and open_device()
vfio: Set device->group in helper function
vfio: Create wrappers for group register/unregister
vfio: Move the sanity check of the group to vfio_create_group()
vfio: Simplify vfio_create_group()
iommufd: Allow iommufd to supply /dev/vfio/vfio
vfio: Make vfio_container optionally compiled
vfio: Move container related MODULE_ALIAS statements into container.c
vfio-iommufd: Support iommufd for emulated VFIO devices
vfio-iommufd: Support iommufd for physical VFIO devices
vfio-iommufd: Allow iommufd to be used in place of a container fd
vfio: Use IOMMU_CAP_ENFORCE_CACHE_COHERENCY for vfio_file_enforced_coherent()
...
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig | 4 | ||||
-rw-r--r-- | lib/interval_tree.c | 132 |
2 files changed, 136 insertions, 0 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 48e021846472..ce2abffb9ed8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -480,6 +480,10 @@ config INTERVAL_TREE for more information. +config INTERVAL_TREE_SPAN_ITER + bool + depends on INTERVAL_TREE + config XARRAY_MULTI bool help diff --git a/lib/interval_tree.c b/lib/interval_tree.c index 593ce56ece50..3412737ff365 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -15,3 +15,135 @@ EXPORT_SYMBOL_GPL(interval_tree_insert); EXPORT_SYMBOL_GPL(interval_tree_remove); EXPORT_SYMBOL_GPL(interval_tree_iter_first); EXPORT_SYMBOL_GPL(interval_tree_iter_next); + +#ifdef CONFIG_INTERVAL_TREE_SPAN_ITER +/* + * Roll nodes[1] into nodes[0] by advancing nodes[1] to the end of a contiguous + * span of nodes. This makes nodes[0]->last the end of that contiguous used span + * indexes that started at the original nodes[1]->start. nodes[1] is now the + * first node starting the next used span. A hole span is between nodes[0]->last + * and nodes[1]->start. nodes[1] must be !NULL. + */ +static void +interval_tree_span_iter_next_gap(struct interval_tree_span_iter *state) +{ + struct interval_tree_node *cur = state->nodes[1]; + + state->nodes[0] = cur; + do { + if (cur->last > state->nodes[0]->last) + state->nodes[0] = cur; + cur = interval_tree_iter_next(cur, state->first_index, + state->last_index); + } while (cur && (state->nodes[0]->last >= cur->start || + state->nodes[0]->last + 1 == cur->start)); + state->nodes[1] = cur; +} + +void interval_tree_span_iter_first(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long first_index, + unsigned long last_index) +{ + iter->first_index = first_index; + iter->last_index = last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = + interval_tree_iter_first(itree, first_index, last_index); + if (!iter->nodes[1]) { + /* No nodes intersect the span, whole span is hole */ + iter->start_hole = first_index; + iter->last_hole = last_index; + iter->is_hole = 1; + return; + } + if (iter->nodes[1]->start > first_index) { + /* Leading hole on first iteration */ + iter->start_hole = first_index; + iter->last_hole = iter->nodes[1]->start - 1; + iter->is_hole = 1; + interval_tree_span_iter_next_gap(iter); + return; + } + + /* Starting inside a used */ + iter->start_used = first_index; + iter->is_hole = 0; + interval_tree_span_iter_next_gap(iter); + iter->last_used = iter->nodes[0]->last; + if (iter->last_used >= last_index) { + iter->last_used = last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = NULL; + } +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_first); + +void interval_tree_span_iter_next(struct interval_tree_span_iter *iter) +{ + if (!iter->nodes[0] && !iter->nodes[1]) { + iter->is_hole = -1; + return; + } + + if (iter->is_hole) { + iter->start_used = iter->last_hole + 1; + iter->last_used = iter->nodes[0]->last; + if (iter->last_used >= iter->last_index) { + iter->last_used = iter->last_index; + iter->nodes[0] = NULL; + iter->nodes[1] = NULL; + } + iter->is_hole = 0; + return; + } + + if (!iter->nodes[1]) { + /* Trailing hole */ + iter->start_hole = iter->nodes[0]->last + 1; + iter->last_hole = iter->last_index; + iter->nodes[0] = NULL; + iter->is_hole = 1; + return; + } + + /* must have both nodes[0] and [1], interior hole */ + iter->start_hole = iter->nodes[0]->last + 1; + iter->last_hole = iter->nodes[1]->start - 1; + iter->is_hole = 1; + interval_tree_span_iter_next_gap(iter); +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_next); + +/* + * Advance the iterator index to a specific position. The returned used/hole is + * updated to start at new_index. This is faster than calling + * interval_tree_span_iter_first() as it can avoid full searches in several + * cases where the iterator is already set. + */ +void interval_tree_span_iter_advance(struct interval_tree_span_iter *iter, + struct rb_root_cached *itree, + unsigned long new_index) +{ + if (iter->is_hole == -1) + return; + + iter->first_index = new_index; + if (new_index > iter->last_index) { + iter->is_hole = -1; + return; + } + + /* Rely on the union aliasing hole/used */ + if (iter->start_hole <= new_index && new_index <= iter->last_hole) { + iter->start_hole = new_index; + return; + } + if (new_index == iter->last_hole + 1) + interval_tree_span_iter_next(iter); + else + interval_tree_span_iter_first(iter, itree, new_index, + iter->last_index); +} +EXPORT_SYMBOL_GPL(interval_tree_span_iter_advance); +#endif |