From 88f54a3581eb9deaa3bd1aade40aef266d782385 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 30 Nov 2016 17:51:59 +1100 Subject: powerpc/iommu: Pass mm_struct to init/cleanup helpers We are going to get rid of @current references in mmu_context_boos3s64.c and cache mm_struct in the VFIO container. Since mm_context_t does not have reference counting, we will be using mm_struct which does have the reference counter. This changes mm_iommu_init/mm_iommu_cleanup to receive mm_struct rather than mm_context_t (which is embedded into mm). This should not cause any behavioral change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_iommu.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/powerpc/mm/mmu_context_iommu.c') diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index e0f1c33601dd..ad2e575fd418 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -373,16 +373,17 @@ void mm_iommu_mapped_dec(struct mm_iommu_table_group_mem_t *mem) } EXPORT_SYMBOL_GPL(mm_iommu_mapped_dec); -void mm_iommu_init(mm_context_t *ctx) +void mm_iommu_init(struct mm_struct *mm) { - INIT_LIST_HEAD_RCU(&ctx->iommu_group_mem_list); + INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } -void mm_iommu_cleanup(mm_context_t *ctx) +void mm_iommu_cleanup(struct mm_struct *mm) { struct mm_iommu_table_group_mem_t *mem, *tmp; - list_for_each_entry_safe(mem, tmp, &ctx->iommu_group_mem_list, next) { + list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list, + next) { list_del_rcu(&mem->next); mm_iommu_do_free(mem); } -- cgit v1.2.3 From d7baee6901b34c4895eb78efdbf13a49079d7404 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 30 Nov 2016 17:52:00 +1100 Subject: powerpc/iommu: Stop using @current in mm_iommu_xxx This changes mm_iommu_xxx helpers to take mm_struct as a parameter instead of getting it from @current which in some situations may not have a valid reference to mm. This changes helpers to receive @mm and moves all references to @current to the caller, including checks for !current and !current->mm; checks in mm_iommu_preregistered() are removed as there is no caller yet. This moves the mm_iommu_adjust_locked_vm() call to the caller as it receives mm_iommu_table_group_mem_t but it needs mm. This should cause no behavioral change. Signed-off-by: Alexey Kardashevskiy Reviewed-by: David Gibson Acked-by: Alex Williamson Signed-off-by: Michael Ellerman --- arch/powerpc/include/asm/mmu_context.h | 16 ++++++------ arch/powerpc/mm/mmu_context_iommu.c | 46 +++++++++++++--------------------- drivers/vfio/vfio_iommu_spapr_tce.c | 14 ++++++++--- 3 files changed, 36 insertions(+), 40 deletions(-) (limited to 'arch/powerpc/mm/mmu_context_iommu.c') diff --git a/arch/powerpc/include/asm/mmu_context.h b/arch/powerpc/include/asm/mmu_context.h index 424844bc2a57..b9e3f0aca261 100644 --- a/arch/powerpc/include/asm/mmu_context.h +++ b/arch/powerpc/include/asm/mmu_context.h @@ -19,16 +19,18 @@ extern void destroy_context(struct mm_struct *mm); struct mm_iommu_table_group_mem_t; extern int isolate_lru_page(struct page *page); /* from internal.h */ -extern bool mm_iommu_preregistered(void); -extern long mm_iommu_get(unsigned long ua, unsigned long entries, +extern bool mm_iommu_preregistered(struct mm_struct *mm); +extern long mm_iommu_get(struct mm_struct *mm, + unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem); -extern long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem); +extern long mm_iommu_put(struct mm_struct *mm, + struct mm_iommu_table_group_mem_t *mem); extern void mm_iommu_init(struct mm_struct *mm); extern void mm_iommu_cleanup(struct mm_struct *mm); -extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size); -extern struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries); +extern struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size); +extern struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries); extern long mm_iommu_ua_to_hpa(struct mm_iommu_table_group_mem_t *mem, unsigned long ua, unsigned long *hpa); extern long mm_iommu_mapped_inc(struct mm_iommu_table_group_mem_t *mem); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index ad2e575fd418..4c6db09e77ad 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -56,7 +56,7 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, } pr_debug("[%d] RLIMIT_MEMLOCK HASH64 %c%ld %ld/%ld\n", - current->pid, + current ? current->pid : 0, incr ? '+' : '-', npages << PAGE_SHIFT, mm->locked_vm << PAGE_SHIFT, @@ -66,12 +66,9 @@ static long mm_iommu_adjust_locked_vm(struct mm_struct *mm, return ret; } -bool mm_iommu_preregistered(void) +bool mm_iommu_preregistered(struct mm_struct *mm) { - if (!current || !current->mm) - return false; - - return !list_empty(¤t->mm->context.iommu_group_mem_list); + return !list_empty(&mm->context.iommu_group_mem_list); } EXPORT_SYMBOL_GPL(mm_iommu_preregistered); @@ -124,19 +121,16 @@ static int mm_iommu_move_page_from_cma(struct page *page) return 0; } -long mm_iommu_get(unsigned long ua, unsigned long entries, +long mm_iommu_get(struct mm_struct *mm, unsigned long ua, unsigned long entries, struct mm_iommu_table_group_mem_t **pmem) { struct mm_iommu_table_group_mem_t *mem; long i, j, ret = 0, locked_entries = 0; struct page *page = NULL; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); - list_for_each_entry_rcu(mem, ¤t->mm->context.iommu_group_mem_list, + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ++mem->used; @@ -154,7 +148,7 @@ long mm_iommu_get(unsigned long ua, unsigned long entries, } - ret = mm_iommu_adjust_locked_vm(current->mm, entries, true); + ret = mm_iommu_adjust_locked_vm(mm, entries, true); if (ret) goto unlock_exit; @@ -215,11 +209,11 @@ populate: mem->entries = entries; *pmem = mem; - list_add_rcu(&mem->next, ¤t->mm->context.iommu_group_mem_list); + list_add_rcu(&mem->next, &mm->context.iommu_group_mem_list); unlock_exit: if (locked_entries && ret) - mm_iommu_adjust_locked_vm(current->mm, locked_entries, false); + mm_iommu_adjust_locked_vm(mm, locked_entries, false); mutex_unlock(&mem_list_mutex); @@ -264,17 +258,13 @@ static void mm_iommu_free(struct rcu_head *head) static void mm_iommu_release(struct mm_iommu_table_group_mem_t *mem) { list_del_rcu(&mem->next); - mm_iommu_adjust_locked_vm(current->mm, mem->entries, false); call_rcu(&mem->rcu, mm_iommu_free); } -long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) +long mm_iommu_put(struct mm_struct *mm, struct mm_iommu_table_group_mem_t *mem) { long ret = 0; - if (!current || !current->mm) - return -ESRCH; /* process exited */ - mutex_lock(&mem_list_mutex); if (mem->used == 0) { @@ -297,6 +287,8 @@ long mm_iommu_put(struct mm_iommu_table_group_mem_t *mem) /* @mapped became 0 so now mappings are disabled, release the region */ mm_iommu_release(mem); + mm_iommu_adjust_locked_vm(mm, mem->entries, false); + unlock_exit: mutex_unlock(&mem_list_mutex); @@ -304,14 +296,12 @@ unlock_exit: } EXPORT_SYMBOL_GPL(mm_iommu_put); -struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, - unsigned long size) +struct mm_iommu_table_group_mem_t *mm_iommu_lookup(struct mm_struct *mm, + unsigned long ua, unsigned long size) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua <= ua) && (ua + size <= mem->ua + (mem->entries << PAGE_SHIFT))) { @@ -324,14 +314,12 @@ struct mm_iommu_table_group_mem_t *mm_iommu_lookup(unsigned long ua, } EXPORT_SYMBOL_GPL(mm_iommu_lookup); -struct mm_iommu_table_group_mem_t *mm_iommu_find(unsigned long ua, - unsigned long entries) +struct mm_iommu_table_group_mem_t *mm_iommu_find(struct mm_struct *mm, + unsigned long ua, unsigned long entries) { struct mm_iommu_table_group_mem_t *mem, *ret = NULL; - list_for_each_entry_rcu(mem, - ¤t->mm->context.iommu_group_mem_list, - next) { + list_for_each_entry_rcu(mem, &mm->context.iommu_group_mem_list, next) { if ((mem->ua == ua) && (mem->entries == entries)) { ret = mem; break; diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 80378ddadc5c..d0c38b201267 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -107,14 +107,17 @@ static long tce_iommu_unregister_pages(struct tce_container *container, { struct mm_iommu_table_group_mem_t *mem; + if (!current || !current->mm) + return -ESRCH; /* process exited */ + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) return -EINVAL; - mem = mm_iommu_find(vaddr, size >> PAGE_SHIFT); + mem = mm_iommu_find(current->mm, vaddr, size >> PAGE_SHIFT); if (!mem) return -ENOENT; - return mm_iommu_put(mem); + return mm_iommu_put(current->mm, mem); } static long tce_iommu_register_pages(struct tce_container *container, @@ -124,11 +127,14 @@ static long tce_iommu_register_pages(struct tce_container *container, struct mm_iommu_table_group_mem_t *mem = NULL; unsigned long entries = size >> PAGE_SHIFT; + if (!current || !current->mm) + return -ESRCH; /* process exited */ + if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || ((vaddr + size) < vaddr)) return -EINVAL; - ret = mm_iommu_get(vaddr, entries, &mem); + ret = mm_iommu_get(current->mm, vaddr, entries, &mem); if (ret) return ret; @@ -375,7 +381,7 @@ static int tce_iommu_prereg_ua_to_hpa(unsigned long tce, unsigned long size, long ret = 0; struct mm_iommu_table_group_mem_t *mem; - mem = mm_iommu_lookup(tce, size); + mem = mm_iommu_lookup(current->mm, tce, size); if (!mem) return -EINVAL; -- cgit v1.2.3 From 4b6fad7097f883335b6d9627c883cb7f276d94c9 Mon Sep 17 00:00:00 2001 From: Alexey Kardashevskiy Date: Wed, 30 Nov 2016 17:52:05 +1100 Subject: powerpc/mm/iommu, vfio/spapr: Put pages on VFIO container shutdown At the moment the userspace tool is expected to request pinning of the entire guest RAM when VFIO IOMMU SPAPR v2 driver is present. When the userspace process finishes, all the pinned pages need to be put; this is done as a part of the userspace memory context (MM) destruction which happens on the very last mmdrop(). This approach has a problem that a MM of the userspace process may live longer than the userspace process itself as kernel threads use userspace process MMs which was runnning on a CPU where the kernel thread was scheduled to. If this happened, the MM remains referenced until this exact kernel thread wakes up again and releases the very last reference to the MM, on an idle system this can take even hours. This moves preregistered regions tracking from MM to VFIO; insteads of using mm_iommu_table_group_mem_t::used, tce_container::prereg_list is added so each container releases regions which it has pre-registered. This changes the userspace interface to return EBUSY if a memory region is already registered in a container. However it should not have any practical effect as the only userspace tool available now does register memory region once per container anyway. As tce_iommu_register_pages/tce_iommu_unregister_pages are called under container->lock, this does not need additional locking. Signed-off-by: Alexey Kardashevskiy Reviewed-by: Nicholas Piggin Acked-by: Alex Williamson Reviewed-by: David Gibson Signed-off-by: Michael Ellerman --- arch/powerpc/mm/mmu_context_book3s64.c | 4 +-- arch/powerpc/mm/mmu_context_iommu.c | 11 ------ drivers/vfio/vfio_iommu_spapr_tce.c | 61 +++++++++++++++++++++++++++++++++- 3 files changed, 61 insertions(+), 15 deletions(-) (limited to 'arch/powerpc/mm/mmu_context_iommu.c') diff --git a/arch/powerpc/mm/mmu_context_book3s64.c b/arch/powerpc/mm/mmu_context_book3s64.c index ad8273590975..73bf6e14c3aa 100644 --- a/arch/powerpc/mm/mmu_context_book3s64.c +++ b/arch/powerpc/mm/mmu_context_book3s64.c @@ -156,13 +156,11 @@ static inline void destroy_pagetable_page(struct mm_struct *mm) } #endif - void destroy_context(struct mm_struct *mm) { #ifdef CONFIG_SPAPR_TCE_IOMMU - mm_iommu_cleanup(mm); + WARN_ON_ONCE(!list_empty(&mm->context.iommu_group_mem_list)); #endif - #ifdef CONFIG_PPC_ICSWX drop_cop(mm->context.acop, mm); kfree(mm->context.cop_lockp); diff --git a/arch/powerpc/mm/mmu_context_iommu.c b/arch/powerpc/mm/mmu_context_iommu.c index 4c6db09e77ad..104bad029ce9 100644 --- a/arch/powerpc/mm/mmu_context_iommu.c +++ b/arch/powerpc/mm/mmu_context_iommu.c @@ -365,14 +365,3 @@ void mm_iommu_init(struct mm_struct *mm) { INIT_LIST_HEAD_RCU(&mm->context.iommu_group_mem_list); } - -void mm_iommu_cleanup(struct mm_struct *mm) -{ - struct mm_iommu_table_group_mem_t *mem, *tmp; - - list_for_each_entry_safe(mem, tmp, &mm->context.iommu_group_mem_list, - next) { - list_del_rcu(&mem->next); - mm_iommu_do_free(mem); - } -} diff --git a/drivers/vfio/vfio_iommu_spapr_tce.c b/drivers/vfio/vfio_iommu_spapr_tce.c index 4c03c8525c26..c8823578a1b2 100644 --- a/drivers/vfio/vfio_iommu_spapr_tce.c +++ b/drivers/vfio/vfio_iommu_spapr_tce.c @@ -88,6 +88,15 @@ struct tce_iommu_group { struct iommu_group *grp; }; +/* + * A container needs to remember which preregistered region it has + * referenced to do proper cleanup at the userspace process exit. + */ +struct tce_iommu_prereg { + struct list_head next; + struct mm_iommu_table_group_mem_t *mem; +}; + /* * The container descriptor supports only a single group per container. * Required by the API as the container is not supplied with the IOMMU group @@ -102,6 +111,7 @@ struct tce_container { struct mm_struct *mm; struct iommu_table *tables[IOMMU_TABLE_GROUP_MAX_TABLES]; struct list_head group_list; + struct list_head prereg_list; }; static long tce_iommu_mm_set(struct tce_container *container) @@ -118,10 +128,27 @@ static long tce_iommu_mm_set(struct tce_container *container) return 0; } +static long tce_iommu_prereg_free(struct tce_container *container, + struct tce_iommu_prereg *tcemem) +{ + long ret; + + ret = mm_iommu_put(container->mm, tcemem->mem); + if (ret) + return ret; + + list_del(&tcemem->next); + kfree(tcemem); + + return 0; +} + static long tce_iommu_unregister_pages(struct tce_container *container, __u64 vaddr, __u64 size) { struct mm_iommu_table_group_mem_t *mem; + struct tce_iommu_prereg *tcemem; + bool found = false; if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK)) return -EINVAL; @@ -130,7 +157,17 @@ static long tce_iommu_unregister_pages(struct tce_container *container, if (!mem) return -ENOENT; - return mm_iommu_put(container->mm, mem); + list_for_each_entry(tcemem, &container->prereg_list, next) { + if (tcemem->mem == mem) { + found = true; + break; + } + } + + if (!found) + return -ENOENT; + + return tce_iommu_prereg_free(container, tcemem); } static long tce_iommu_register_pages(struct tce_container *container, @@ -138,16 +175,29 @@ static long tce_iommu_register_pages(struct tce_container *container, { long ret = 0; struct mm_iommu_table_group_mem_t *mem = NULL; + struct tce_iommu_prereg *tcemem; unsigned long entries = size >> PAGE_SHIFT; if ((vaddr & ~PAGE_MASK) || (size & ~PAGE_MASK) || ((vaddr + size) < vaddr)) return -EINVAL; + mem = mm_iommu_find(container->mm, vaddr, entries); + if (mem) { + list_for_each_entry(tcemem, &container->prereg_list, next) { + if (tcemem->mem == mem) + return -EBUSY; + } + } + ret = mm_iommu_get(container->mm, vaddr, entries, &mem); if (ret) return ret; + tcemem = kzalloc(sizeof(*tcemem), GFP_KERNEL); + tcemem->mem = mem; + list_add(&tcemem->next, &container->prereg_list); + container->enabled = true; return 0; @@ -334,6 +384,7 @@ static void *tce_iommu_open(unsigned long arg) mutex_init(&container->lock); INIT_LIST_HEAD_RCU(&container->group_list); + INIT_LIST_HEAD_RCU(&container->prereg_list); container->v2 = arg == VFIO_SPAPR_TCE_v2_IOMMU; @@ -372,6 +423,14 @@ static void tce_iommu_release(void *iommu_data) tce_iommu_free_table(container, tbl); } + while (!list_empty(&container->prereg_list)) { + struct tce_iommu_prereg *tcemem; + + tcemem = list_first_entry(&container->prereg_list, + struct tce_iommu_prereg, next); + WARN_ON_ONCE(tce_iommu_prereg_free(container, tcemem)); + } + tce_iommu_disable(container); if (container->mm) mmdrop(container->mm); -- cgit v1.2.3