diff options
author | Matthew Wilcox <willy@infradead.org> | 2018-03-28 11:48:03 -0400 |
---|---|---|
committer | Matthew Wilcox <willy@infradead.org> | 2018-10-21 10:46:43 -0400 |
commit | cfc93c6c6c963d96ef7081c17fb95d2738a75f55 (patch) | |
tree | f923f8939814e934c7b6fcba90f6688a5aef15f3 | |
parent | ec4907ff69fb16161d9d9370260303a73dd5acde (diff) | |
download | linux-cfc93c6c6c963d96ef7081c17fb95d2738a75f55.tar.bz2 |
dax: Convert dax_insert_pfn_mkwrite to XArray
Add some XArray-based helper functions to replace the radix tree based
metaphors currently in use. The biggest change is that converted code
doesn't see its own lock bit; get_unlocked_entry() always returns an
entry with the lock bit clear. So we don't have to mess around loading
the current entry and clearing the lock bit; we can just store the
unlocked entry that we already have.
Signed-off-by: Matthew Wilcox <willy@infradead.org>
-rw-r--r-- | fs/dax.c | 149 |
1 files changed, 117 insertions, 32 deletions
@@ -38,6 +38,17 @@ #define CREATE_TRACE_POINTS #include <trace/events/fs_dax.h> +static inline unsigned int pe_order(enum page_entry_size pe_size) +{ + if (pe_size == PE_SIZE_PTE) + return PAGE_SHIFT - PAGE_SHIFT; + if (pe_size == PE_SIZE_PMD) + return PMD_SHIFT - PAGE_SHIFT; + if (pe_size == PE_SIZE_PUD) + return PUD_SHIFT - PAGE_SHIFT; + return ~0; +} + /* We choose 4096 entries - same as per-zone page wait tables */ #define DAX_WAIT_TABLE_BITS 12 #define DAX_WAIT_TABLE_ENTRIES (1 << DAX_WAIT_TABLE_BITS) @@ -46,6 +57,9 @@ #define PG_PMD_COLOUR ((PMD_SIZE >> PAGE_SHIFT) - 1) #define PG_PMD_NR (PMD_SIZE >> PAGE_SHIFT) +/* The order of a PMD entry */ +#define PMD_ORDER (PMD_SHIFT - PAGE_SHIFT) + static wait_queue_head_t wait_table[DAX_WAIT_TABLE_ENTRIES]; static int __init init_dax_wait_table(void) @@ -85,10 +99,15 @@ static void *dax_make_locked(unsigned long pfn, unsigned long flags) DAX_LOCKED); } +static bool dax_is_locked(void *entry) +{ + return xa_to_value(entry) & DAX_LOCKED; +} + static unsigned int dax_entry_order(void *entry) { if (xa_to_value(entry) & DAX_PMD) - return PMD_SHIFT - PAGE_SHIFT; + return PMD_ORDER; return 0; } @@ -181,6 +200,81 @@ static void dax_wake_mapping_entry_waiter(struct xarray *xa, __wake_up(wq, TASK_NORMAL, wake_all ? 0 : 1, &key); } +static void dax_wake_entry(struct xa_state *xas, void *entry, bool wake_all) +{ + return dax_wake_mapping_entry_waiter(xas->xa, xas->xa_index, entry, + wake_all); +} + +/* + * Look up entry in page cache, wait for it to become unlocked if it + * is a DAX entry and return it. The caller must subsequently call + * put_unlocked_entry() if it did not lock the entry or dax_unlock_entry() + * if it did. + * + * Must be called with the i_pages lock held. + */ +static void *get_unlocked_entry(struct xa_state *xas) +{ + void *entry; + struct wait_exceptional_entry_queue ewait; + wait_queue_head_t *wq; + + init_wait(&ewait.wait); + ewait.wait.func = wake_exceptional_entry_func; + + for (;;) { + entry = xas_load(xas); + if (!entry || xa_is_internal(entry) || + WARN_ON_ONCE(!xa_is_value(entry)) || + !dax_is_locked(entry)) + return entry; + + wq = dax_entry_waitqueue(xas->xa, xas->xa_index, entry, + &ewait.key); + prepare_to_wait_exclusive(wq, &ewait.wait, + TASK_UNINTERRUPTIBLE); + xas_unlock_irq(xas); + xas_reset(xas); + schedule(); + finish_wait(wq, &ewait.wait); + xas_lock_irq(xas); + } +} + +static void put_unlocked_entry(struct xa_state *xas, void *entry) +{ + /* If we were the only waiter woken, wake the next one */ + if (entry) + dax_wake_entry(xas, entry, false); +} + +/* + * We used the xa_state to get the entry, but then we locked the entry and + * dropped the xa_lock, so we know the xa_state is stale and must be reset + * before use. + */ +static void dax_unlock_entry(struct xa_state *xas, void *entry) +{ + void *old; + + xas_reset(xas); + xas_lock_irq(xas); + old = xas_store(xas, entry); + xas_unlock_irq(xas); + BUG_ON(!dax_is_locked(old)); + dax_wake_entry(xas, entry, false); +} + +/* + * Return: The entry stored at this location before it was locked. + */ +static void *dax_lock_entry(struct xa_state *xas, void *entry) +{ + unsigned long v = xa_to_value(entry); + return xas_store(xas, xa_mk_value(v | DAX_LOCKED)); +} + /* * Check whether the given slot is locked. Must be called with the i_pages * lock held. @@ -1728,50 +1822,46 @@ EXPORT_SYMBOL_GPL(dax_iomap_fault); /* * dax_insert_pfn_mkwrite - insert PTE or PMD entry into page tables * @vmf: The description of the fault - * @pe_size: Size of entry to be inserted * @pfn: PFN to insert + * @order: Order of entry to insert. * * This function inserts a writeable PTE or PMD entry into the page tables * for an mmaped DAX file. It also marks the page cache entry as dirty. */ -static vm_fault_t dax_insert_pfn_mkwrite(struct vm_fault *vmf, - enum page_entry_size pe_size, - pfn_t pfn) +static vm_fault_t +dax_insert_pfn_mkwrite(struct vm_fault *vmf, pfn_t pfn, unsigned int order) { struct address_space *mapping = vmf->vma->vm_file->f_mapping; - void *entry, **slot; - pgoff_t index = vmf->pgoff; + XA_STATE_ORDER(xas, &mapping->i_pages, vmf->pgoff, order); + void *entry; vm_fault_t ret; - xa_lock_irq(&mapping->i_pages); - entry = get_unlocked_mapping_entry(mapping, index, &slot); + xas_lock_irq(&xas); + entry = get_unlocked_entry(&xas); /* Did we race with someone splitting entry or so? */ if (!entry || - (pe_size == PE_SIZE_PTE && !dax_is_pte_entry(entry)) || - (pe_size == PE_SIZE_PMD && !dax_is_pmd_entry(entry))) { - put_unlocked_mapping_entry(mapping, index, entry); - xa_unlock_irq(&mapping->i_pages); + (order == 0 && !dax_is_pte_entry(entry)) || + (order == PMD_ORDER && (xa_is_internal(entry) || + !dax_is_pmd_entry(entry)))) { + put_unlocked_entry(&xas, entry); + xas_unlock_irq(&xas); trace_dax_insert_pfn_mkwrite_no_entry(mapping->host, vmf, VM_FAULT_NOPAGE); return VM_FAULT_NOPAGE; } - radix_tree_tag_set(&mapping->i_pages, index, PAGECACHE_TAG_DIRTY); - entry = lock_slot(mapping, slot); - xa_unlock_irq(&mapping->i_pages); - switch (pe_size) { - case PE_SIZE_PTE: + xas_set_mark(&xas, PAGECACHE_TAG_DIRTY); + dax_lock_entry(&xas, entry); + xas_unlock_irq(&xas); + if (order == 0) ret = vmf_insert_mixed_mkwrite(vmf->vma, vmf->address, pfn); - break; #ifdef CONFIG_FS_DAX_PMD - case PE_SIZE_PMD: + else if (order == PMD_ORDER) ret = vmf_insert_pfn_pmd(vmf->vma, vmf->address, vmf->pmd, pfn, true); - break; #endif - default: + else ret = VM_FAULT_FALLBACK; - } - put_locked_mapping_entry(mapping, index); + dax_unlock_entry(&xas, entry); trace_dax_insert_pfn_mkwrite(mapping->host, vmf, ret); return ret; } @@ -1791,17 +1881,12 @@ vm_fault_t dax_finish_sync_fault(struct vm_fault *vmf, { int err; loff_t start = ((loff_t)vmf->pgoff) << PAGE_SHIFT; - size_t len = 0; + unsigned int order = pe_order(pe_size); + size_t len = PAGE_SIZE << order; - if (pe_size == PE_SIZE_PTE) - len = PAGE_SIZE; - else if (pe_size == PE_SIZE_PMD) - len = PMD_SIZE; - else - WARN_ON_ONCE(1); err = vfs_fsync_range(vmf->vma->vm_file, start, start + len - 1, 1); if (err) return VM_FAULT_SIGBUS; - return dax_insert_pfn_mkwrite(vmf, pe_size, pfn); + return dax_insert_pfn_mkwrite(vmf, pfn, order); } EXPORT_SYMBOL_GPL(dax_finish_sync_fault); |