diff options
Diffstat (limited to 'include')
-rw-r--r-- | include/linux/mm.h | 82 |
1 files changed, 67 insertions, 15 deletions
diff --git a/include/linux/mm.h b/include/linux/mm.h index c54fb96cb1e6..10be09c8227e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1001,6 +1001,8 @@ static inline void get_page(struct page *page) page_ref_inc(page); } +bool __must_check try_grab_page(struct page *page, unsigned int flags); + static inline __must_check bool try_get_page(struct page *page) { page = compound_head(page); @@ -1029,29 +1031,79 @@ static inline void put_page(struct page *page) __put_page(page); } -/** - * unpin_user_page() - release a gup-pinned page - * @page: pointer to page to be released +/* + * GUP_PIN_COUNTING_BIAS, and the associated functions that use it, overload + * the page's refcount so that two separate items are tracked: the original page + * reference count, and also a new count of how many pin_user_pages() calls were + * made against the page. ("gup-pinned" is another term for the latter). + * + * With this scheme, pin_user_pages() becomes special: such pages are marked as + * distinct from normal pages. As such, the unpin_user_page() call (and its + * variants) must be used in order to release gup-pinned pages. + * + * Choice of value: + * + * By making GUP_PIN_COUNTING_BIAS a power of two, debugging of page reference + * counts with respect to pin_user_pages() and unpin_user_page() becomes + * simpler, due to the fact that adding an even power of two to the page + * refcount has the effect of using only the upper N bits, for the code that + * counts up using the bias value. This means that the lower bits are left for + * the exclusive use of the original code that increments and decrements by one + * (or at least, by much smaller values than the bias value). * - * Pages that were pinned via pin_user_pages*() must be released via either - * unpin_user_page(), or one of the unpin_user_pages*() routines. This is so - * that eventually such pages can be separately tracked and uniquely handled. In - * particular, interactions with RDMA and filesystems need special handling. + * Of course, once the lower bits overflow into the upper bits (and this is + * OK, because subtraction recovers the original values), then visual inspection + * no longer suffices to directly view the separate counts. However, for normal + * applications that don't have huge page reference counts, this won't be an + * issue. * - * unpin_user_page() and put_page() are not interchangeable, despite this early - * implementation that makes them look the same. unpin_user_page() calls must - * be perfectly matched up with pin*() calls. + * Locking: the lockless algorithm described in page_cache_get_speculative() + * and page_cache_gup_pin_speculative() provides safe operation for + * get_user_pages and page_mkclean and other calls that race to set up page + * table entries. */ -static inline void unpin_user_page(struct page *page) -{ - put_page(page); -} +#define GUP_PIN_COUNTING_BIAS (1U << 10) +void unpin_user_page(struct page *page); void unpin_user_pages_dirty_lock(struct page **pages, unsigned long npages, bool make_dirty); - void unpin_user_pages(struct page **pages, unsigned long npages); +/** + * page_maybe_dma_pinned() - report if a page is pinned for DMA. + * + * This function checks if a page has been pinned via a call to + * pin_user_pages*(). + * + * For non-huge pages, the return value is partially fuzzy: false is not fuzzy, + * because it means "definitely not pinned for DMA", but true means "probably + * pinned for DMA, but possibly a false positive due to having at least + * GUP_PIN_COUNTING_BIAS worth of normal page references". + * + * False positives are OK, because: a) it's unlikely for a page to get that many + * refcounts, and b) all the callers of this routine are expected to be able to + * deal gracefully with a false positive. + * + * For more information, please see Documentation/vm/pin_user_pages.rst. + * + * @page: pointer to page to be queried. + * @Return: True, if it is likely that the page has been "dma-pinned". + * False, if the page is definitely not dma-pinned. + */ +static inline bool page_maybe_dma_pinned(struct page *page) +{ + /* + * page_ref_count() is signed. If that refcount overflows, then + * page_ref_count() returns a negative value, and callers will avoid + * further incrementing the refcount. + * + * Here, for that overflow case, use the signed bit to count a little + * bit higher via unsigned math, and thus still get an accurate result. + */ + return ((unsigned int)page_ref_count(compound_head(page))) >= + GUP_PIN_COUNTING_BIAS; +} + #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) #define SECTION_IN_PAGE_FLAGS #endif |