diff options
author | David S. Miller <davem@davemloft.net> | 2019-06-19 11:23:14 -0400 |
---|---|---|
committer | David S. Miller <davem@davemloft.net> | 2019-06-19 11:23:14 -0400 |
commit | 2a54003e7af1eaddc05848dac14f7bcd77301478 (patch) | |
tree | 31644496760e3272d55f8248ef49eed16c32ec1d /include | |
parent | 9371a56f7101cc3f12b57db4bfbb6159205211f4 (diff) | |
parent | f71fec47c2df704c7081f946d7e46fe036a4208b (diff) | |
download | linux-2a54003e7af1eaddc05848dac14f7bcd77301478.tar.bz2 |
Merge branch 'xdp-page_pool-fixes-and-in-flight-accounting'
Jesper Dangaard Brouer says:
====================
xdp: page_pool fixes and in-flight accounting
This patchset fixes the page_pool API and its users, such that drivers can use
it for DMA-mapping. A number of places exist where the DMA-mapping would not get
released/unmapped; all of these are fixed. This occurs e.g. when an xdp_frame
gets converted to an SKB, as the network stack doesn't have any callback for XDP
memory models.
The patchset also addresses a shutdown race-condition. Today, removing an XDP
memory model based on page_pool is only delayed by one RCU grace period. This
isn't enough, as redirected xdp_frames can still be in-flight on different
queues (remote driver TX, cpumap or veth).
We stress that when drivers use page_pool for DMA-mapping, then they MUST
use one packet per page. This might change in the future, but more work lies
ahead, before we can lift this restriction.
This patchset changes the page_pool API to be more strict, as in-flight page
accounting is added.
====================
Signed-off-by: David S. Miller <davem@davemloft.net>
Diffstat (limited to 'include')
-rw-r--r-- | include/net/page_pool.h | 69 | ||||
-rw-r--r-- | include/net/xdp.h | 15 | ||||
-rw-r--r-- | include/net/xdp_priv.h | 23 | ||||
-rw-r--r-- | include/trace/events/page_pool.h | 87 | ||||
-rw-r--r-- | include/trace/events/xdp.h | 115 |
5 files changed, 300 insertions, 9 deletions
diff --git a/include/net/page_pool.h b/include/net/page_pool.h index 694d055e01ef..f09b3f1994e6 100644 --- a/include/net/page_pool.h +++ b/include/net/page_pool.h @@ -16,14 +16,16 @@ * page_pool_alloc_pages() call. Drivers should likely use * page_pool_dev_alloc_pages() replacing dev_alloc_pages(). * - * If page_pool handles DMA mapping (use page->private), then API user - * is responsible for invoking page_pool_put_page() once. In-case of - * elevated refcnt, the DMA state is released, assuming other users of - * the page will eventually call put_page(). + * API keeps track of in-flight pages, in-order to let API user know + * when it is safe to dealloactor page_pool object. Thus, API users + * must make sure to call page_pool_release_page() when a page is + * "leaving" the page_pool. Or call page_pool_put_page() where + * appropiate. For maintaining correct accounting. * - * If no DMA mapping is done, then it can act as shim-layer that - * fall-through to alloc_page. As no state is kept on the page, the - * regular put_page() call is sufficient. + * API user must only call page_pool_put_page() once on a page, as it + * will either recycle the page, or in case of elevated refcnt, it + * will release the DMA mapping and in-flight state accounting. We + * hope to lift this requirement in the future. */ #ifndef _NET_PAGE_POOL_H #define _NET_PAGE_POOL_H @@ -66,9 +68,10 @@ struct page_pool_params { }; struct page_pool { - struct rcu_head rcu; struct page_pool_params p; + u32 pages_state_hold_cnt; + /* * Data structure for allocation side * @@ -96,6 +99,8 @@ struct page_pool { * TODO: Implement bulk return pages into this structure. 
*/ struct ptr_ring ring; + + atomic_t pages_state_release_cnt; }; struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp); @@ -109,7 +114,16 @@ static inline struct page *page_pool_dev_alloc_pages(struct page_pool *pool) struct page_pool *page_pool_create(const struct page_pool_params *params); -void page_pool_destroy(struct page_pool *pool); +void __page_pool_free(struct page_pool *pool); +static inline void page_pool_free(struct page_pool *pool) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + __page_pool_free(pool); +#endif +} /* Never call this directly, use helpers below */ void __page_pool_put_page(struct page_pool *pool, @@ -132,6 +146,43 @@ static inline void page_pool_recycle_direct(struct page_pool *pool, __page_pool_put_page(pool, page, true); } +/* API user MUST have disconnected alloc-side (not allowed to call + * page_pool_alloc_pages()) before calling this. The free-side can + * still run concurrently, to handle in-flight packet-pages. + * + * A request to shutdown can fail (with false) if there are still + * in-flight packet-pages. + */ +bool __page_pool_request_shutdown(struct page_pool *pool); +static inline bool page_pool_request_shutdown(struct page_pool *pool) +{ + /* When page_pool isn't compiled-in, net/core/xdp.c doesn't + * allow registering MEM_TYPE_PAGE_POOL, but shield linker. + */ +#ifdef CONFIG_PAGE_POOL + return __page_pool_request_shutdown(pool); +#endif +} + +/* Disconnects a page (from a page_pool). API users can have a need + * to disconnect a page (from a page_pool), to allow it to be used as + * a regular page (that will eventually be returned to the normal + * page-allocator via put_page). 
+ */ +void page_pool_unmap_page(struct page_pool *pool, struct page *page); +static inline void page_pool_release_page(struct page_pool *pool, + struct page *page) +{ +#ifdef CONFIG_PAGE_POOL + page_pool_unmap_page(pool, page); +#endif +} + +static inline dma_addr_t page_pool_get_dma_addr(struct page *page) +{ + return page->dma_addr; +} + static inline bool is_page_pool_compiled_in(void) { #ifdef CONFIG_PAGE_POOL diff --git a/include/net/xdp.h b/include/net/xdp.h index 8e0deddef35c..40c6d3398458 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -129,6 +129,21 @@ void xdp_return_frame(struct xdp_frame *xdpf); void xdp_return_frame_rx_napi(struct xdp_frame *xdpf); void xdp_return_buff(struct xdp_buff *xdp); +/* When sending xdp_frame into the network stack, then there is no + * return point callback, which is needed to release e.g. DMA-mapping + * resources with page_pool. Thus, have explicit function to release + * frame resources. + */ +void __xdp_release_frame(void *data, struct xdp_mem_info *mem); +static inline void xdp_release_frame(struct xdp_frame *xdpf) +{ + struct xdp_mem_info *mem = &xdpf->mem; + + /* Curr only page_pool needs this */ + if (mem->type == MEM_TYPE_PAGE_POOL) + __xdp_release_frame(xdpf->data, mem); +} + int xdp_rxq_info_reg(struct xdp_rxq_info *xdp_rxq, struct net_device *dev, u32 queue_index); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq); diff --git a/include/net/xdp_priv.h b/include/net/xdp_priv.h new file mode 100644 index 000000000000..6a8cba6ea79a --- /dev/null +++ b/include/net/xdp_priv.h @@ -0,0 +1,23 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_NET_XDP_PRIV_H__ +#define __LINUX_NET_XDP_PRIV_H__ + +#include <linux/rhashtable.h> + +/* Private to net/core/xdp.c, but used by trace/events/xdp.h */ +struct xdp_mem_allocator { + struct xdp_mem_info mem; + union { + void *allocator; + struct page_pool *page_pool; + struct zero_copy_allocator *zc_alloc; + }; + int disconnect_cnt; + unsigned long defer_start; + 
struct rhash_head node; + struct rcu_head rcu; + struct delayed_work defer_wq; + unsigned long defer_warn; +}; + +#endif /* __LINUX_NET_XDP_PRIV_H__ */ diff --git a/include/trace/events/page_pool.h b/include/trace/events/page_pool.h new file mode 100644 index 000000000000..47b5ee880aa9 --- /dev/null +++ b/include/trace/events/page_pool.h @@ -0,0 +1,87 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM page_pool + +#if !defined(_TRACE_PAGE_POOL_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_PAGE_POOL_H + +#include <linux/types.h> +#include <linux/tracepoint.h> + +#include <net/page_pool.h> + +TRACE_EVENT(page_pool_inflight, + + TP_PROTO(const struct page_pool *pool, + s32 inflight, u32 hold, u32 release), + + TP_ARGS(pool, inflight, hold, release), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(s32, inflight) + __field(u32, hold) + __field(u32, release) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->inflight = inflight; + __entry->hold = hold; + __entry->release = release; + ), + + TP_printk("page_pool=%p inflight=%d hold=%u release=%u", + __entry->pool, __entry->inflight, __entry->hold, __entry->release) +); + +TRACE_EVENT(page_pool_state_release, + + TP_PROTO(const struct page_pool *pool, + const struct page *page, u32 release), + + TP_ARGS(pool, page, release), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(const struct page *, page) + __field(u32, release) + ), + + TP_fast_assign( + __entry->pool = pool; + __entry->page = page; + __entry->release = release; + ), + + TP_printk("page_pool=%p page=%p release=%u", + __entry->pool, __entry->page, __entry->release) +); + +TRACE_EVENT(page_pool_state_hold, + + TP_PROTO(const struct page_pool *pool, + const struct page *page, u32 hold), + + TP_ARGS(pool, page, hold), + + TP_STRUCT__entry( + __field(const struct page_pool *, pool) + __field(const struct page *, page) + __field(u32, hold) + ), + + TP_fast_assign( 
+ __entry->pool = pool; + __entry->page = page; + __entry->hold = hold; + ), + + TP_printk("page_pool=%p page=%p hold=%u", + __entry->pool, __entry->page, __entry->hold) +); + +#endif /* _TRACE_PAGE_POOL_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h index e95cb86b65cf..bb5e380e2ef3 100644 --- a/include/trace/events/xdp.h +++ b/include/trace/events/xdp.h @@ -269,6 +269,121 @@ TRACE_EVENT(xdp_devmap_xmit, __entry->from_ifindex, __entry->to_ifindex, __entry->err) ); +/* Expect users already include <net/xdp.h>, but not xdp_priv.h */ +#include <net/xdp_priv.h> + +#define __MEM_TYPE_MAP(FN) \ + FN(PAGE_SHARED) \ + FN(PAGE_ORDER0) \ + FN(PAGE_POOL) \ + FN(ZERO_COPY) + +#define __MEM_TYPE_TP_FN(x) \ + TRACE_DEFINE_ENUM(MEM_TYPE_##x); +#define __MEM_TYPE_SYM_FN(x) \ + { MEM_TYPE_##x, #x }, +#define __MEM_TYPE_SYM_TAB \ + __MEM_TYPE_MAP(__MEM_TYPE_SYM_FN) { -1, 0 } +__MEM_TYPE_MAP(__MEM_TYPE_TP_FN) + +TRACE_EVENT(mem_disconnect, + + TP_PROTO(const struct xdp_mem_allocator *xa, + bool safe_to_remove, bool force), + + TP_ARGS(xa, safe_to_remove, force), + + TP_STRUCT__entry( + __field(const struct xdp_mem_allocator *, xa) + __field(u32, mem_id) + __field(u32, mem_type) + __field(const void *, allocator) + __field(bool, safe_to_remove) + __field(bool, force) + __field(int, disconnect_cnt) + ), + + TP_fast_assign( + __entry->xa = xa; + __entry->mem_id = xa->mem.id; + __entry->mem_type = xa->mem.type; + __entry->allocator = xa->allocator; + __entry->safe_to_remove = safe_to_remove; + __entry->force = force; + __entry->disconnect_cnt = xa->disconnect_cnt; + ), + + TP_printk("mem_id=%d mem_type=%s allocator=%p" + " safe_to_remove=%s force=%s disconnect_cnt=%d", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->allocator, + __entry->safe_to_remove ? "true" : "false", + __entry->force ? 
"true" : "false", + __entry->disconnect_cnt + ) +); + +TRACE_EVENT(mem_connect, + + TP_PROTO(const struct xdp_mem_allocator *xa, + const struct xdp_rxq_info *rxq), + + TP_ARGS(xa, rxq), + + TP_STRUCT__entry( + __field(const struct xdp_mem_allocator *, xa) + __field(u32, mem_id) + __field(u32, mem_type) + __field(const void *, allocator) + __field(const struct xdp_rxq_info *, rxq) + __field(int, ifindex) + ), + + TP_fast_assign( + __entry->xa = xa; + __entry->mem_id = xa->mem.id; + __entry->mem_type = xa->mem.type; + __entry->allocator = xa->allocator; + __entry->rxq = rxq; + __entry->ifindex = rxq->dev->ifindex; + ), + + TP_printk("mem_id=%d mem_type=%s allocator=%p" + " ifindex=%d", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->allocator, + __entry->ifindex + ) +); + +TRACE_EVENT(mem_return_failed, + + TP_PROTO(const struct xdp_mem_info *mem, + const struct page *page), + + TP_ARGS(mem, page), + + TP_STRUCT__entry( + __field(const struct page *, page) + __field(u32, mem_id) + __field(u32, mem_type) + ), + + TP_fast_assign( + __entry->page = page; + __entry->mem_id = mem->id; + __entry->mem_type = mem->type; + ), + + TP_printk("mem_id=%d mem_type=%s page=%p", + __entry->mem_id, + __print_symbolic(__entry->mem_type, __MEM_TYPE_SYM_TAB), + __entry->page + ) +); + #endif /* _TRACE_XDP_H */ #include <trace/define_trace.h> |