From b0d1beeff2a97a0cf1965ea8f1d13b8973f22582 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 28 Aug 2018 14:44:25 +0200 Subject: xdp: implement convert_to_xdp_frame for MEM_TYPE_ZERO_COPY MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This commit adds proper MEM_TYPE_ZERO_COPY support for convert_to_xdp_frame. Converting a MEM_TYPE_ZERO_COPY xdp_buff to an xdp_frame is done by transforming the MEM_TYPE_ZERO_COPY buffer into a MEM_TYPE_PAGE_ORDER0 frame. This is costly, and in the future it might make sense to implement a more sophisticated thread-safe alloc/free scheme for MEM_TYPE_ZERO_COPY, so that no allocation or copy is required in the fast path. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 76b95256c266..0d5c6fb4b2e2 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -91,6 +91,8 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame) frame->dev_rx = NULL; } +struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp); + /* Convert xdp_buff to xdp_frame */ static inline struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) @@ -99,9 +101,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp) int metasize; int headroom; - /* TODO: implement clone, copy, use "native" MEM_TYPE */ if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) - return NULL; + return xdp_convert_zc_to_xdp_frame(xdp); /* Assure headroom is available for storing info */ headroom = xdp->data - xdp->data_hard_start; -- cgit v1.2.3 From dce5bd6140a436e3348f6d13a1efb6e6c5a89acd Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 28 Aug 2018 14:44:26 +0200 Subject: xdp: export xdp_rxq_info_unreg_mem_model MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export __xdp_rxq_info_unreg_mem_model as xdp_rxq_info_unreg_mem_model, so it can be used from netdev drivers. Also add checks for the memory type. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/net/xdp.h | 1 + net/core/xdp.c | 15 +++++++++++++-- 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/xdp.h b/include/net/xdp.h index 0d5c6fb4b2e2..0f25b3675c5c 100644 --- a/include/net/xdp.h +++ b/include/net/xdp.h @@ -136,6 +136,7 @@ void xdp_rxq_info_unused(struct xdp_rxq_info *xdp_rxq); bool xdp_rxq_info_is_reg(struct xdp_rxq_info *xdp_rxq); int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq, enum xdp_mem_type type, void *allocator); +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq); /* Drivers not supporting XDP metadata can use this helper, which * rejects any room expansion for metadata as a result.
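The newly exported xdp_rxq_info_unreg_mem_model() is aimed at driver teardown paths. A minimal sketch of the intended call pattern, assuming a hypothetical mydrv driver (only the xdp_rxq_info_* calls are from this patch; everything else is illustrative):

    /* Drop the zero-copy allocator when the umem is removed from a
     * queue, while keeping the rxq info itself registered, then fall
     * back to the default page model for regular XDP.
     */
    static void mydrv_remove_umem(struct mydrv_ring *ring)
    {
    	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
    	xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
    				   MEM_TYPE_PAGE_SHARED, NULL);
    }

Per the net/core/xdp.c hunk below, the unregister call WARNs if the rxq info was never registered, and returns early for memory types other than MEM_TYPE_PAGE_POOL and MEM_TYPE_ZERO_COPY, i.e. for types that carry no allocator.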
diff --git a/net/core/xdp.c b/net/core/xdp.c index be6cb2f0e722..654dbb19707e 100644 --- a/net/core/xdp.c +++ b/net/core/xdp.c @@ -94,11 +94,21 @@ static void __xdp_mem_allocator_rcu_free(struct rcu_head *rcu) kfree(xa); } -static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) +void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) { struct xdp_mem_allocator *xa; int id = xdp_rxq->mem.id; + if (xdp_rxq->reg_state != REG_STATE_REGISTERED) { + WARN(1, "Missing register, driver bug"); + return; + } + + if (xdp_rxq->mem.type != MEM_TYPE_PAGE_POOL && + xdp_rxq->mem.type != MEM_TYPE_ZERO_COPY) { + return; + } + if (id == 0) return; @@ -110,6 +120,7 @@ static void __xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq) mutex_unlock(&mem_id_lock); } +EXPORT_SYMBOL_GPL(xdp_rxq_info_unreg_mem_model); void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) { @@ -119,7 +130,7 @@ void xdp_rxq_info_unreg(struct xdp_rxq_info *xdp_rxq) WARN(!(xdp_rxq->reg_state == REG_STATE_REGISTERED), "Driver BUG"); - __xdp_rxq_info_unreg_mem_model(xdp_rxq); + xdp_rxq_info_unreg_mem_model(xdp_rxq); xdp_rxq->reg_state = REG_STATE_UNREGISTERED; xdp_rxq->dev = NULL; -- cgit v1.2.3 From 902540342096af8a13351f6a22bfdd7a8e19ffd2 Mon Sep 17 00:00:00 2001 From: Björn Töpel Date: Tue, 28 Aug 2018 14:44:27 +0200 Subject: xsk: expose xdp_umem_get_{data,dma} to drivers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Move the xdp_umem_get_{data,dma} functions to include/net/xdp_sock.h, so that the upcoming zero-copy implementation in the Ethernet drivers can utilize them. Also, supply some dummy function implementations for CONFIG_XDP_SOCKETS=n configs. Signed-off-by: Björn Töpel Signed-off-by: Alexei Starovoitov --- include/net/xdp_sock.h | 43 +++++++++++++++++++++++++++++++++++++++++++ net/xdp/xdp_umem.h | 10 ---------- 2 files changed, 43 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 7161856bcf9c..56994ad1ab40 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -79,6 +79,16 @@ void xsk_umem_discard_addr(struct xdp_umem *umem); void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries); bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, u32 *len); void xsk_umem_consume_tx_done(struct xdp_umem *umem); + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); +} #else static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp) { @@ -98,6 +108,39 @@ static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs) { return false; } + +static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr) +{ + return NULL; +} + +static inline void xsk_umem_discard_addr(struct xdp_umem *umem) +{ +} + +static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries) +{ +} + +static inline bool xsk_umem_consume_tx(struct xdp_umem *umem, dma_addr_t *dma, + u32 *len) +{ + return false; +} + +static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem) +{ +} + +static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) +{ + return NULL; +} + +static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) +{ + return 0; +} #endif /* CONFIG_XDP_SOCKETS */ 
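Both helpers decode a 64-bit umem address as a page index (addr >> PAGE_SHIFT) plus an intra-page offset (addr & (PAGE_SIZE - 1)), yielding the CPU and DMA views of the same byte. A sketch of how a zero-copy Rx path might use them, loosely modelled on the i40e code this series enables (the mydrv names and rx-buffer layout are assumptions, not part of the patch):

    /* Take one address off the umem fill queue and derive both views
     * of the buffer, reserving umem + XDP headroom up front.
     */
    static bool mydrv_alloc_rx_buffer_zc(struct mydrv_ring *ring,
    				     struct mydrv_rx_buffer *bi)
    {
    	struct xdp_umem *umem = ring->xsk_umem;
    	u64 hr = umem->headroom + XDP_PACKET_HEADROOM;
    	u64 handle;
    
    	if (!xsk_umem_peek_addr(umem, &handle))
    		return false;	/* fill queue empty */
    
    	bi->addr = xdp_umem_get_data(umem, handle) + hr;
    	bi->dma = xdp_umem_get_dma(umem, handle) + hr;
    	bi->handle = handle + umem->headroom;
    
    	xsk_umem_discard_addr(umem);	/* consume the fill entry */
    	return true;
    }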
#endif /* _LINUX_XDP_SOCK_H */ diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h index f11560334f88..c8be1ad3eb88 100644 --- a/net/xdp/xdp_umem.h +++ b/net/xdp/xdp_umem.h @@ -8,16 +8,6 @@ #include <net/xdp_sock.h> -static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr) -{ - return umem->pages[addr >> PAGE_SHIFT].addr + (addr & (PAGE_SIZE - 1)); -} - -static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr) -{ - return umem->pages[addr >> PAGE_SHIFT].dma + (addr & (PAGE_SIZE - 1)); -} - int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev, u32 queue_id, u16 flags); bool xdp_umem_validate_queues(struct xdp_umem *umem); -- cgit v1.2.3 From 6c5c9581044dd6e0cd284ab653502fb9264f08b6 Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Tue, 28 Aug 2018 14:44:28 +0200 Subject: net: add napi_if_scheduled_mark_missed The function napi_if_scheduled_mark_missed checks whether the NAPI context is scheduled; if so, it sets NAPIF_STATE_MISSED and returns true. It is used by the AF_XDP zero-copy i40e Tx implementation to make sure that IRQ affinity is honored by the NAPI context. Signed-off-by: Magnus Karlsson Signed-off-by: Alexei Starovoitov --- include/linux/netdevice.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index ca5ab98053c8..4271f6b4e419 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -535,6 +535,32 @@ static inline void napi_synchronize(const struct napi_struct *n) barrier(); } +/** + * napi_if_scheduled_mark_missed - if napi is running, set the + * NAPIF_STATE_MISSED + * @n: NAPI context + * + * If napi is running, set the NAPIF_STATE_MISSED, and return true if + * NAPI is scheduled. + **/ +static inline bool napi_if_scheduled_mark_missed(struct napi_struct *n) +{ + unsigned long val, new; + + do { + val = READ_ONCE(n->state); + if (val & NAPIF_STATE_DISABLE) + return true; + + if (!(val & NAPIF_STATE_SCHED)) + return false; + + new = val | NAPIF_STATE_MISSED; + } while (cmpxchg(&n->state, val, new) != val); + + return true; +} + enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, -- cgit v1.2.3 From 679c782de14bd48c19dd74cd1af20a2bc05dd936 Mon Sep 17 00:00:00 2001 From: Edward Cree Date: Wed, 22 Aug 2018 20:02:19 +0100 Subject: bpf/verifier: per-register parent pointers By giving each register its own liveness chain, we elide the skip_callee() logic. Instead, each register's parent is the state it inherits from; both check_func_call() and prepare_func_exit() automatically connect reg states to the correct chain, since when they copy the reg state across (r1-r5 into the callee as args, and r0 out as the return value) they also copy the parent pointer. Signed-off-by: Edward Cree Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 8 +- kernel/bpf/verifier.c | 184 +++++++++++-------------------------------- 2 files changed, 47 insertions(+), 145 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 38b04f559ad3..b42b60a83e19 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -41,6 +41,7 @@ enum bpf_reg_liveness { }; struct bpf_reg_state { + /* Ordering of fields matters. See states_equal() */ enum bpf_reg_type type; union { /* valid when type == PTR_TO_PACKET */ @@ -59,7 +60,6 @@ struct bpf_reg_state { * came from, when one is tested for != NULL.
*/ u32 id; - /* Ordering of fields matters. See states_equal() */ /* For scalar types (SCALAR_VALUE), this represents our knowledge of * the actual value. * For pointer types, this represents the variable part of the offset @@ -76,15 +76,15 @@ struct bpf_reg_state { s64 smax_value; /* maximum possible (s64)value */ u64 umin_value; /* minimum possible (u64)value */ u64 umax_value; /* maximum possible (u64)value */ + /* parentage chain for liveness checking */ + struct bpf_reg_state *parent; /* Inside the callee two registers can be both PTR_TO_STACK like * R1=fp-8 and R2=fp-8, but one of them points to this function stack * while another to the caller's stack. To differentiate them 'frameno' * is used which is an index in bpf_verifier_state->frame[] array * pointing to bpf_func_state. - * This field must be second to last, for states_equal() reasons. */ u32 frameno; - /* This field must be last, for states_equal() reasons. */ enum bpf_reg_liveness live; }; @@ -107,7 +107,6 @@ struct bpf_stack_state { */ struct bpf_func_state { struct bpf_reg_state regs[MAX_BPF_REG]; - struct bpf_verifier_state *parent; /* index of call instruction that called into this func */ int callsite; /* stack frame number of this function state from pov of @@ -129,7 +128,6 @@ struct bpf_func_state { struct bpf_verifier_state { /* call stack tracking */ struct bpf_func_state *frame[MAX_CALL_FRAMES]; - struct bpf_verifier_state *parent; u32 curframe; }; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 92246117d2b0..68568d22d6bd 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -380,9 +380,9 @@ static int copy_stack_state(struct bpf_func_state *dst, /* do_check() starts with zero-sized stack in struct bpf_verifier_state to * make it consume minimal amount of memory. check_stack_write() access from * the program calls into realloc_func_state() to grow the stack size. - * Note there is a non-zero 'parent' pointer inside bpf_verifier_state - * which this function copies over. It points to previous bpf_verifier_state - * which is never reallocated + * Note there is a non-zero parent pointer inside each reg of bpf_verifier_state + * which this function copies over. It points to corresponding reg in previous + * bpf_verifier_state which is never reallocated */ static int realloc_func_state(struct bpf_func_state *state, int size, bool copy_old) @@ -466,7 +466,6 @@ static int copy_verifier_state(struct bpf_verifier_state *dst_state, dst_state->frame[i] = NULL; } dst_state->curframe = src->curframe; - dst_state->parent = src->parent; for (i = 0; i <= src->curframe; i++) { dst = dst_state->frame[i]; if (!dst) { @@ -732,6 +731,7 @@ static void init_reg_state(struct bpf_verifier_env *env, for (i = 0; i < MAX_BPF_REG; i++) { mark_reg_not_init(env, regs, i); regs[i].live = REG_LIVE_NONE; + regs[i].parent = NULL; } /* frame pointer */ @@ -876,74 +876,21 @@ next: return 0; } -static -struct bpf_verifier_state *skip_callee(struct bpf_verifier_env *env, - const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent, - u32 regno) -{ - struct bpf_verifier_state *tmp = NULL; - - /* 'parent' could be a state of caller and - * 'state' could be a state of callee. In such case - * parent->curframe < state->curframe - * and it's ok for r1 - r5 registers - * - * 'parent' could be a callee's state after it bpf_exit-ed. 
- * In such case parent->curframe > state->curframe - * and it's ok for r0 only - */ - if (parent->curframe == state->curframe || - (parent->curframe < state->curframe && - regno >= BPF_REG_1 && regno <= BPF_REG_5) || - (parent->curframe > state->curframe && - regno == BPF_REG_0)) - return parent; - - if (parent->curframe > state->curframe && - regno >= BPF_REG_6) { - /* for callee saved regs we have to skip the whole chain - * of states that belong to callee and mark as LIVE_READ - * the registers before the call - */ - tmp = parent; - while (tmp && tmp->curframe != state->curframe) { - tmp = tmp->parent; - } - if (!tmp) - goto bug; - parent = tmp; - } else { - goto bug; - } - return parent; -bug: - verbose(env, "verifier bug regno %d tmp %p\n", regno, tmp); - verbose(env, "regno %d parent frame %d current frame %d\n", - regno, parent->curframe, state->curframe); - return NULL; -} - +/* Parentage chain of this register (or stack slot) should take care of all + * issues like callee-saved registers, stack slot allocation time, etc. + */ static int mark_reg_read(struct bpf_verifier_env *env, - const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent, - u32 regno) + const struct bpf_reg_state *state, + struct bpf_reg_state *parent) { bool writes = parent == state->parent; /* Observe write marks */ - if (regno == BPF_REG_FP) - /* We don't need to worry about FP liveness because it's read-only */ - return 0; - while (parent) { /* if read wasn't screened by an earlier write ... */ - if (writes && state->frame[state->curframe]->regs[regno].live & REG_LIVE_WRITTEN) + if (writes && state->live & REG_LIVE_WRITTEN) break; - parent = skip_callee(env, state, parent, regno); - if (!parent) - return -EFAULT; /* ... then we depend on parent's value */ - parent->frame[parent->curframe]->regs[regno].live |= REG_LIVE_READ; + parent->live |= REG_LIVE_READ; state = parent; parent = state->parent; writes = true; @@ -969,7 +916,10 @@ static int check_reg_arg(struct bpf_verifier_env *env, u32 regno, verbose(env, "R%d !read_ok\n", regno); return -EACCES; } - return mark_reg_read(env, vstate, vstate->parent, regno); + /* We don't need to worry about FP liveness because it's read-only */ + if (regno != BPF_REG_FP) + return mark_reg_read(env, ®s[regno], + regs[regno].parent); } else { /* check whether register used as dest operand can be written to */ if (regno == BPF_REG_FP) { @@ -1080,8 +1030,8 @@ static int check_stack_write(struct bpf_verifier_env *env, } else { u8 type = STACK_MISC; - /* regular write of data into stack */ - state->stack[spi].spilled_ptr = (struct bpf_reg_state) {}; + /* regular write of data into stack destroys any spilled ptr */ + state->stack[spi].spilled_ptr.type = NOT_INIT; /* only mark the slot as written if all 8 bytes were written * otherwise read propagation may incorrectly stop too soon @@ -1106,61 +1056,6 @@ static int check_stack_write(struct bpf_verifier_env *env, return 0; } -/* registers of every function are unique and mark_reg_read() propagates - * the liveness in the following cases: - * - from callee into caller for R1 - R5 that were used as arguments - * - from caller into callee for R0 that used as result of the call - * - from caller to the same caller skipping states of the callee for R6 - R9, - * since R6 - R9 are callee saved by implicit function prologue and - * caller's R6 != callee's R6, so when we propagate liveness up to - * parent states we need to skip callee states for R6 - R9. 
- * - * stack slot marking is different, since stacks of caller and callee are - * accessible in both (since caller can pass a pointer to caller's stack to - * callee which can pass it to another function), hence mark_stack_slot_read() - * has to propagate the stack liveness to all parent states at given frame number. - * Consider code: - * f1() { - * ptr = fp - 8; - * *ptr = ctx; - * call f2 { - * .. = *ptr; - * } - * .. = *ptr; - * } - * First *ptr is reading from f1's stack and mark_stack_slot_read() has - * to mark liveness at the f1's frame and not f2's frame. - * Second *ptr is also reading from f1's stack and mark_stack_slot_read() has - * to propagate liveness to f2 states at f1's frame level and further into - * f1 states at f1's frame level until write into that stack slot - */ -static void mark_stack_slot_read(struct bpf_verifier_env *env, - const struct bpf_verifier_state *state, - struct bpf_verifier_state *parent, - int slot, int frameno) -{ - bool writes = parent == state->parent; /* Observe write marks */ - - while (parent) { - if (parent->frame[frameno]->allocated_stack <= slot * BPF_REG_SIZE) - /* since LIVE_WRITTEN mark is only done for full 8-byte - * write the read marks are conservative and parent - * state may not even have the stack allocated. In such case - * end the propagation, since the loop reached beginning - * of the function - */ - break; - /* if read wasn't screened by an earlier write ... */ - if (writes && state->frame[frameno]->stack[slot].spilled_ptr.live & REG_LIVE_WRITTEN) - break; - /* ... then we depend on parent's value */ - parent->frame[frameno]->stack[slot].spilled_ptr.live |= REG_LIVE_READ; - state = parent; - parent = state->parent; - writes = true; - } -} - static int check_stack_read(struct bpf_verifier_env *env, struct bpf_func_state *reg_state /* func where register points to */, int off, int size, int value_regno) @@ -1198,8 +1093,8 @@ static int check_stack_read(struct bpf_verifier_env *env, */ state->regs[value_regno].live |= REG_LIVE_WRITTEN; } - mark_stack_slot_read(env, vstate, vstate->parent, spi, - reg_state->frameno); + mark_reg_read(env, ®_state->stack[spi].spilled_ptr, + reg_state->stack[spi].spilled_ptr.parent); return 0; } else { int zeros = 0; @@ -1215,8 +1110,8 @@ static int check_stack_read(struct bpf_verifier_env *env, off, i, size); return -EACCES; } - mark_stack_slot_read(env, vstate, vstate->parent, spi, - reg_state->frameno); + mark_reg_read(env, ®_state->stack[spi].spilled_ptr, + reg_state->stack[spi].spilled_ptr.parent); if (value_regno >= 0) { if (zeros == size) { /* any size read into register is zero extended, @@ -1908,8 +1803,8 @@ mark: /* reading any byte out of 8-byte 'spill_slot' will cause * the whole slot to be marked as 'read' */ - mark_stack_slot_read(env, env->cur_state, env->cur_state->parent, - spi, state->frameno); + mark_reg_read(env, &state->stack[spi].spilled_ptr, + state->stack[spi].spilled_ptr.parent); } return update_stack_depth(env, state, off); } @@ -2366,11 +2261,13 @@ static int check_func_call(struct bpf_verifier_env *env, struct bpf_insn *insn, state->curframe + 1 /* frameno within this callchain */, subprog /* subprog number within this prog */); - /* copy r1 - r5 args that callee can access */ + /* copy r1 - r5 args that callee can access. 
The copy includes parent + * pointers, which connects us up to the liveness chain + */ for (i = BPF_REG_1; i <= BPF_REG_5; i++) callee->regs[i] = caller->regs[i]; - /* after the call regsiters r0 - r5 were scratched */ + /* after the call registers r0 - r5 were scratched */ for (i = 0; i < CALLER_SAVED_REGS; i++) { mark_reg_not_init(env, caller->regs, caller_saved[i]); check_reg_arg(env, caller_saved[i], DST_OP_NO_MARK); @@ -4370,7 +4267,7 @@ static bool regsafe(struct bpf_reg_state *rold, struct bpf_reg_state *rcur, /* explored state didn't use this */ return true; - equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, frameno)) == 0; + equal = memcmp(rold, rcur, offsetof(struct bpf_reg_state, parent)) == 0; if (rold->type == PTR_TO_STACK) /* two stack pointers are equal only if they're pointing to @@ -4603,7 +4500,7 @@ static bool states_equal(struct bpf_verifier_env *env, * equivalent state (jump target or such) we didn't arrive by the straight-line * code, so read marks in the state must propagate to the parent regardless * of the state's write marks. That's what 'parent == state->parent' comparison - * in mark_reg_read() and mark_stack_slot_read() is for. + * in mark_reg_read() is for. */ static int propagate_liveness(struct bpf_verifier_env *env, const struct bpf_verifier_state *vstate, @@ -4624,7 +4521,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, if (vparent->frame[vparent->curframe]->regs[i].live & REG_LIVE_READ) continue; if (vstate->frame[vstate->curframe]->regs[i].live & REG_LIVE_READ) { - err = mark_reg_read(env, vstate, vparent, i); + err = mark_reg_read(env, &vstate->frame[vstate->curframe]->regs[i], + &vparent->frame[vstate->curframe]->regs[i]); if (err) return err; } @@ -4639,7 +4537,8 @@ static int propagate_liveness(struct bpf_verifier_env *env, if (parent->stack[i].spilled_ptr.live & REG_LIVE_READ) continue; if (state->stack[i].spilled_ptr.live & REG_LIVE_READ) - mark_stack_slot_read(env, vstate, vparent, i, frame); + mark_reg_read(env, &state->stack[i].spilled_ptr, + &parent->stack[i].spilled_ptr); } } return err; @@ -4649,7 +4548,7 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) { struct bpf_verifier_state_list *new_sl; struct bpf_verifier_state_list *sl; - struct bpf_verifier_state *cur = env->cur_state; + struct bpf_verifier_state *cur = env->cur_state, *new; int i, j, err; sl = env->explored_states[insn_idx]; @@ -4691,16 +4590,18 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) return -ENOMEM; /* add new state to the head of linked list */ - err = copy_verifier_state(&new_sl->state, cur); + new = &new_sl->state; + err = copy_verifier_state(new, cur); if (err) { - free_verifier_state(&new_sl->state, false); + free_verifier_state(new, false); kfree(new_sl); return err; } new_sl->next = env->explored_states[insn_idx]; env->explored_states[insn_idx] = new_sl; /* connect new state to parentage chain */ - cur->parent = &new_sl->state; + for (i = 0; i < BPF_REG_FP; i++) + cur_regs(env)[i].parent = &new->frame[new->curframe]->regs[i]; /* clear write marks in current state: the writes we did are not writes * our child did, so they don't screen off its reads from us. 
* (There are no read marks in current state, because reads always mark @@ -4713,9 +4614,13 @@ static int is_state_visited(struct bpf_verifier_env *env, int insn_idx) /* all stack frames are accessible from callee, clear them all */ for (j = 0; j <= cur->curframe; j++) { struct bpf_func_state *frame = cur->frame[j]; + struct bpf_func_state *newframe = new->frame[j]; - for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) + for (i = 0; i < frame->allocated_stack / BPF_REG_SIZE; i++) { frame->stack[i].spilled_ptr.live = REG_LIVE_NONE; + frame->stack[i].spilled_ptr.parent = + &newframe->stack[i].spilled_ptr; + } } return 0; } @@ -4734,7 +4639,6 @@ static int do_check(struct bpf_verifier_env *env) if (!state) return -ENOMEM; state->curframe = 0; - state->parent = NULL; state->frame[0] = kzalloc(sizeof(struct bpf_func_state), GFP_KERNEL); if (!state->frame[0]) { kfree(state); -- cgit v1.2.3 From 93ee30f3e8b412c5fc2d2f7d9d002529d9a209ad Mon Sep 17 00:00:00 2001 From: Magnus Karlsson Date: Fri, 31 Aug 2018 13:40:02 +0200 Subject: xsk: i40e: get rid of useless struct xdp_umem_props This commit gets rid of the structure xdp_umem_props. It was there to be able to break a dependency at one point, but this is no longer needed. The values in the struct are instead stored directly in the xdp_umem structure. This simplifies the xsk code as well as af_xdp zero-copy drivers and as a bonus gets rid of one internal header file. The i40e driver is also adapted to the new interface in this commit. Signed-off-by: Magnus Karlsson Signed-off-by: Daniel Borkmann --- drivers/net/ethernet/intel/i40e/i40e_xsk.c | 4 ++-- include/net/xdp_sock.h | 8 ++------ net/xdp/xdp_umem.c | 4 ++-- net/xdp/xdp_umem_props.h | 14 -------------- net/xdp/xsk.c | 10 ++++++---- net/xdp/xsk_queue.c | 5 +++-- net/xdp/xsk_queue.h | 13 +++++++------ 7 files changed, 22 insertions(+), 36 deletions(-) delete mode 100644 net/xdp/xdp_umem_props.h (limited to 'include') diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c index 41ca7e1310bc..2ebfc78bbd09 100644 --- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c +++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c @@ -442,7 +442,7 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring, struct i40e_rx_buffer *old_bi) { struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc]; - unsigned long mask = (unsigned long)rx_ring->xsk_umem->props.chunk_mask; + unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask; u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; u16 nta = rx_ring->next_to_alloc; @@ -477,7 +477,7 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle) rx_ring = container_of(alloc, struct i40e_ring, zca); hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM; - mask = rx_ring->xsk_umem->props.chunk_mask; + mask = rx_ring->xsk_umem->chunk_mask; nta = rx_ring->next_to_alloc; bi = &rx_ring->rx_bi[nta]; diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h index 56994ad1ab40..932ca0dad6f3 100644 --- a/include/net/xdp_sock.h +++ b/include/net/xdp_sock.h @@ -16,11 +16,6 @@ struct net_device; struct xsk_queue; -struct xdp_umem_props { - u64 chunk_mask; - u64 size; -}; - struct xdp_umem_page { void *addr; dma_addr_t dma; @@ -30,7 +25,8 @@ struct xdp_umem { struct xsk_queue *fq; struct xsk_queue *cq; struct xdp_umem_page *pages; - struct xdp_umem_props props; + u64 chunk_mask; + u64 size; u32 headroom; u32 chunk_size_nohr; struct user_struct *user; diff --git 
a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c index d179732617dc..b3b632c5aeae 100644 --- a/net/xdp/xdp_umem.c +++ b/net/xdp/xdp_umem.c @@ -312,8 +312,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr) umem->pid = get_task_pid(current, PIDTYPE_PID); umem->address = (unsigned long)addr; - umem->props.chunk_mask = ~((u64)chunk_size - 1); - umem->props.size = size; + umem->chunk_mask = ~((u64)chunk_size - 1); + umem->size = size; umem->headroom = headroom; umem->chunk_size_nohr = chunk_size - headroom; umem->npgs = size / PAGE_SIZE; diff --git a/net/xdp/xdp_umem_props.h b/net/xdp/xdp_umem_props.h deleted file mode 100644 index 40eab10dfc49..000000000000 --- a/net/xdp/xdp_umem_props.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* XDP user-space packet buffer - * Copyright(c) 2018 Intel Corporation. - */ - -#ifndef XDP_UMEM_PROPS_H_ -#define XDP_UMEM_PROPS_H_ - -struct xdp_umem_props { - u64 chunk_mask; - u64 size; -}; - -#endif /* XDP_UMEM_PROPS_H_ */ diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c index 569048e299df..5a432dfee4ee 100644 --- a/net/xdp/xsk.c +++ b/net/xdp/xsk.c @@ -470,8 +470,10 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) goto out_unlock; } else { /* This xsk has its own umem. */ - xskq_set_umem(xs->umem->fq, &xs->umem->props); - xskq_set_umem(xs->umem->cq, &xs->umem->props); + xskq_set_umem(xs->umem->fq, xs->umem->size, + xs->umem->chunk_mask); + xskq_set_umem(xs->umem->cq, xs->umem->size, + xs->umem->chunk_mask); err = xdp_umem_assign_dev(xs->umem, dev, qid, flags); if (err) @@ -481,8 +483,8 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len) xs->dev = dev; xs->zc = xs->umem->zc; xs->queue_id = qid; - xskq_set_umem(xs->rx, &xs->umem->props); - xskq_set_umem(xs->tx, &xs->umem->props); + xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask); + xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask); xdp_add_sk_umem(xs->umem, xs); out_unlock: diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c index 6c32e92e98fc..2dc1384d9f27 100644 --- a/net/xdp/xsk_queue.c +++ b/net/xdp/xsk_queue.c @@ -7,12 +7,13 @@ #include "xsk_queue.h" -void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props) +void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask) { if (!q) return; - q->umem_props = *umem_props; + q->size = size; + q->chunk_mask = chunk_mask; } static u32 xskq_umem_get_ring_size(struct xsk_queue *q) diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h index 8a64b150be54..82252cccb4e0 100644 --- a/net/xdp/xsk_queue.h +++ b/net/xdp/xsk_queue.h @@ -31,7 +31,8 @@ struct xdp_umem_ring { }; struct xsk_queue { - struct xdp_umem_props umem_props; + u64 chunk_mask; + u64 size; u32 ring_mask; u32 nentries; u32 prod_head; @@ -78,7 +79,7 @@ static inline u32 xskq_nb_free(struct xsk_queue *q, u32 producer, u32 dcnt) static inline bool xskq_is_valid_addr(struct xsk_queue *q, u64 addr) { - if (addr >= q->umem_props.size) { + if (addr >= q->size) { q->invalid_descs++; return false; } @@ -92,7 +93,7 @@ static inline u64 *xskq_validate_addr(struct xsk_queue *q, u64 *addr) struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring; unsigned int idx = q->cons_tail & q->ring_mask; - *addr = READ_ONCE(ring->desc[idx]) & q->umem_props.chunk_mask; + *addr = READ_ONCE(ring->desc[idx]) & q->chunk_mask; if (xskq_is_valid_addr(q, *addr)) return addr; @@ -173,8 +174,8 @@ static inline bool xskq_is_valid_desc(struct xsk_queue *q, struct xdp_desc *d) if 
(!xskq_is_valid_addr(q, d->addr)) return false; - if (((d->addr + d->len) & q->umem_props.chunk_mask) != - (d->addr & q->umem_props.chunk_mask)) { + if (((d->addr + d->len) & q->chunk_mask) != + (d->addr & q->chunk_mask)) { q->invalid_descs++; return false; } @@ -253,7 +254,7 @@ static inline bool xskq_empty_desc(struct xsk_queue *q) return xskq_nb_free(q, q->prod_tail, q->nentries) == q->nentries; } -void xskq_set_umem(struct xsk_queue *q, struct xdp_umem_props *umem_props); +void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask); struct xsk_queue *xskq_create(u32 nentries, bool umem_queue); void xskq_destroy(struct xsk_queue *q_ops); -- cgit v1.2.3
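As a closing note on the new layout: chunk_mask is derived in xdp_umem_reg() as ~((u64)chunk_size - 1), and xskq_is_valid_desc() above uses it to reject descriptors that cross a chunk boundary. A standalone userspace illustration of that arithmetic (plain C, not kernel code; assumes 2 KiB chunks):

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>
    
    /* Mirrors the check in xskq_is_valid_desc(): the masked start and
     * end of the descriptor must fall into the same chunk.
     */
    static bool desc_within_chunk(uint64_t addr, uint32_t len,
    			      uint64_t chunk_mask)
    {
    	return ((addr + len) & chunk_mask) == (addr & chunk_mask);
    }
    
    int main(void)
    {
    	uint64_t chunk_mask = ~((uint64_t)2048 - 1); /* 0xfffffffffffff800 */
    
    	printf("%d\n", desc_within_chunk(4096, 100, chunk_mask)); /* 1: fits */
    	printf("%d\n", desc_within_chunk(4000, 200, chunk_mask)); /* 0: crosses 4096 */
    	return 0;
    }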