From 7b4ccb3c466f62bbf2f4dd5d6a143d945a6f3051 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 25 Apr 2017 19:36:25 +0200 Subject: soc: renesas: Provide dummy rcar_rst_read_mode_pins() for compile-testing If the R-Car RST driver is not included, compile-testing R-Car clock drivers fails with a link error: undefined reference to `rcar_rst_read_mode_pins' To fix this, provide a dummy version. Use the exact same test logic as in drivers/soc/renesas/Makefile, as there is no Kconfig symbol (yet) to control compilation of the R-Car RST driver. Fixes: 527c02f66d263d2e ("soc: renesas: Add R-Car RST driver") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- include/linux/soc/renesas/rcar-rst.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index a18e0783946b..787e7ad53d45 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -1,6 +1,11 @@ #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__ #define __LINUX_SOC_RENESAS_RCAR_RST_H__ +#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \ + defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796) int rcar_rst_read_mode_pins(u32 *mode); +#else +static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } +#endif #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ -- cgit v1.2.3 From df3ed932394488e57e72dd0e73c224d1804fdc8f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 11 May 2017 10:15:10 -0500 Subject: Partially Revert "of: fix sparse warnings in fdt, irq, reserved mem, and resolver code" A change to function pointers that was meant to address a sparse warning turned out to cause hundreds of new gcc-7 warnings: include/linux/of_irq.h:11:13: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] drivers/of/of_reserved_mem.c: In function '__reserved_mem_init_node': drivers/of/of_reserved_mem.c:200:7: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] int const (*initfn)(struct reserved_mem *rmem) = i->data; Turns out the sparse warnings were spurious and have been fixed in upstream sparse since 0.5.0 in commit "sparse: treat function pointers as pointers to const data". This partially reverts commit 17a70355ea576843a7ac851f1db26872a50b2850. Fixes: 17a70355ea57 ("of: fix sparse warnings in fdt, irq, reserved mem, and resolver code") Reported-by: Arnd Bergmann Signed-off-by: Rob Herring --- drivers/of/of_reserved_mem.c | 2 +- include/linux/of_irq.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/of/of_reserved_mem.c b/drivers/of/of_reserved_mem.c index 4dec07ea510f..d507c3569a88 100644 --- a/drivers/of/of_reserved_mem.c +++ b/drivers/of/of_reserved_mem.c @@ -197,7 +197,7 @@ static int __init __reserved_mem_init_node(struct reserved_mem *rmem) const struct of_device_id *i; for (i = __reservedmem_of_table; i < &__rmem_of_table_sentinel; i++) { - int const (*initfn)(struct reserved_mem *rmem) = i->data; + reservedmem_of_init_fn initfn = i->data; const char *compat = i->compatible; if (!of_flat_dt_is_compatible(rmem->fdt_node, compat)) diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index ec6b11deb773..1e0deb8e8494 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -8,7 +8,7 @@ #include #include -typedef int const (*of_irq_init_cb_t)(struct device_node *, struct device_node *); +typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *); /* * Workarounds only applied to 32bit powermac machines -- cgit v1.2.3 From d1174416747d790d750742d0514915deeed93acf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:22:52 -0700 Subject: bpf: Track alignment of register values in the verifier. Currently if we add only constant values to pointers we can fully validate the alignment, and properly check if we need to reject the program on !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS architectures. However, once an unknown value is introduced we only allow byte sized memory accesses which is too restrictive. Add logic to track the known minimum alignment of register values, and propagate this state into registers containing pointers. The most common paradigm that makes use of this new logic is computing the transport header using the IP header length field. For example: struct ethhdr *ep = skb->data; struct iphdr *iph = (struct iphdr *) (ep + 1); struct tcphdr *th; ... n = iph->ihl; th = ((void *)iph + (n * 4)); port = th->dest; The existing code will reject the load of th->dest because it cannot validate that the alignment is at least 2 once "n * 4" is added the the packet pointer. In the new code, the register holding "n * 4" will have a reg->min_align value of 4, because any value multiplied by 4 will be at least 4 byte aligned. (actually, the eBPF code emitted by the compiler in this case is most likely to use a shift left by 2, but the end result is identical) At the critical addition: th = ((void *)iph + (n * 4)); The register holding 'th' will start with reg->off value of 14. The pointer addition will transform that reg into something that looks like: reg->aux_off = 14 reg->aux_off_align = 4 Next, the verifier will look at the th->dest load, and it will see a load offset of 2, and first check: if (reg->aux_off_align % size) which will pass because aux_off_align is 4. reg_off will be computed: reg_off = reg->off; ... reg_off += reg->aux_off; plus we have off==2, and it will thus check: if ((NET_IP_ALIGN + reg_off + off) % size != 0) which evaluates to: if ((NET_IP_ALIGN + 14 + 2) % size != 0) On strict alignment architectures, NET_IP_ALIGN is 2, thus: if ((2 + 14 + 2) % size != 0) which passes. These pointer transformations and checks work regardless of whether the constant offset or the variable with known alignment is added first to the pointer register. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 3 ++ kernel/bpf/verifier.c | 108 +++++++++++++++++++++++++++++++++++-------- 2 files changed, 92 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5efb4db44e1e..7c6a51924afc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -40,6 +40,9 @@ struct bpf_reg_state { */ s64 min_value; u64 max_value; + u32 min_align; + u32 aux_off; + u32 aux_off_align; }; enum bpf_stack_slot_type { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c5b56c92f8e2..cc7b626fa447 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -241,6 +241,12 @@ static void print_verifier_state(struct bpf_verifier_state *state) if (reg->max_value != BPF_REGISTER_MAX_RANGE) verbose(",max_value=%llu", (unsigned long long)reg->max_value); + if (reg->min_align) + verbose(",min_align=%u", reg->min_align); + if (reg->aux_off) + verbose(",aux_off=%u", reg->aux_off); + if (reg->aux_off_align) + verbose(",aux_off_align=%u", reg->aux_off_align); } for (i = 0; i < MAX_BPF_STACK; i += BPF_REG_SIZE) { if (state->stack_slot_type[i] == STACK_SPILL) @@ -466,6 +472,9 @@ static void init_reg_state(struct bpf_reg_state *regs) regs[i].imm = 0; regs[i].min_value = BPF_REGISTER_MIN_RANGE; regs[i].max_value = BPF_REGISTER_MAX_RANGE; + regs[i].min_align = 0; + regs[i].aux_off = 0; + regs[i].aux_off_align = 0; } /* frame pointer */ @@ -492,6 +501,7 @@ static void reset_reg_range_values(struct bpf_reg_state *regs, u32 regno) { regs[regno].min_value = BPF_REGISTER_MIN_RANGE; regs[regno].max_value = BPF_REGISTER_MAX_RANGE; + regs[regno].min_align = 0; } static void mark_reg_unknown_value_and_range(struct bpf_reg_state *regs, @@ -779,17 +789,28 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) } static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, - int off, int size) + int off, int size, bool strict) { - if (reg->id && size != 1) { - verbose("Unknown alignment. Only byte-sized access allowed in packet access.\n"); - return -EACCES; + int reg_off; + + /* Byte size accesses are always allowed. */ + if (!strict || size == 1) + return 0; + + reg_off = reg->off; + if (reg->id) { + if (reg->aux_off_align % size) { + verbose("Packet access is only %u byte aligned, %d byte access not allowed\n", + reg->aux_off_align, size); + return -EACCES; + } + reg_off += reg->aux_off; } /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg->off + off) % size != 0) { + if ((NET_IP_ALIGN + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", - NET_IP_ALIGN, reg->off, off, size); + NET_IP_ALIGN, reg_off, off, size); return -EACCES; } @@ -797,9 +818,9 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, } static int check_val_ptr_alignment(const struct bpf_reg_state *reg, - int size) + int size, bool strict) { - if (size != 1) { + if (strict && size != 1) { verbose("Unknown alignment. Only byte-sized access allowed in value access.\n"); return -EACCES; } @@ -810,13 +831,16 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg, static int check_ptr_alignment(const struct bpf_reg_state *reg, int off, int size) { + bool strict = false; + + if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) + strict = true; + switch (reg->type) { case PTR_TO_PACKET: - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : - check_pkt_ptr_alignment(reg, off, size); + return check_pkt_ptr_alignment(reg, off, size, strict); case PTR_TO_MAP_VALUE_ADJ: - return IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) ? 0 : - check_val_ptr_alignment(reg, size); + return check_val_ptr_alignment(reg, size, strict); default: if (off % size != 0) { verbose("misaligned access off %d size %d\n", @@ -883,6 +907,8 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, value_regno); /* note that reg.[id|off|range] == 0 */ state->regs[value_regno].type = reg_type; + state->regs[value_regno].aux_off = 0; + state->regs[value_regno].aux_off_align = 0; } } else if (reg->type == FRAME_PTR || reg->type == PTR_TO_STACK) { @@ -1455,6 +1481,8 @@ add_imm: */ dst_reg->off += imm; } else { + bool had_id; + if (src_reg->type == PTR_TO_PACKET) { /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ tmp_reg = *dst_reg; /* save r7 state */ @@ -1488,14 +1516,23 @@ add_imm: src_reg->imm); return -EACCES; } + + had_id = (dst_reg->id != 0); + /* dst_reg stays as pkt_ptr type and since some positive * integer value was added to the pointer, increment its 'id' */ dst_reg->id = ++env->id_gen; - /* something was added to pkt_ptr, set range and off to zero */ + /* something was added to pkt_ptr, set range to zero */ + dst_reg->aux_off = dst_reg->off; dst_reg->off = 0; dst_reg->range = 0; + if (had_id) + dst_reg->aux_off_align = min(dst_reg->aux_off_align, + src_reg->min_align); + else + dst_reg->aux_off_align = src_reg->min_align; } return 0; } @@ -1669,6 +1706,13 @@ static void check_reg_overflow(struct bpf_reg_state *reg) reg->min_value = BPF_REGISTER_MIN_RANGE; } +static u32 calc_align(u32 imm) +{ + if (!imm) + return 1U << 31; + return imm - ((imm - 1) & imm); +} + static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, struct bpf_insn *insn) { @@ -1676,8 +1720,10 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, s64 min_val = BPF_REGISTER_MIN_RANGE; u64 max_val = BPF_REGISTER_MAX_RANGE; u8 opcode = BPF_OP(insn->code); + u32 dst_align, src_align; dst_reg = ®s[insn->dst_reg]; + src_align = 0; if (BPF_SRC(insn->code) == BPF_X) { check_reg_overflow(®s[insn->src_reg]); min_val = regs[insn->src_reg].min_value; @@ -1693,12 +1739,18 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, regs[insn->src_reg].type != UNKNOWN_VALUE) { min_val = BPF_REGISTER_MIN_RANGE; max_val = BPF_REGISTER_MAX_RANGE; + src_align = 0; + } else { + src_align = regs[insn->src_reg].min_align; } } else if (insn->imm < BPF_REGISTER_MAX_RANGE && (s64)insn->imm > BPF_REGISTER_MIN_RANGE) { min_val = max_val = insn->imm; + src_align = calc_align(insn->imm); } + dst_align = dst_reg->min_align; + /* We don't know anything about what was done to this register, mark it * as unknown. */ @@ -1723,18 +1775,21 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, dst_reg->min_value += min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value += max_val; + dst_reg->min_align = min(src_align, dst_align); break; case BPF_SUB: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value -= min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value -= max_val; + dst_reg->min_align = min(src_align, dst_align); break; case BPF_MUL: if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) dst_reg->min_value *= min_val; if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value *= max_val; + dst_reg->min_align = max(src_align, dst_align); break; case BPF_AND: /* Disallow AND'ing of negative numbers, ain't nobody got time @@ -1746,17 +1801,23 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, else dst_reg->min_value = 0; dst_reg->max_value = max_val; + dst_reg->min_align = max(src_align, dst_align); break; case BPF_LSH: /* Gotta have special overflow logic here, if we're shifting * more than MAX_RANGE then just assume we have an invalid * range. */ - if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) + if (min_val > ilog2(BPF_REGISTER_MAX_RANGE)) { dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) - dst_reg->min_value <<= min_val; - + dst_reg->min_align = 1; + } else { + if (dst_reg->min_value != BPF_REGISTER_MIN_RANGE) + dst_reg->min_value <<= min_val; + if (!dst_reg->min_align) + dst_reg->min_align = 1; + dst_reg->min_align <<= min_val; + } if (max_val > ilog2(BPF_REGISTER_MAX_RANGE)) dst_reg->max_value = BPF_REGISTER_MAX_RANGE; else if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) @@ -1766,11 +1827,19 @@ static void adjust_reg_min_max_vals(struct bpf_verifier_env *env, /* RSH by a negative number is undefined, and the BPF_RSH is an * unsigned shift, so make the appropriate casts. */ - if (min_val < 0 || dst_reg->min_value < 0) + if (min_val < 0 || dst_reg->min_value < 0) { dst_reg->min_value = BPF_REGISTER_MIN_RANGE; - else + } else { dst_reg->min_value = (u64)(dst_reg->min_value) >> min_val; + } + if (min_val < 0) { + dst_reg->min_align = 1; + } else { + dst_reg->min_align >>= (u64) min_val; + if (!dst_reg->min_align) + dst_reg->min_align = 1; + } if (dst_reg->max_value != BPF_REGISTER_MAX_RANGE) dst_reg->max_value >>= max_val; break; @@ -1872,6 +1941,7 @@ static int check_alu_op(struct bpf_verifier_env *env, struct bpf_insn *insn) regs[insn->dst_reg].imm = insn->imm; regs[insn->dst_reg].max_value = insn->imm; regs[insn->dst_reg].min_value = insn->imm; + regs[insn->dst_reg].min_align = calc_align(insn->imm); } } else if (opcode > BPF_END) { -- cgit v1.2.3 From e07b98d9bffe410019dfcf62c3428d4a96c56a2c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:38:07 -0700 Subject: bpf: Add strict alignment flag for BPF_PROG_LOAD. Add a new field, "prog_flags", and an initial flag value BPF_F_STRICT_ALIGNMENT. When set, the verifier will enforce strict pointer alignment regardless of the setting of CONFIG_EFFICIENT_UNALIGNED_ACCESS. The verifier, in this mode, will also use a fixed value of "2" in place of NET_IP_ALIGN. This facilitates test cases that will exercise and validate this part of the verifier even when run on architectures where alignment doesn't matter. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/syscall.c | 5 ++++- kernel/bpf/verifier.c | 23 +++++++++++++++++------ tools/build/feature/test-bpf.c | 1 + tools/include/uapi/linux/bpf.h | 11 +++++++++-- 6 files changed, 40 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7c6a51924afc..d5093b52b485 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -90,6 +90,7 @@ struct bpf_verifier_env { struct bpf_prog *prog; /* eBPF program being verified */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ + bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 945a1f5f63c5..94dfa9def355 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -132,6 +132,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -177,6 +184,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index fd2411fd6914..265a0d854e33 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -783,7 +783,7 @@ struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type) EXPORT_SYMBOL_GPL(bpf_prog_get_type); /* last field in 'union bpf_attr' used by this command */ -#define BPF_PROG_LOAD_LAST_FIELD kern_version +#define BPF_PROG_LOAD_LAST_FIELD prog_flags static int bpf_prog_load(union bpf_attr *attr) { @@ -796,6 +796,9 @@ static int bpf_prog_load(union bpf_attr *attr) if (CHECK_ATTR(BPF_PROG_LOAD)) return -EINVAL; + if (attr->prog_flags & ~BPF_F_STRICT_ALIGNMENT) + return -EINVAL; + /* copy eBPF program license from user space */ if (strncpy_from_user(license, u64_to_user_ptr(attr->license), sizeof(license) - 1) < 0) diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ff2bfe1d656a..e74fb1b87855 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -791,6 +791,7 @@ static bool is_pointer_value(struct bpf_verifier_env *env, int regno) static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, int off, int size, bool strict) { + int ip_align; int reg_off; /* Byte size accesses are always allowed. */ @@ -807,10 +808,14 @@ static int check_pkt_ptr_alignment(const struct bpf_reg_state *reg, reg_off += reg->aux_off; } - /* skb->data is NET_IP_ALIGN-ed */ - if ((NET_IP_ALIGN + reg_off + off) % size != 0) { + /* skb->data is NET_IP_ALIGN-ed, but for strict alignment checking + * we force this to 2 which is universally what architectures use + * when they don't set CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS. + */ + ip_align = strict ? 2 : NET_IP_ALIGN; + if ((ip_align + reg_off + off) % size != 0) { verbose("misaligned packet access off %d+%d+%d size %d\n", - NET_IP_ALIGN, reg_off, off, size); + ip_align, reg_off, off, size); return -EACCES; } @@ -828,10 +833,11 @@ static int check_val_ptr_alignment(const struct bpf_reg_state *reg, return 0; } -static int check_ptr_alignment(const struct bpf_reg_state *reg, +static int check_ptr_alignment(struct bpf_verifier_env *env, + const struct bpf_reg_state *reg, int off, int size) { - bool strict = false; + bool strict = env->strict_alignment; if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS)) strict = true; @@ -873,7 +879,7 @@ static int check_mem_access(struct bpf_verifier_env *env, u32 regno, int off, if (size < 0) return size; - err = check_ptr_alignment(reg, off, size); + err = check_ptr_alignment(env, reg, off, size); if (err) return err; @@ -3568,6 +3574,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr) } else { log_level = 0; } + if (attr->prog_flags & BPF_F_STRICT_ALIGNMENT) + env->strict_alignment = true; + else + env->strict_alignment = false; ret = replace_map_fd_with_map_ptr(env); if (ret < 0) @@ -3673,6 +3683,7 @@ int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, mutex_lock(&bpf_verifier_lock); log_level = 0; + env->strict_alignment = false; env->explored_states = kcalloc(env->prog->len, sizeof(struct bpf_verifier_state_list *), diff --git a/tools/build/feature/test-bpf.c b/tools/build/feature/test-bpf.c index ebc6dceddb58..7598361ef1f1 100644 --- a/tools/build/feature/test-bpf.c +++ b/tools/build/feature/test-bpf.c @@ -29,6 +29,7 @@ int main(void) attr.log_size = 0; attr.log_level = 0; attr.kern_version = 0; + attr.prog_flags = 0; /* * Test existence of __NR_bpf and BPF_PROG_LOAD. diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e553529929f6..94dfa9def355 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -132,6 +132,13 @@ enum bpf_attach_type { */ #define BPF_F_ALLOW_OVERRIDE (1U << 0) +/* If BPF_F_STRICT_ALIGNMENT is used in BPF_PROG_LOAD command, the + * verifier will perform strict alignment checking as if the kernel + * has been built with CONFIG_EFFICIENT_UNALIGNED_ACCESS not set, + * and NET_IP_ALIGN defined to 2. + */ +#define BPF_F_STRICT_ALIGNMENT (1U << 0) + #define BPF_PSEUDO_MAP_FD 1 /* flags for BPF_MAP_UPDATE_ELEM command */ @@ -177,6 +184,7 @@ union bpf_attr { __u32 log_size; /* size of user buffer */ __aligned_u64 log_buf; /* user supplied buffer */ __u32 kern_version; /* checked when prog_type=kprobe */ + __u32 prog_flags; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -481,8 +489,7 @@ union bpf_attr { * u32 bpf_get_socket_uid(skb) * Get the owner uid of the socket stored inside sk_buff. * @skb: pointer to skb - * Return: uid of the socket owner on success or 0 if the socket pointer - * inside sk_buff is NULL + * Return: uid of the socket owner on success or overflowuid if failed. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ -- cgit v1.2.3 From 0489df9a430e9607de8130a6bc4bf4c02f96eaf1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 12 May 2017 01:04:45 +0200 Subject: xdp: add flag to enforce driver mode After commit b5cdae3291f7 ("net: Generic XDP") we automatically fall back to a generic XDP variant if the driver does not support native XDP. Allow for an option where the user can specify that always the native XDP variant should be selected and in case it's not supported by a driver, just bail out. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/if_link.h | 6 ++++-- net/core/dev.c | 2 ++ net/core/rtnetlink.c | 5 +++++ samples/bpf/xdp1_user.c | 8 ++++++-- samples/bpf/xdp_tx_iptunnel_user.c | 7 ++++++- 5 files changed, 23 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 8e56ac70e0d1..549ac8a98b44 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -888,9 +888,11 @@ enum { /* XDP section */ #define XDP_FLAGS_UPDATE_IF_NOEXIST (1U << 0) -#define XDP_FLAGS_SKB_MODE (2U << 0) +#define XDP_FLAGS_SKB_MODE (1U << 1) +#define XDP_FLAGS_DRV_MODE (1U << 2) #define XDP_FLAGS_MASK (XDP_FLAGS_UPDATE_IF_NOEXIST | \ - XDP_FLAGS_SKB_MODE) + XDP_FLAGS_SKB_MODE | \ + XDP_FLAGS_DRV_MODE) enum { IFLA_XDP_UNSPEC, diff --git a/net/core/dev.c b/net/core/dev.c index 96cf83da0d66..e56cb71351d4 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6873,6 +6873,8 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, ASSERT_RTNL(); xdp_op = ops->ndo_xdp; + if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) + return -EOPNOTSUPP; if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) xdp_op = generic_xdp_install; diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index bcb0f610ee42..dda9f1636356 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -2199,6 +2199,11 @@ static int do_setlink(const struct sk_buff *skb, err = -EINVAL; goto errout; } + if ((xdp_flags & XDP_FLAGS_SKB_MODE) && + (xdp_flags & XDP_FLAGS_DRV_MODE)) { + err = -EINVAL; + goto errout; + } } if (xdp[IFLA_XDP_FD]) { diff --git a/samples/bpf/xdp1_user.c b/samples/bpf/xdp1_user.c index 378850c70eb8..17be9ea3ecb2 100644 --- a/samples/bpf/xdp1_user.c +++ b/samples/bpf/xdp1_user.c @@ -62,13 +62,14 @@ static void usage(const char *prog) fprintf(stderr, "usage: %s [OPTS] IFINDEX\n\n" "OPTS:\n" - " -S use skb-mode\n", + " -S use skb-mode\n" + " -N enforce native mode\n", prog); } int main(int argc, char **argv) { - const char *optstr = "S"; + const char *optstr = "SN"; char filename[256]; int opt; @@ -77,6 +78,9 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; default: usage(basename(argv[0])); return 1; diff --git a/samples/bpf/xdp_tx_iptunnel_user.c b/samples/bpf/xdp_tx_iptunnel_user.c index 92b8bde9337c..631cdcc41c97 100644 --- a/samples/bpf/xdp_tx_iptunnel_user.c +++ b/samples/bpf/xdp_tx_iptunnel_user.c @@ -79,6 +79,8 @@ static void usage(const char *cmd) printf(" -m Used in sending the IP Tunneled pkt\n"); printf(" -T Default: 0 (forever)\n"); printf(" -P Default is TCP\n"); + printf(" -S use skb-mode\n"); + printf(" -N enforce native mode\n"); printf(" -h Display this help\n"); } @@ -138,7 +140,7 @@ int main(int argc, char **argv) { unsigned char opt_flags[256] = {}; unsigned int kill_after_s = 0; - const char *optstr = "i:a:p:s:d:m:T:P:Sh"; + const char *optstr = "i:a:p:s:d:m:T:P:SNh"; int min_port = 0, max_port = 0; struct iptnl_info tnl = {}; struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY}; @@ -206,6 +208,9 @@ int main(int argc, char **argv) case 'S': xdp_flags |= XDP_FLAGS_SKB_MODE; break; + case 'N': + xdp_flags |= XDP_FLAGS_DRV_MODE; + break; default: usage(argv[0]); return 1; -- cgit v1.2.3 From d67b9cd28c1d7f82c2e5e727731ea7c89b23a0a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 12 May 2017 01:04:46 +0200 Subject: xdp: refine xdp api with regards to generic xdp While working on the iproute2 generic XDP frontend, I noticed that as of right now it's possible to have native *and* generic XDP programs loaded both at the same time for the case when a driver supports native XDP. The intended model for generic XDP from b5cdae3291f7 ("net: Generic XDP") is, however, that only one out of the two can be present at once which is also indicated as such in the XDP netlink dump part. The main rationale for generic XDP is to ease accessibility (in case a driver does not yet have XDP support) and to generically provide a semantical model as an example for driver developers wanting to add XDP support. The generic XDP option for an XDP aware driver can still be useful for comparing and testing both implementations. However, it is not intended to have a second XDP processing stage or layer with exactly the same functionality of the first native stage. Only reason could be to have a partial fallback for future XDP features that are not supported yet in the native implementation and we probably also shouldn't strive for such fallback and instead encourage native feature support in the first place. Given there's currently no such fallback issue or use case, lets not go there yet if we don't need to. Therefore, change semantics for loading XDP and bail out if the user tries to load a generic XDP program when a native one is present and vice versa. Another alternative to bailing out would be to handle the transition from one flavor to another gracefully, but that would require to bring the device down, exchange both types of programs, and bring it up again in order to avoid a tiny window where a packet could hit both hooks. Given this complicates the logic for just a debugging feature in the native case, I went with the simpler variant. For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7 and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all or just a subset of flags that were used for loading the XDP prog is suboptimal in the long run since not all flags are useful for dumping and if we start to reuse the same flag definitions for load and dump, then we'll waste bit space. What we really just want is to dump the mode for now. Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0), a program is running at the native driver layer (1). Thus, add a mode that says that a program is running at generic XDP layer (2). Applications will handle this fine in that older binaries will just indicate that something is attached at XDP layer, effectively this is similar to IFLA_XDP_FLAGS attr that we would have had modulo the redundancy. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++-- include/uapi/linux/if_link.h | 7 ++++++ net/core/dev.c | 55 +++++++++++++++++++++++++++++--------------- net/core/rtnetlink.c | 40 +++++++++++++++----------------- 4 files changed, 67 insertions(+), 43 deletions(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c23bd2efb56..3f39d27decf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3296,11 +3296,15 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); + +typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h index 549ac8a98b44..15ac20382aba 100644 --- a/include/uapi/linux/if_link.h +++ b/include/uapi/linux/if_link.h @@ -894,6 +894,13 @@ enum { XDP_FLAGS_SKB_MODE | \ XDP_FLAGS_DRV_MODE) +/* These are stored into IFLA_XDP_ATTACHED on dump. */ +enum { + XDP_ATTACHED_NONE = 0, + XDP_ATTACHED_DRV, + XDP_ATTACHED_SKB, +}; + enum { IFLA_XDP_UNSPEC, IFLA_XDP_FD, diff --git a/net/core/dev.c b/net/core/dev.c index e56cb71351d4..fca407b4a6ea 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -6852,6 +6852,32 @@ int dev_change_proto_down(struct net_device *dev, bool proto_down) } EXPORT_SYMBOL(dev_change_proto_down); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_QUERY_PROG; + + /* Query must always succeed. */ + WARN_ON(xdp_op(dev, &xdp) < 0); + return xdp.prog_attached; +} + +static int dev_xdp_install(struct net_device *dev, xdp_op_t xdp_op, + struct netlink_ext_ack *extack, + struct bpf_prog *prog) +{ + struct netdev_xdp xdp; + + memset(&xdp, 0, sizeof(xdp)); + xdp.command = XDP_SETUP_PROG; + xdp.extack = extack; + xdp.prog = prog; + + return xdp_op(dev, &xdp); +} + /** * dev_change_xdp_fd - set or clear a bpf program for a device rx path * @dev: device @@ -6864,43 +6890,34 @@ EXPORT_SYMBOL(dev_change_proto_down); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags) { - int (*xdp_op)(struct net_device *dev, struct netdev_xdp *xdp); const struct net_device_ops *ops = dev->netdev_ops; struct bpf_prog *prog = NULL; - struct netdev_xdp xdp; + xdp_op_t xdp_op, xdp_chk; int err; ASSERT_RTNL(); - xdp_op = ops->ndo_xdp; + xdp_op = xdp_chk = ops->ndo_xdp; if (!xdp_op && (flags & XDP_FLAGS_DRV_MODE)) return -EOPNOTSUPP; if (!xdp_op || (flags & XDP_FLAGS_SKB_MODE)) xdp_op = generic_xdp_install; + if (xdp_op == xdp_chk) + xdp_chk = generic_xdp_install; if (fd >= 0) { - if (flags & XDP_FLAGS_UPDATE_IF_NOEXIST) { - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_QUERY_PROG; - - err = xdp_op(dev, &xdp); - if (err < 0) - return err; - if (xdp.prog_attached) - return -EBUSY; - } + if (xdp_chk && __dev_xdp_attached(dev, xdp_chk)) + return -EEXIST; + if ((flags & XDP_FLAGS_UPDATE_IF_NOEXIST) && + __dev_xdp_attached(dev, xdp_op)) + return -EBUSY; prog = bpf_prog_get_type(fd, BPF_PROG_TYPE_XDP); if (IS_ERR(prog)) return PTR_ERR(prog); } - memset(&xdp, 0, sizeof(xdp)); - xdp.command = XDP_SETUP_PROG; - xdp.extack = extack; - xdp.prog = prog; - - err = xdp_op(dev, &xdp); + err = dev_xdp_install(dev, xdp_op, extack, prog); if (err < 0 && prog) bpf_prog_put(prog); diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c index dda9f1636356..d7f82c3450b1 100644 --- a/net/core/rtnetlink.c +++ b/net/core/rtnetlink.c @@ -899,8 +899,7 @@ static size_t rtnl_port_size(const struct net_device *dev, static size_t rtnl_xdp_size(void) { size_t xdp_size = nla_total_size(0) + /* nest IFLA_XDP */ - nla_total_size(1) + /* XDP_ATTACHED */ - nla_total_size(4); /* XDP_FLAGS */ + nla_total_size(1); /* XDP_ATTACHED */ return xdp_size; } @@ -1247,37 +1246,34 @@ static int rtnl_fill_link_ifmap(struct sk_buff *skb, struct net_device *dev) return 0; } +static u8 rtnl_xdp_attached_mode(struct net_device *dev) +{ + const struct net_device_ops *ops = dev->netdev_ops; + + ASSERT_RTNL(); + + if (rcu_access_pointer(dev->xdp_prog)) + return XDP_ATTACHED_SKB; + if (ops->ndo_xdp && __dev_xdp_attached(dev, ops->ndo_xdp)) + return XDP_ATTACHED_DRV; + + return XDP_ATTACHED_NONE; +} + static int rtnl_xdp_fill(struct sk_buff *skb, struct net_device *dev) { struct nlattr *xdp; - u32 xdp_flags = 0; - u8 val = 0; int err; xdp = nla_nest_start(skb, IFLA_XDP); if (!xdp) return -EMSGSIZE; - if (rcu_access_pointer(dev->xdp_prog)) { - xdp_flags = XDP_FLAGS_SKB_MODE; - val = 1; - } else if (dev->netdev_ops->ndo_xdp) { - struct netdev_xdp xdp_op = {}; - - xdp_op.command = XDP_QUERY_PROG; - err = dev->netdev_ops->ndo_xdp(dev, &xdp_op); - if (err) - goto err_cancel; - val = xdp_op.prog_attached; - } - err = nla_put_u8(skb, IFLA_XDP_ATTACHED, val); + + err = nla_put_u8(skb, IFLA_XDP_ATTACHED, + rtnl_xdp_attached_mode(dev)); if (err) goto err_cancel; - if (xdp_flags) { - err = nla_put_u32(skb, IFLA_XDP_FLAGS, xdp_flags); - if (err) - goto err_cancel; - } nla_nest_end(skb, xdp); return 0; -- cgit v1.2.3 From f5705aa8cfed142d980ecac12bee0d81b756479e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 13 May 2017 16:31:05 -0700 Subject: dax, xfs, ext4: compile out iomap-dax paths in the FS_DAX=n case Tetsuo reports: fs/built-in.o: In function `xfs_file_iomap_end': xfs_iomap.c:(.text+0xe0ef9): undefined reference to `put_dax' fs/built-in.o: In function `xfs_file_iomap_begin': xfs_iomap.c:(.text+0xe1a7f): undefined reference to `dax_get_by_host' make: *** [vmlinux] Error 1 $ grep DAX .config CONFIG_DAX=m # CONFIG_DEV_DAX is not set # CONFIG_FS_DAX is not set When FS_DAX=n we can/must throw away the dax code in filesystems. Implement 'fs_' versions of dax_get_by_host() and put_dax() that are nops in the FS_DAX=n case. Cc: Cc: Cc: Jan Kara Cc: "Theodore Ts'o" Cc: "Darrick J. Wong" Cc: Ross Zwisler Tested-by: Tony Luck Fixes: ef51042472f5 ("block, dax: move 'select DAX' from BLOCK to FS_DAX") Reported-by: Tetsuo Handa Signed-off-by: Dan Williams --- fs/ext2/inode.c | 4 ++-- fs/ext4/inode.c | 4 ++-- fs/xfs/xfs_iomap.c | 4 ++-- include/linux/dax.h | 34 +++++++++++++++++++++++++++------- 4 files changed, 33 insertions(+), 13 deletions(-) (limited to 'include') diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index 26d77f9f8c12..2dcbd5698884 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -817,7 +817,7 @@ static int ext2_iomap_begin(struct inode *inode, loff_t offset, loff_t length, iomap->bdev = bdev; iomap->offset = (u64)first_block << blkbits; if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); else iomap->dax_dev = NULL; @@ -841,7 +841,7 @@ static int ext2_iomap_end(struct inode *inode, loff_t offset, loff_t length, ssize_t written, unsigned flags, struct iomap *iomap) { - put_dax(iomap->dax_dev); + fs_put_dax(iomap->dax_dev); if (iomap->type == IOMAP_MAPPED && written < length && (flags & IOMAP_WRITE)) diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 5834c4d76be8..1bd0bfa547f6 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3412,7 +3412,7 @@ retry: bdev = inode->i_sb->s_bdev; iomap->bdev = bdev; if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); else iomap->dax_dev = NULL; iomap->offset = first_block << blkbits; @@ -3447,7 +3447,7 @@ static int ext4_iomap_end(struct inode *inode, loff_t offset, loff_t length, int blkbits = inode->i_blkbits; bool truncate = false; - put_dax(iomap->dax_dev); + fs_put_dax(iomap->dax_dev); if (!(flags & IOMAP_WRITE) || (flags & IOMAP_FAULT)) return 0; diff --git a/fs/xfs/xfs_iomap.c b/fs/xfs/xfs_iomap.c index a63f61c256bd..94e5bdf7304c 100644 --- a/fs/xfs/xfs_iomap.c +++ b/fs/xfs/xfs_iomap.c @@ -1068,7 +1068,7 @@ xfs_file_iomap_begin( /* optionally associate a dax device with the iomap bdev */ bdev = iomap->bdev; if (blk_queue_dax(bdev->bd_queue)) - iomap->dax_dev = dax_get_by_host(bdev->bd_disk->disk_name); + iomap->dax_dev = fs_dax_get_by_host(bdev->bd_disk->disk_name); else iomap->dax_dev = NULL; @@ -1149,7 +1149,7 @@ xfs_file_iomap_end( unsigned flags, struct iomap *iomap) { - put_dax(iomap->dax_dev); + fs_put_dax(iomap->dax_dev); if ((flags & IOMAP_WRITE) && iomap->type == IOMAP_DELALLOC) return xfs_file_iomap_end_delalloc(XFS_I(inode), offset, length, written, iomap); diff --git a/include/linux/dax.h b/include/linux/dax.h index 00ebac854bb7..5ec1f6c47716 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -18,6 +18,20 @@ struct dax_operations { void **, pfn_t *); }; +#if IS_ENABLED(CONFIG_DAX) +struct dax_device *dax_get_by_host(const char *host); +void put_dax(struct dax_device *dax_dev); +#else +static inline struct dax_device *dax_get_by_host(const char *host) +{ + return NULL; +} + +static inline void put_dax(struct dax_device *dax_dev) +{ +} +#endif + int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int __bdev_dax_supported(struct super_block *sb, int blocksize); @@ -25,23 +39,29 @@ static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { return __bdev_dax_supported(sb, blocksize); } + +static inline struct dax_device *fs_dax_get_by_host(const char *host) +{ + return dax_get_by_host(host); +} + +static inline void fs_put_dax(struct dax_device *dax_dev) +{ + put_dax(dax_dev); +} + #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { return -EOPNOTSUPP; } -#endif -#if IS_ENABLED(CONFIG_DAX) -struct dax_device *dax_get_by_host(const char *host); -void put_dax(struct dax_device *dax_dev); -#else -static inline struct dax_device *dax_get_by_host(const char *host) +static inline struct dax_device *fs_dax_get_by_host(const char *host) { return NULL; } -static inline void put_dax(struct dax_device *dax_dev) +static inline void fs_put_dax(struct dax_device *dax_dev) { } #endif -- cgit v1.2.3 From 508541146af18e43072e41a31aa62fac2b01aac1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 25 Apr 2017 10:39:57 +0300 Subject: net/mlx5: Use underlay QPN from the root name space Root flow table is dynamically changed by the underlying flow steering layer, and IPoIB/ULPs have no idea what will be the root flow table in the future, hence we need a dynamic infrastructure to move Underlay QPs with the root flow table. Fixes: b3ba51498bdd ("net/mlx5: Refactor create flow table method to accept underlay QP") Signed-off-by: Erez Shitrit Signed-off-by: Maor Gottlieb Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed --- drivers/net/ethernet/mellanox/mlx5/core/en.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/en_fs.c | 5 ++--- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 9 +++----- drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h | 3 ++- drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 25 +++++++++++++++++++---- drivers/net/ethernet/mellanox/mlx5/core/fs_core.h | 2 +- drivers/net/ethernet/mellanox/mlx5/core/ipoib.c | 7 +++++-- include/linux/mlx5/fs.h | 4 +++- 8 files changed, 38 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h index 0099a3e397bc..2fd044b23875 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h @@ -1003,7 +1003,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_direct_tirs(struct mlx5e_priv *priv); void mlx5e_destroy_rqt(struct mlx5e_priv *priv, struct mlx5e_rqt *rqt); -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn); +int mlx5e_create_ttc_table(struct mlx5e_priv *priv); void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv); int mlx5e_create_tis(struct mlx5_core_dev *mdev, int tc, diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c index 576d6787b484..53ed58320a24 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs.c @@ -800,7 +800,7 @@ void mlx5e_destroy_ttc_table(struct mlx5e_priv *priv) mlx5e_destroy_flow_table(&ttc->ft); } -int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) +int mlx5e_create_ttc_table(struct mlx5e_priv *priv) { struct mlx5e_ttc_table *ttc = &priv->fs.ttc; struct mlx5_flow_table_attr ft_attr = {}; @@ -810,7 +810,6 @@ int mlx5e_create_ttc_table(struct mlx5e_priv *priv, u32 underlay_qpn) ft_attr.max_fte = MLX5E_TTC_TABLE_SIZE; ft_attr.level = MLX5E_TTC_FT_LEVEL; ft_attr.prio = MLX5E_NIC_PRIO; - ft_attr.underlay_qpn = underlay_qpn; ft->t = mlx5_create_flow_table(priv->fs.ns, &ft_attr); if (IS_ERR(ft->t)) { @@ -1147,7 +1146,7 @@ int mlx5e_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv, 0); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c index 19e3d2fc2099..fcec7bedd3cd 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c @@ -40,28 +40,25 @@ #include "eswitch.h" int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft) + struct mlx5_flow_table *ft, u32 underlay_qpn) { u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {0}; u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0}; if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && - ft->underlay_qpn == 0) + underlay_qpn == 0) return 0; MLX5_SET(set_flow_table_root_in, in, opcode, MLX5_CMD_OP_SET_FLOW_TABLE_ROOT); MLX5_SET(set_flow_table_root_in, in, table_type, ft->type); MLX5_SET(set_flow_table_root_in, in, table_id, ft->id); + MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn); if (ft->vport) { MLX5_SET(set_flow_table_root_in, in, vport_number, ft->vport); MLX5_SET(set_flow_table_root_in, in, other_vport, 1); } - if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) && - ft->underlay_qpn != 0) - MLX5_SET(set_flow_table_root_in, in, underlay_qpn, ft->underlay_qpn); - return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out)); } diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h index 8fad80688536..0f98a7cf4877 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.h @@ -71,7 +71,8 @@ int mlx5_cmd_delete_fte(struct mlx5_core_dev *dev, unsigned int index); int mlx5_cmd_update_root_ft(struct mlx5_core_dev *dev, - struct mlx5_flow_table *ft); + struct mlx5_flow_table *ft, + u32 underlay_qpn); int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u16 *id); int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u16 id); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c index b8a176503d38..0e487e8ca634 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c @@ -650,7 +650,7 @@ static int update_root_ft_create(struct mlx5_flow_table *ft, struct fs_prio if (ft->level >= min_level) return 0; - err = mlx5_cmd_update_root_ft(root->dev, ft); + err = mlx5_cmd_update_root_ft(root->dev, ft, root->underlay_qpn); if (err) mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", ft->id); @@ -818,8 +818,6 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa goto unlock_root; } - ft->underlay_qpn = ft_attr->underlay_qpn; - tree_init_node(&ft->node, 1, del_flow_table); log_table_sz = ft->max_fte ? ilog2(ft->max_fte) : 0; next_ft = find_next_chained_ft(fs_prio); @@ -1489,7 +1487,8 @@ static int update_root_ft_destroy(struct mlx5_flow_table *ft) new_root_ft = find_next_ft(ft); if (new_root_ft) { - int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft); + int err = mlx5_cmd_update_root_ft(root->dev, new_root_ft, + root->underlay_qpn); if (err) { mlx5_core_warn(root->dev, "Update root flow table of id=%u failed\n", @@ -2062,3 +2061,21 @@ err: mlx5_cleanup_fs(dev); return err; } + +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + + root->underlay_qpn = underlay_qpn; + return 0; +} +EXPORT_SYMBOL(mlx5_fs_add_rx_underlay_qpn); + +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn) +{ + struct mlx5_flow_root_namespace *root = dev->priv.steering->root_ns; + + root->underlay_qpn = 0; + return 0; +} +EXPORT_SYMBOL(mlx5_fs_remove_rx_underlay_qpn); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h index 81eafc7b9dd9..990acee6fb09 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h +++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h @@ -118,7 +118,6 @@ struct mlx5_flow_table { /* FWD rules that point on this flow table */ struct list_head fwd_rules; u32 flags; - u32 underlay_qpn; }; struct mlx5_fc_cache { @@ -195,6 +194,7 @@ struct mlx5_flow_root_namespace { struct mlx5_flow_table *root_ft; /* Should be held when chaining flow tables */ struct mutex chain_lock; + u32 underlay_qpn; }; int mlx5_init_fc_stats(struct mlx5_core_dev *dev); diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c index 56bff3540954..cc1858752e70 100644 --- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c +++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib.c @@ -160,6 +160,8 @@ out: static void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp) { + mlx5_fs_remove_rx_underlay_qpn(mdev, qp->qpn); + mlx5_core_destroy_qp(mdev, qp); } @@ -174,6 +176,8 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv) return err; } + mlx5_fs_add_rx_underlay_qpn(priv->mdev, ipriv->qp.qpn); + err = mlx5e_create_tis(priv->mdev, 0 /* tc */, ipriv->qp.qpn, &priv->tisn[0]); if (err) { mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err); @@ -193,7 +197,6 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv) static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) { - struct mlx5i_priv *ipriv = priv->ppriv; int err; priv->fs.ns = mlx5_get_flow_namespace(priv->mdev, @@ -209,7 +212,7 @@ static int mlx5i_create_flow_steering(struct mlx5e_priv *priv) priv->netdev->hw_features &= ~NETIF_F_NTUPLE; } - err = mlx5e_create_ttc_table(priv, ipriv->qp.qpn); + err = mlx5e_create_ttc_table(priv); if (err) { netdev_err(priv->netdev, "Failed to create ttc table, err=%d\n", err); diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1b166d2e19c5..b25e7baa273e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -109,7 +109,6 @@ struct mlx5_flow_table_attr { int max_fte; u32 level; u32 flags; - u32 underlay_qpn; }; struct mlx5_flow_table * @@ -167,4 +166,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); + #endif -- cgit v1.2.3 From 93491ced3c87c94b12220dbac0527e1356702179 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Wed, 10 May 2017 18:18:29 +0200 Subject: USB: hub: fix SS max number of ports Add define for the maximum number of ports on a SuperSpeed hub as per USB 3.1 spec Table 10-5, and use it when verifying the retrieved hub descriptor. This specifically avoids benign attempts to update the DeviceRemovable mask for non-existing ports (should we get that far). Fixes: dbe79bbe9dcb ("USB 3.0 Hub Changes") Acked-by: Alan Stern Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- drivers/usb/core/hub.c | 8 +++++++- include/uapi/linux/usb/ch11.h | 3 +++ 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/drivers/usb/core/hub.c b/drivers/usb/core/hub.c index f77a4ebde7d5..b8bb20d7acdb 100644 --- a/drivers/usb/core/hub.c +++ b/drivers/usb/core/hub.c @@ -1336,7 +1336,13 @@ static int hub_configure(struct usb_hub *hub, if (ret < 0) { message = "can't read hub descriptor"; goto fail; - } else if (hub->descriptor->bNbrPorts > USB_MAXCHILDREN) { + } + + maxchild = USB_MAXCHILDREN; + if (hub_is_superspeed(hdev)) + maxchild = min_t(unsigned, maxchild, USB_SS_MAXPORTS); + + if (hub->descriptor->bNbrPorts > maxchild) { message = "hub has too many ports!"; ret = -ENODEV; goto fail; diff --git a/include/uapi/linux/usb/ch11.h b/include/uapi/linux/usb/ch11.h index 361297e96f58..576c704e3fb8 100644 --- a/include/uapi/linux/usb/ch11.h +++ b/include/uapi/linux/usb/ch11.h @@ -22,6 +22,9 @@ */ #define USB_MAXCHILDREN 31 +/* See USB 3.1 spec Table 10-5 */ +#define USB_SS_MAXPORTS 15 + /* * Hub request types */ -- cgit v1.2.3 From 30e7d894c1478c88d50ce94ddcdbd7f9763d9cdd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 May 2017 10:19:49 +0200 Subject: tracing/kprobes: Enforce kprobes teardown after testing Enabling the tracer selftest triggers occasionally the warning in text_poke(), which warns when the to be modified page is not marked reserved. The reason is that the tracer selftest installs kprobes on functions marked __init for testing. These probes are removed after the tests, but that removal schedules the delayed kprobes_optimizer work, which will do the actual text poke. If the work is executed after the init text is freed, then the warning triggers. The bug can be reproduced reliably when the work delay is increased. Flush the optimizer work and wait for the optimizing/unoptimizing lists to become empty before returning from the kprobes tracer selftest. That ensures that all operations which were queued due to the probes removal have completed. Link: http://lkml.kernel.org/r/20170516094802.76a468bb@gandalf.local.home Signed-off-by: Thomas Gleixner Acked-by: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 6274de498 ("kprobes: Support delayed unoptimizing") Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 3 +++ kernel/kprobes.c | 2 +- kernel/trace/trace_kprobe.c | 5 +++++ 3 files changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 30f90c1a0aaf..541df0b5b815 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -349,6 +349,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif +extern void wait_for_kprobe_optimizer(void); +#else +static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 7367e0ec6f81..199243bba554 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -595,7 +595,7 @@ static void kprobe_optimizer(struct work_struct *work) } /* Wait for completing optimization and unoptimization */ -static void wait_for_kprobe_optimizer(void) +void wait_for_kprobe_optimizer(void) { mutex_lock(&kprobe_mutex); diff --git a/kernel/trace/trace_kprobe.c b/kernel/trace/trace_kprobe.c index 8485f6738a87..c129fca6ec99 100644 --- a/kernel/trace/trace_kprobe.c +++ b/kernel/trace/trace_kprobe.c @@ -1535,6 +1535,11 @@ static __init int kprobe_trace_self_tests_init(void) end: release_all_trace_kprobes(); + /* + * Wait for the optimizer work to finish. Otherwise it might fiddle + * with probes in already freed __init text. + */ + wait_for_kprobe_optimizer(); if (warn) pr_cont("NG: Some tests are failed. Please check them.\n"); else -- cgit v1.2.3 From 552c9f47f8d451830a6b47151c6d2db77f77cc3e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Wed, 17 May 2017 13:12:51 +0200 Subject: KVM: arm/arm64: Fix bug when registering redist iodevs If userspace creates the VCPUs after initializing the VGIC, then we end up in a situation where we trigger a bug in kvm_vcpu_get_idx(), because it is called prior to adding the VCPU into the vcpus array on the VM. There is no tight coupling between the VCPU index and the area of the redistributor region used for the VCPU, so we can simply ensure that all creations of redistributors are serialized per VM, and increment an offset when we successfully add a redistributor. The vgic_register_redist_iodev() function can be called from two paths: vgic_redister_all_redist_iodev() which is called via the kvm_vgic_addr() device attribute handler. This patch already holds the kvm->lock mutex. The other path is via kvm_vgic_vcpu_init, which is called through a longer chain from kvm_vm_ioctl_create_vcpu(), which releases the kvm->lock mutex just before calling kvm_arch_vcpu_create(), so we can simply take this mutex again later for our purposes. Fixes: ab6f468c10 ("KVM: arm/arm64: Register iodevs when setting redist base and creating VCPUs") Signed-off-by: Christoffer Dall Tested-by: Jean-Philippe Brucker Reviewed-by: Eric Auger --- include/kvm/arm_vgic.h | 5 ++++- virt/kvm/arm/vgic/vgic-init.c | 5 ++++- virt/kvm/arm/vgic/vgic-mmio-v3.c | 9 ++++++--- 3 files changed, 14 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/kvm/arm_vgic.h b/include/kvm/arm_vgic.h index 97b8d3728b31..ef718586321c 100644 --- a/include/kvm/arm_vgic.h +++ b/include/kvm/arm_vgic.h @@ -195,7 +195,10 @@ struct vgic_dist { /* either a GICv2 CPU interface */ gpa_t vgic_cpu_base; /* or a number of GICv3 redistributor regions */ - gpa_t vgic_redist_base; + struct { + gpa_t vgic_redist_base; + gpa_t vgic_redist_free_offset; + }; }; /* distributor enabled */ diff --git a/virt/kvm/arm/vgic/vgic-init.c b/virt/kvm/arm/vgic/vgic-init.c index dc68e2e424ab..3a0b8999f011 100644 --- a/virt/kvm/arm/vgic/vgic-init.c +++ b/virt/kvm/arm/vgic/vgic-init.c @@ -242,8 +242,11 @@ int kvm_vgic_vcpu_init(struct kvm_vcpu *vcpu) * If we are creating a VCPU with a GICv3 we must also register the * KVM io device for the redistributor that belongs to this VCPU. */ - if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) + if (dist->vgic_model == KVM_DEV_TYPE_ARM_VGIC_V3) { + mutex_lock(&vcpu->kvm->lock); ret = vgic_register_redist_iodev(vcpu); + mutex_unlock(&vcpu->kvm->lock); + } return ret; } diff --git a/virt/kvm/arm/vgic/vgic-mmio-v3.c b/virt/kvm/arm/vgic/vgic-mmio-v3.c index 99da1a207c19..9b0f6810e7a8 100644 --- a/virt/kvm/arm/vgic/vgic-mmio-v3.c +++ b/virt/kvm/arm/vgic/vgic-mmio-v3.c @@ -586,7 +586,7 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) if (!vgic_v3_check_base(kvm)) return -EINVAL; - rd_base = vgic->vgic_redist_base + kvm_vcpu_get_idx(vcpu) * SZ_64K * 2; + rd_base = vgic->vgic_redist_base + vgic->vgic_redist_free_offset; sgi_base = rd_base + SZ_64K; kvm_iodevice_init(&rd_dev->dev, &kvm_io_gic_ops); @@ -615,11 +615,14 @@ int vgic_register_redist_iodev(struct kvm_vcpu *vcpu) ret = kvm_io_bus_register_dev(kvm, KVM_MMIO_BUS, sgi_base, SZ_64K, &sgi_dev->dev); mutex_unlock(&kvm->slots_lock); - if (ret) + if (ret) { kvm_io_bus_unregister_dev(kvm, KVM_MMIO_BUS, &rd_dev->dev); + return ret; + } - return ret; + vgic->vgic_redist_free_offset += 2 * SZ_64K; + return 0; } static void vgic_unregister_redist_iodev(struct kvm_vcpu *vcpu) -- cgit v1.2.3 From 6a29beef9d1b16c762e469d77e28c3de3f5c3dbb Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 17 May 2017 18:32:02 +0300 Subject: usb: host: xhci-ring: don't need to clear interrupt pending for MSI enabled hcd According to xHCI spec Figure 30: Interrupt Throttle Flow Diagram If PCI Message Signaled Interrupts (MSI or MSI-X) are enabled, then the assertion of the Interrupt Pending (IP) flag in Figure 30 generates a PCI Dword write. The IP flag is automatically cleared by the completion of the PCI write. the MSI enabled HCs don't need to clear interrupt pending bit, but hcd->irq = 0 doesn't equal to MSI enabled HCD. At some Dual-role controller software designs, it sets hcd->irq as 0 to avoid HCD requesting interrupt, and they want to decide when to call usb_hcd_irq by software. Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- drivers/usb/host/xhci-ring.c | 5 +---- drivers/usb/host/xhci.c | 5 +++-- include/linux/usb/hcd.h | 1 + 3 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/drivers/usb/host/xhci-ring.c b/drivers/usb/host/xhci-ring.c index 507ba7734b94..0830b25f9499 100644 --- a/drivers/usb/host/xhci-ring.c +++ b/drivers/usb/host/xhci-ring.c @@ -2707,12 +2707,9 @@ irqreturn_t xhci_irq(struct usb_hcd *hcd) */ status |= STS_EINT; writel(status, &xhci->op_regs->status); - /* FIXME when MSI-X is supported and there are multiple vectors */ - /* Clear the MSI-X event interrupt status */ - if (hcd->irq) { + if (!hcd->msi_enabled) { u32 irq_pending; - /* Acknowledge the PCI interrupt */ irq_pending = readl(&xhci->ir_set->irq_pending); irq_pending |= IMAN_IP; writel(irq_pending, &xhci->ir_set->irq_pending); diff --git a/drivers/usb/host/xhci.c b/drivers/usb/host/xhci.c index 2d1310220832..71eb2991c698 100644 --- a/drivers/usb/host/xhci.c +++ b/drivers/usb/host/xhci.c @@ -359,9 +359,10 @@ static int xhci_try_enable_msi(struct usb_hcd *hcd) /* fall back to msi*/ ret = xhci_setup_msi(xhci); - if (!ret) - /* hcd->irq is 0, we have MSI */ + if (!ret) { + hcd->msi_enabled = 1; return 0; + } if (!pdev->irq) { xhci_err(xhci, "No msi-x/msi found and no IRQ in BIOS\n"); diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index a469999a106d..50398b69ca44 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -148,6 +148,7 @@ struct usb_hcd { unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ unsigned msix_enabled:1; /* driver has MSI-X enabled? */ + unsigned msi_enabled:1; /* driver has MSI enabled? */ unsigned remove_phy:1; /* auto-remove USB phy */ /* The next flag is a stopgap, to be removed when all the HCDs -- cgit v1.2.3 From 64df6d525fcff1630098db9238bfd2b3e092d5c1 Mon Sep 17 00:00:00 2001 From: linzhang Date: Wed, 17 May 2017 12:05:07 +0800 Subject: net: x25: fix one potential use-after-free issue The function x25_init is not properly unregister related resources on error handler.It is will result in kernel oops if x25_init init failed, so add properly unregister call on error handler. Also, i adjust the coding style and make x25_register_sysctl properly return failure. Signed-off-by: linzhang Signed-off-by: David S. Miller --- include/net/x25.h | 4 ++-- net/x25/af_x25.c | 24 ++++++++++++++++-------- net/x25/sysctl_net_x25.c | 5 ++++- 3 files changed, 22 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/net/x25.h b/include/net/x25.h index c383aa4edbf0..6d30a01d281d 100644 --- a/include/net/x25.h +++ b/include/net/x25.h @@ -298,10 +298,10 @@ void x25_check_rbuf(struct sock *); /* sysctl_net_x25.c */ #ifdef CONFIG_SYSCTL -void x25_register_sysctl(void); +int x25_register_sysctl(void); void x25_unregister_sysctl(void); #else -static inline void x25_register_sysctl(void) {}; +static inline int x25_register_sysctl(void) { return 0; }; static inline void x25_unregister_sysctl(void) {}; #endif /* CONFIG_SYSCTL */ diff --git a/net/x25/af_x25.c b/net/x25/af_x25.c index 8b911c29860e..5a1a98df3499 100644 --- a/net/x25/af_x25.c +++ b/net/x25/af_x25.c @@ -1791,32 +1791,40 @@ void x25_kill_by_neigh(struct x25_neigh *nb) static int __init x25_init(void) { - int rc = proto_register(&x25_proto, 0); + int rc; - if (rc != 0) + rc = proto_register(&x25_proto, 0); + if (rc) goto out; rc = sock_register(&x25_family_ops); - if (rc != 0) + if (rc) goto out_proto; dev_add_pack(&x25_packet_type); rc = register_netdevice_notifier(&x25_dev_notifier); - if (rc != 0) + if (rc) goto out_sock; - pr_info("Linux Version 0.2\n"); + rc = x25_register_sysctl(); + if (rc) + goto out_dev; - x25_register_sysctl(); rc = x25_proc_init(); - if (rc != 0) - goto out_dev; + if (rc) + goto out_sysctl; + + pr_info("Linux Version 0.2\n"); + out: return rc; +out_sysctl: + x25_unregister_sysctl(); out_dev: unregister_netdevice_notifier(&x25_dev_notifier); out_sock: + dev_remove_pack(&x25_packet_type); sock_unregister(AF_X25); out_proto: proto_unregister(&x25_proto); diff --git a/net/x25/sysctl_net_x25.c b/net/x25/sysctl_net_x25.c index a06dfe143c67..ba078c85f0a1 100644 --- a/net/x25/sysctl_net_x25.c +++ b/net/x25/sysctl_net_x25.c @@ -73,9 +73,12 @@ static struct ctl_table x25_table[] = { { }, }; -void __init x25_register_sysctl(void) +int __init x25_register_sysctl(void) { x25_table_header = register_net_sysctl(&init_net, "net/x25", x25_table); + if (!x25_table_header) + return -ENOMEM; + return 0; } void x25_unregister_sysctl(void) -- cgit v1.2.3 From 4123109050a869a8871e58a50f28f383d41e49ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 5 May 2017 16:13:02 -0700 Subject: nvme-fc: correct port role bits FC Port roles is a bit mask, not individual values. Correct nvme definitions to unique bits. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 0db37158a61d..12e344b5b77f 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -27,8 +27,8 @@ /* FC Port role bitmask - can merge with FC Port Roles in fc transport */ #define FC_PORT_ROLE_NVME_INITIATOR 0x10 -#define FC_PORT_ROLE_NVME_TARGET 0x11 -#define FC_PORT_ROLE_NVME_DISCOVERY 0x12 +#define FC_PORT_ROLE_NVME_TARGET 0x20 +#define FC_PORT_ROLE_NVME_DISCOVERY 0x40 /** -- cgit v1.2.3 From 4b8ba5fa525bc8bdaaed2a5c5433f0f2008d7bc5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 25 Apr 2017 16:23:09 -0700 Subject: nvmet-fc: remove target cpu scheduling flag Remove NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED. It's unnecessary. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- drivers/nvme/target/fc.c | 4 +--- drivers/nvme/target/fcloop.c | 1 - drivers/scsi/lpfc/lpfc_nvmet.c | 1 - include/linux/nvme-fc-driver.h | 12 ++---------- 4 files changed, 3 insertions(+), 15 deletions(-) (limited to 'include') diff --git a/drivers/nvme/target/fc.c b/drivers/nvme/target/fc.c index 62eba29c85fb..2006fae61980 100644 --- a/drivers/nvme/target/fc.c +++ b/drivers/nvme/target/fc.c @@ -517,9 +517,7 @@ nvmet_fc_queue_to_cpu(struct nvmet_fc_tgtport *tgtport, int qid) { int cpu, idx, cnt; - if (!(tgtport->ops->target_features & - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED) || - tgtport->ops->max_hw_queues == 1) + if (tgtport->ops->max_hw_queues == 1) return WORK_CPU_UNBOUND; /* Simple cpu selection based on qid modulo active cpu count */ diff --git a/drivers/nvme/target/fcloop.c b/drivers/nvme/target/fcloop.c index 15551ef79c8c..294a6611fb24 100644 --- a/drivers/nvme/target/fcloop.c +++ b/drivers/nvme/target/fcloop.c @@ -698,7 +698,6 @@ static struct nvmet_fc_target_template tgttemplate = { .dma_boundary = FCLOOP_DMABOUND_4G, /* optional features */ .target_features = NVMET_FCTGTFEAT_CMD_IN_ISR | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_OPDONE_IN_ISR, /* sizes of additional private data for data structures */ .target_priv_sz = sizeof(struct fcloop_tport), diff --git a/drivers/scsi/lpfc/lpfc_nvmet.c b/drivers/scsi/lpfc/lpfc_nvmet.c index 94434e621c33..0488580eea12 100644 --- a/drivers/scsi/lpfc/lpfc_nvmet.c +++ b/drivers/scsi/lpfc/lpfc_nvmet.c @@ -764,7 +764,6 @@ lpfc_nvmet_create_targetport(struct lpfc_hba *phba) lpfc_tgttemplate.max_sgl_segments = phba->cfg_nvme_seg_cnt + 1; lpfc_tgttemplate.max_hw_queues = phba->cfg_nvme_io_channel; lpfc_tgttemplate.target_features = NVMET_FCTGTFEAT_READDATA_RSP | - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED | NVMET_FCTGTFEAT_CMD_IN_ISR | NVMET_FCTGTFEAT_OPDONE_IN_ISR; diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 12e344b5b77f..6c8c5d8041b7 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -642,15 +642,7 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1), - /* Bit 1: When 0, the LLDD will deliver FCP CMD - * on the CPU it should be affinitized to. Thus work will - * be scheduled on the cpu received on. When 1, the LLDD - * may not deliver the CMD on the CPU it should be worked - * on. The transport should pick a cpu to schedule the work - * on. - */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2), + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), /* Bit 2: When 0, the LLDD is calling the cmd rcv handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD @@ -658,7 +650,7 @@ enum { * requiring the transport to transition to a workqueue * for op completion. */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3), + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), /* Bit 3: When 0, the LLDD is calling the op done handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD -- cgit v1.2.3