From 72b603ee8cfc6be587f301568d79ce38e7ed735d Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 25 Aug 2014 12:27:02 -0700 Subject: bpf: x86: add missing 'shift by register' instructions to x64 eBPF JIT 'shift by register' operations are supported by eBPF interpreter, but were accidently left out of x64 JIT compiler. Fix it and add a testcase. Reported-by: Brendan Gregg Signed-off-by: Alexei Starovoitov Fixes: 622582786c9e ("net: filter: x86: internal BPF JIT") Signed-off-by: David S. Miller --- arch/x86/net/bpf_jit_comp.c | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'arch') diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 5c8cb8043c5a..b08a98c59530 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -515,6 +515,48 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT3(0xC1, add_1reg(b3, dst_reg), imm32); break; + case BPF_ALU | BPF_LSH | BPF_X: + case BPF_ALU | BPF_RSH | BPF_X: + case BPF_ALU | BPF_ARSH | BPF_X: + case BPF_ALU64 | BPF_LSH | BPF_X: + case BPF_ALU64 | BPF_RSH | BPF_X: + case BPF_ALU64 | BPF_ARSH | BPF_X: + + /* check for bad case when dst_reg == rcx */ + if (dst_reg == BPF_REG_4) { + /* mov r11, dst_reg */ + EMIT_mov(AUX_REG, dst_reg); + dst_reg = AUX_REG; + } + + if (src_reg != BPF_REG_4) { /* common case */ + EMIT1(0x51); /* push rcx */ + + /* mov rcx, src_reg */ + EMIT_mov(BPF_REG_4, src_reg); + } + + /* shl %rax, %cl | shr %rax, %cl | sar %rax, %cl */ + if (BPF_CLASS(insn->code) == BPF_ALU64) + EMIT1(add_1mod(0x48, dst_reg)); + else if (is_ereg(dst_reg)) + EMIT1(add_1mod(0x40, dst_reg)); + + switch (BPF_OP(insn->code)) { + case BPF_LSH: b3 = 0xE0; break; + case BPF_RSH: b3 = 0xE8; break; + case BPF_ARSH: b3 = 0xF8; break; + } + EMIT2(0xD3, add_1reg(b3, dst_reg)); + + if (src_reg != BPF_REG_4) + EMIT1(0x59); /* pop rcx */ + + if (insn->dst_reg == BPF_REG_4) + /* mov dst_reg, r11 */ + EMIT_mov(insn->dst_reg, AUX_REG); + break; + case BPF_ALU | BPF_END | BPF_FROM_BE: switch (imm32) { case 16: -- cgit v1.2.3 From 60a3b2253c413cf601783b070507d7dd6620c954 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 2 Sep 2014 22:53:44 +0200 Subject: net: bpf: make eBPF interpreter images read-only With eBPF getting more extended and exposure to user space is on it's way, hardening the memory range the interpreter uses to steer its command flow seems appropriate. This patch moves the to be interpreted bytecode to read-only pages. In case we execute a corrupted BPF interpreter image for some reason e.g. caused by an attacker which got past a verifier stage, it would not only provide arbitrary read/write memory access but arbitrary function calls as well. After setting up the BPF interpreter image, its contents do not change until destruction time, thus we can setup the image on immutable made pages in order to mitigate modifications to that code. The idea is derived from commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). This is possible because bpf_prog is not part of sk_filter anymore. After setup bpf_prog cannot be altered during its life-time. This prevents any modifications to the entire bpf_prog structure (incl. function/JIT image pointer). Every eBPF program (including classic BPF that are migrated) have to call bpf_prog_select_runtime() to select either interpreter or a JIT image as a last setup step, and they all are being freed via bpf_prog_free(), including non-JIT. Therefore, we can easily integrate this into the eBPF life-time, plus since we directly allocate a bpf_prog, we have no performance penalty. Tested with seccomp and test_bpf testsuite in JIT/non-JIT mode and manual inspection of kernel_page_tables. Brad Spengler proposed the same idea via Twitter during development of this patch. Joint work with Hannes Frederic Sowa. Suggested-by: Brad Spengler Signed-off-by: Daniel Borkmann Signed-off-by: Hannes Frederic Sowa Cc: Alexei Starovoitov Cc: Kees Cook Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 3 +- arch/mips/net/bpf_jit.c | 3 +- arch/powerpc/net/bpf_jit_comp.c | 3 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 3 +- arch/x86/net/bpf_jit_comp.c | 18 ++++------ include/linux/filter.h | 49 ++++++++++++++++++++++--- kernel/bpf/core.c | 80 +++++++++++++++++++++++++++++++++++++++-- kernel/seccomp.c | 7 ++-- lib/test_bpf.c | 2 +- net/core/filter.c | 6 ++-- 11 files changed, 144 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a37b989a2f91..a76623bcf722 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -930,5 +930,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 05a56619ece2..cfa83cf2447d 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1427,5 +1427,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 3afa6f4c1957..40c53ff59124 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -697,5 +697,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 61e45b7c04d7..f2833c5b218a 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -887,5 +887,5 @@ void bpf_jit_free(struct bpf_prog *fp) module_free(NULL, header); free_filter: - kfree(fp); + bpf_prog_unlock_free(fp); } diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index 1f76c22a6a75..f7a736b645e8 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -812,5 +812,6 @@ void bpf_jit_free(struct bpf_prog *fp) { if (fp->jited) module_free(NULL, fp->bpf_func); - kfree(fp); + + bpf_prog_unlock_free(fp); } diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index b08a98c59530..39ccfbb4a723 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -972,23 +972,17 @@ out: kfree(addrs); } -static void bpf_jit_free_deferred(struct work_struct *work) +void bpf_jit_free(struct bpf_prog *fp) { - struct bpf_prog *fp = container_of(work, struct bpf_prog, work); unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; struct bpf_binary_header *header = (void *)addr; + if (!fp->jited) + goto free_filter; + set_memory_rw(addr, header->pages); module_free(NULL, header); - kfree(fp); -} -void bpf_jit_free(struct bpf_prog *fp) -{ - if (fp->jited) { - INIT_WORK(&fp->work, bpf_jit_free_deferred); - schedule_work(&fp->work); - } else { - kfree(fp); - } +free_filter: + bpf_prog_unlock_free(fp); } diff --git a/include/linux/filter.h b/include/linux/filter.h index a5227ab8ccb1..c78994593355 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -9,6 +9,11 @@ #include #include #include +#include + +struct sk_buff; +struct sock; +struct seccomp_data; /* Internally used and optimized filter representation with extended * instruction set based on top of classic BPF. @@ -320,20 +325,23 @@ struct sock_fprog_kern { struct sock_filter *filter; }; -struct sk_buff; -struct sock; -struct seccomp_data; +struct bpf_work_struct { + struct bpf_prog *prog; + struct work_struct work; +}; struct bpf_prog { + u32 pages; /* Number of allocated pages */ u32 jited:1, /* Is our filter JIT'ed? */ len:31; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ + struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, const struct bpf_insn *filter); + /* Instructions for interpreter */ union { struct sock_filter insns[0]; struct bpf_insn insnsi[0]; - struct work_struct work; }; }; @@ -353,6 +361,26 @@ static inline unsigned int bpf_prog_size(unsigned int proglen) #define bpf_classic_proglen(fprog) (fprog->len * sizeof(fprog->filter[0])) +#ifdef CONFIG_DEBUG_SET_MODULE_RONX +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ + set_memory_ro((unsigned long)fp, fp->pages); +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ + set_memory_rw((unsigned long)fp, fp->pages); +} +#else +static inline void bpf_prog_lock_ro(struct bpf_prog *fp) +{ +} + +static inline void bpf_prog_unlock_ro(struct bpf_prog *fp) +{ +} +#endif /* CONFIG_DEBUG_SET_MODULE_RONX */ + int sk_filter(struct sock *sk, struct sk_buff *skb); void bpf_prog_select_runtime(struct bpf_prog *fp); @@ -361,6 +389,17 @@ void bpf_prog_free(struct bpf_prog *fp); int bpf_convert_filter(struct sock_filter *prog, int len, struct bpf_insn *new_prog, int *new_len); +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags); +struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags); +void __bpf_prog_free(struct bpf_prog *fp); + +static inline void bpf_prog_unlock_free(struct bpf_prog *fp) +{ + bpf_prog_unlock_ro(fp); + __bpf_prog_free(fp); +} + int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); void bpf_prog_destroy(struct bpf_prog *fp); @@ -450,7 +489,7 @@ static inline void bpf_jit_compile(struct bpf_prog *fp) static inline void bpf_jit_free(struct bpf_prog *fp) { - kfree(fp); + bpf_prog_unlock_free(fp); } #endif /* CONFIG_BPF_JIT */ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 7f0dbcbb34af..b54bb2c2e494 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -22,6 +22,7 @@ */ #include #include +#include #include /* Registers */ @@ -63,6 +64,67 @@ void *bpf_internal_load_pointer_neg_helper(const struct sk_buff *skb, int k, uns return NULL; } +struct bpf_prog *bpf_prog_alloc(unsigned int size, gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_work_struct *ws; + struct bpf_prog *fp; + + size = round_up(size, PAGE_SIZE); + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp == NULL) + return NULL; + + ws = kmalloc(sizeof(*ws), GFP_KERNEL | gfp_extra_flags); + if (ws == NULL) { + vfree(fp); + return NULL; + } + + fp->pages = size / PAGE_SIZE; + fp->work = ws; + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_alloc); + +struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, + gfp_t gfp_extra_flags) +{ + gfp_t gfp_flags = GFP_KERNEL | __GFP_HIGHMEM | __GFP_ZERO | + gfp_extra_flags; + struct bpf_prog *fp; + + BUG_ON(fp_old == NULL); + + size = round_up(size, PAGE_SIZE); + if (size <= fp_old->pages * PAGE_SIZE) + return fp_old; + + fp = __vmalloc(size, gfp_flags, PAGE_KERNEL); + if (fp != NULL) { + memcpy(fp, fp_old, fp_old->pages * PAGE_SIZE); + fp->pages = size / PAGE_SIZE; + + /* We keep fp->work from fp_old around in the new + * reallocated structure. + */ + fp_old->work = NULL; + __bpf_prog_free(fp_old); + } + + return fp; +} +EXPORT_SYMBOL_GPL(bpf_prog_realloc); + +void __bpf_prog_free(struct bpf_prog *fp) +{ + kfree(fp->work); + vfree(fp); +} +EXPORT_SYMBOL_GPL(__bpf_prog_free); + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. @@ -523,12 +585,26 @@ void bpf_prog_select_runtime(struct bpf_prog *fp) /* Probe if internal BPF can be JITed */ bpf_int_jit_compile(fp); + /* Lock whole bpf_prog as read-only */ + bpf_prog_lock_ro(fp); } EXPORT_SYMBOL_GPL(bpf_prog_select_runtime); -/* free internal BPF program */ +static void bpf_prog_free_deferred(struct work_struct *work) +{ + struct bpf_work_struct *ws; + + ws = container_of(work, struct bpf_work_struct, work); + bpf_jit_free(ws->prog); +} + +/* Free internal BPF program */ void bpf_prog_free(struct bpf_prog *fp) { - bpf_jit_free(fp); + struct bpf_work_struct *ws = fp->work; + + INIT_WORK(&ws->work, bpf_prog_free_deferred); + ws->prog = fp; + schedule_work(&ws->work); } EXPORT_SYMBOL_GPL(bpf_prog_free); diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 44eb005c6695..84922befea84 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -395,16 +395,15 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) if (!filter) goto free_prog; - filter->prog = kzalloc(bpf_prog_size(new_len), - GFP_KERNEL|__GFP_NOWARN); + filter->prog = bpf_prog_alloc(bpf_prog_size(new_len), __GFP_NOWARN); if (!filter->prog) goto free_filter; ret = bpf_convert_filter(fp, fprog->len, filter->prog->insnsi, &new_len); if (ret) goto free_filter_prog; - kfree(fp); + kfree(fp); atomic_set(&filter->usage, 1); filter->prog->len = new_len; @@ -413,7 +412,7 @@ static struct seccomp_filter *seccomp_prepare_filter(struct sock_fprog *fprog) return filter; free_filter_prog: - kfree(filter->prog); + __bpf_prog_free(filter->prog); free_filter: kfree(filter); free_prog: diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 8c66c6aace04..9a67456ba29a 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1836,7 +1836,7 @@ static struct bpf_prog *generate_filter(int which, int *err) break; case INTERNAL: - fp = kzalloc(bpf_prog_size(flen), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(flen), 0); if (fp == NULL) { pr_cont("UNEXPECTED_FAIL no memory left\n"); *err = -ENOMEM; diff --git a/net/core/filter.c b/net/core/filter.c index d814b8a89d0f..37f8eb06fdee 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -933,7 +933,7 @@ static struct bpf_prog *bpf_migrate_filter(struct bpf_prog *fp) /* Expand fp for appending the new filter representation. */ old_fp = fp; - fp = krealloc(old_fp, bpf_prog_size(new_len), GFP_KERNEL); + fp = bpf_prog_realloc(old_fp, bpf_prog_size(new_len), 0); if (!fp) { /* The old_fp is still around in case we couldn't * allocate new memory, so uncharge on that one. @@ -1013,7 +1013,7 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog) if (fprog->filter == NULL) return -EINVAL; - fp = kmalloc(bpf_prog_size(fprog->len), GFP_KERNEL); + fp = bpf_prog_alloc(bpf_prog_size(fprog->len), 0); if (!fp) return -ENOMEM; @@ -1069,7 +1069,7 @@ int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk) if (fprog->filter == NULL) return -EINVAL; - prog = kmalloc(bpf_fsize, GFP_KERNEL); + prog = bpf_prog_alloc(bpf_fsize, 0); if (!prog) return -ENOMEM; -- cgit v1.2.3 From 02ab695bb37ee9ad515df0d0790d5977505dd04a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 4 Sep 2014 22:17:17 -0700 Subject: net: filter: add "load 64-bit immediate" eBPF instruction add BPF_LD_IMM64 instruction to load 64-bit immediate value into a register. All previous instructions were 8-byte. This is first 16-byte instruction. Two consecutive 'struct bpf_insn' blocks are interpreted as single instruction: insn[0].code = BPF_LD | BPF_DW | BPF_IMM insn[0].dst_reg = destination register insn[0].imm = lower 32-bit insn[1].code = 0 insn[1].imm = upper 32-bit All unused fields must be zero. Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads 32-bit immediate value into a register. x64 JITs it as single 'movabsq %rax, imm64' arm64 may JIT as sequence of four 'movk x0, #imm16, lsl #shift' insn Note that old eBPF programs are binary compatible with new interpreter. It helps eBPF programs load 64-bit constant into a register with one instruction instead of using two registers and 4 instructions: BPF_MOV32_IMM(R1, imm32) BPF_ALU64_IMM(BPF_LSH, R1, 32) BPF_MOV32_IMM(R2, imm32) BPF_ALU64_REG(BPF_OR, R1, R2) User space generated programs will use this instruction to load constants only. To tell kernel that user space needs a pointer the _pseudo_ variant of this instruction may be added later, which will use extra bits of encoding to indicate what type of pointer user space is asking kernel to provide. For example 'off' or 'src_reg' fields can be used for such purpose. src_reg = 1 could mean that user space is asking kernel to validate and load in-kernel map pointer. src_reg = 2 could mean that user space needs readonly data section pointer src_reg = 3 could mean that user space needs a pointer to per-cpu local data All such future pseudo instructions will not be carrying the actual pointer as part of the instruction, but rather will be treated as a request to kernel to provide one. The kernel will verify the request_for_a_pointer, then will drop _pseudo_ marking and will store actual internal pointer inside the instruction, so the end result is the interpreter and JITs never see pseudo BPF_LD_IMM64 insns and only operate on generic BPF_LD_IMM64 that loads 64-bit immediate into a register. User space never operates on direct pointers and verifier can easily recognize request_for_pointer vs other instructions. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- Documentation/networking/filter.txt | 8 +++++++- arch/x86/net/bpf_jit_comp.c | 17 +++++++++++++++++ include/linux/filter.h | 18 ++++++++++++++++++ kernel/bpf/core.c | 5 +++++ lib/test_bpf.c | 21 +++++++++++++++++++++ 5 files changed, 68 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt index c48a9704bda8..81916ab5d96f 100644 --- a/Documentation/networking/filter.txt +++ b/Documentation/networking/filter.txt @@ -951,7 +951,7 @@ Size modifier is one of ... Mode modifier is one of: - BPF_IMM 0x00 /* classic BPF only, reserved in eBPF */ + BPF_IMM 0x00 /* used for 32-bit mov in classic BPF and 64-bit in eBPF */ BPF_ABS 0x20 BPF_IND 0x40 BPF_MEM 0x60 @@ -995,6 +995,12 @@ BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and 2 byte atomic increments are not supported. +eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists +of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single +instruction that loads 64-bit immediate value into a dst_reg. +Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads +32-bit immediate value into a register. + Testing ------- diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 39ccfbb4a723..06f8c17f5484 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -393,6 +393,23 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image, EMIT1_off32(add_1reg(0xB8, dst_reg), imm32); break; + case BPF_LD | BPF_IMM | BPF_DW: + if (insn[1].code != 0 || insn[1].src_reg != 0 || + insn[1].dst_reg != 0 || insn[1].off != 0) { + /* verifier must catch invalid insns */ + pr_err("invalid BPF_LD_IMM64 insn\n"); + return -EINVAL; + } + + /* movabsq %rax, imm64 */ + EMIT2(add_1mod(0x48, dst_reg), add_1reg(0xB8, dst_reg)); + EMIT(insn[0].imm, 4); + EMIT(insn[1].imm, 4); + + insn++; + i++; + break; + /* dst %= src, dst /= src, dst %= imm32, dst /= imm32 */ case BPF_ALU | BPF_MOD | BPF_X: case BPF_ALU | BPF_DIV | BPF_X: diff --git a/include/linux/filter.h b/include/linux/filter.h index c78994593355..bf323da77950 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -166,6 +166,24 @@ enum { .off = 0, \ .imm = IMM }) +/* BPF_LD_IMM64 macro encodes single 'load 64-bit immediate' insn */ +#define BPF_LD_IMM64(DST, IMM) \ + BPF_LD_IMM64_RAW(DST, 0, IMM) + +#define BPF_LD_IMM64_RAW(DST, SRC, IMM) \ + ((struct bpf_insn) { \ + .code = BPF_LD | BPF_DW | BPF_IMM, \ + .dst_reg = DST, \ + .src_reg = SRC, \ + .off = 0, \ + .imm = (__u32) (IMM) }), \ + ((struct bpf_insn) { \ + .code = 0, /* zero is reserved opcode */ \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = 0, \ + .imm = ((__u64) (IMM)) >> 32 }) + /* Short form of mov based on type, BPF_X: dst_reg = src_reg, BPF_K: dst_reg = imm32 */ #define BPF_MOV64_RAW(TYPE, DST, SRC, IMM) \ diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index b54bb2c2e494..2c2bfaacce66 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -242,6 +242,7 @@ static unsigned int __bpf_prog_run(void *ctx, const struct bpf_insn *insn) [BPF_LD | BPF_IND | BPF_W] = &&LD_IND_W, [BPF_LD | BPF_IND | BPF_H] = &&LD_IND_H, [BPF_LD | BPF_IND | BPF_B] = &&LD_IND_B, + [BPF_LD | BPF_IMM | BPF_DW] = &&LD_IMM_DW, }; void *ptr; int off; @@ -301,6 +302,10 @@ select_insn: ALU64_MOV_K: DST = IMM; CONT; + LD_IMM_DW: + DST = (u64) (u32) insn[0].imm | ((u64) (u32) insn[1].imm) << 32; + insn++; + CONT; ALU64_ARSH_X: (*(s64 *) &DST) >>= SRC; CONT; diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 9a67456ba29a..413890815d3e 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -1735,6 +1735,27 @@ static struct bpf_test tests[] = { { }, { { 1, 0 } }, }, + { + "load 64-bit immediate", + .u.insns_int = { + BPF_LD_IMM64(R1, 0x567800001234L), + BPF_MOV64_REG(R2, R1), + BPF_MOV64_REG(R3, R2), + BPF_ALU64_IMM(BPF_RSH, R2, 32), + BPF_ALU64_IMM(BPF_LSH, R3, 32), + BPF_ALU64_IMM(BPF_RSH, R3, 32), + BPF_ALU64_IMM(BPF_MOV, R0, 0), + BPF_JMP_IMM(BPF_JEQ, R2, 0x5678, 1), + BPF_EXIT_INSN(), + BPF_JMP_IMM(BPF_JEQ, R3, 0x1234, 1), + BPF_EXIT_INSN(), + BPF_ALU64_IMM(BPF_MOV, R0, 1), + BPF_EXIT_INSN(), + }, + INTERNAL, + { }, + { { 0, 1 } } + }, }; static struct net_device dev; -- cgit v1.2.3 From a395135ddebb0a06052b84c309eb6cb68b79c797 Mon Sep 17 00:00:00 2001 From: Rafał Miłecki Date: Mon, 1 Sep 2014 23:11:07 +0200 Subject: bcma: use separated function to initialize bus on SoC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is required to split SoC bus init into two phases. The later one (which includes scanning) should be called when kalloc is available. Cc: Ralf Baechle Signed-off-by: Rafał Miłecki Acked-by: Hauke Mehrtens Signed-off-by: John W. Linville --- arch/mips/bcm47xx/setup.c | 4 ++++ drivers/bcma/host_soc.c | 11 +++++++++-- include/linux/bcma/bcma_soc.h | 1 + 3 files changed, 14 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/mips/bcm47xx/setup.c b/arch/mips/bcm47xx/setup.c index 2b63e7e7d3d3..fff6ed4978c7 100644 --- a/arch/mips/bcm47xx/setup.c +++ b/arch/mips/bcm47xx/setup.c @@ -201,6 +201,10 @@ static void __init bcm47xx_register_bcma(void) pr_warn("bcm47xx: someone else already registered a bcma SPROM callback handler (err %d)\n", err); err = bcma_host_soc_register(&bcm47xx_bus.bcma); + if (err) + panic("Failed to register BCMA bus (err %d)", err); + + err = bcma_host_soc_init(&bcm47xx_bus.bcma); if (err) panic("Failed to initialize BCMA bus (err %d)", err); diff --git a/drivers/bcma/host_soc.c b/drivers/bcma/host_soc.c index 379e0d4ebe72..718e054dd727 100644 --- a/drivers/bcma/host_soc.c +++ b/drivers/bcma/host_soc.c @@ -165,7 +165,6 @@ static const struct bcma_host_ops bcma_host_soc_ops = { int __init bcma_host_soc_register(struct bcma_soc *soc) { struct bcma_bus *bus = &soc->bus; - int err; /* iomap only first core. We have to read some register on this core * to scan the bus. @@ -181,7 +180,15 @@ int __init bcma_host_soc_register(struct bcma_soc *soc) /* Initialize struct, detect chip */ bcma_init_bus(bus); - /* Register */ + return 0; +} + +int __init bcma_host_soc_init(struct bcma_soc *soc) +{ + struct bcma_bus *bus = &soc->bus; + int err; + + /* Scan bus and initialize it */ err = bcma_bus_early_register(bus, &soc->core_cc, &soc->core_mips); if (err) iounmap(bus->mmio); diff --git a/include/linux/bcma/bcma_soc.h b/include/linux/bcma/bcma_soc.h index 4203c5593b9f..f24d245f8394 100644 --- a/include/linux/bcma/bcma_soc.h +++ b/include/linux/bcma/bcma_soc.h @@ -10,6 +10,7 @@ struct bcma_soc { }; int __init bcma_host_soc_register(struct bcma_soc *soc); +int __init bcma_host_soc_init(struct bcma_soc *soc); int bcma_bus_register(struct bcma_bus *bus); -- cgit v1.2.3 From 738cbe72adc5c8f2016c4c68aa5162631d4f27e1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:47 +0200 Subject: net: bpf: consolidate JIT binary allocator Introduced in commit 314beb9bcabf ("x86: bpf_jit_comp: secure bpf jit against spraying attacks") and later on replicated in aa2d2c73c21f ("s390/bpf,jit: address randomize and write protect jit code") for s390 architecture, write protection for BPF JIT images got added and a random start address of the JIT code, so that it's not on a page boundary anymore. Since both use a very similar allocator for the BPF binary header, we can consolidate this code into the BPF core as it's mostly JIT independant anyway. This will also allow for future archs that support DEBUG_SET_MODULE_RONX to just reuse instead of reimplementing it. JIT tested on x86_64 and s390x with BPF test suite. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Cc: Eric Dumazet Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: David S. Miller --- arch/s390/net/bpf_jit_comp.c | 45 ++++++++------------------------------- arch/x86/net/bpf_jit_comp.c | 50 ++++++++++---------------------------------- include/linux/filter.h | 13 ++++++++++++ kernel/bpf/core.c | 39 ++++++++++++++++++++++++++++++++++ 4 files changed, 72 insertions(+), 75 deletions(-) (limited to 'arch') diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index f2833c5b218a..b734f975c22e 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -5,11 +5,9 @@ * * Author(s): Martin Schwidefsky */ -#include #include #include #include -#include #include #include #include @@ -148,6 +146,12 @@ struct bpf_jit { ret; \ }) +static void bpf_jit_fill_hole(void *area, unsigned int size) +{ + /* Fill whole space with illegal instructions */ + memset(area, 0, size); +} + static void bpf_jit_prologue(struct bpf_jit *jit) { /* Save registers and create stack frame if necessary */ @@ -780,38 +784,6 @@ out: return -1; } -/* - * Note: for security reasons, bpf code will follow a randomly - * sized amount of illegal instructions. - */ -struct bpf_binary_header { - unsigned int pages; - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int bpfsize, - u8 **image_ptr) -{ - struct bpf_binary_header *header; - unsigned int sz, hole; - - /* Most BPF filters are really small, but if some of them fill a page, - * allow at least 128 extra bytes for illegal instructions. - */ - sz = round_up(bpfsize + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - memset(header, 0, sz); - header->pages = sz / PAGE_SIZE; - hole = min(sz - (bpfsize + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - /* Insert random number of illegal instructions before BPF code - * and make sure the first instruction starts at an even address. - */ - *image_ptr = &header->image[(prandom_u32() % hole) & -2]; - return header; -} - void bpf_jit_compile(struct bpf_prog *fp) { struct bpf_binary_header *header = NULL; @@ -850,7 +822,8 @@ void bpf_jit_compile(struct bpf_prog *fp) size = prg_len + lit_len; if (size >= BPF_SIZE_MAX) goto out; - header = bpf_alloc_binary(size, &jit.start); + header = bpf_jit_binary_alloc(size, &jit.start, + 2, bpf_jit_fill_hole); if (!header) goto out; jit.prg = jit.mid = jit.start + prg_len; @@ -884,7 +857,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 06f8c17f5484..9de0b5476b0c 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -8,12 +8,10 @@ * as published by the Free Software Foundation; version 2 * of the License. */ -#include -#include #include #include #include -#include +#include int bpf_jit_enable __read_mostly; @@ -109,39 +107,6 @@ static inline void bpf_flush_icache(void *start, void *end) #define CHOOSE_LOAD_FUNC(K, func) \ ((int)K < 0 ? ((int)K >= SKF_LL_OFF ? func##_negative_offset : func) : func##_positive_offset) -struct bpf_binary_header { - unsigned int pages; - /* Note : for security reasons, bpf code will follow a randomly - * sized amount of int3 instructions - */ - u8 image[]; -}; - -static struct bpf_binary_header *bpf_alloc_binary(unsigned int proglen, - u8 **image_ptr) -{ - unsigned int sz, hole; - struct bpf_binary_header *header; - - /* Most of BPF filters are really small, - * but if some of them fill a page, allow at least - * 128 extra bytes to insert a random section of int3 - */ - sz = round_up(proglen + sizeof(*header) + 128, PAGE_SIZE); - header = module_alloc(sz); - if (!header) - return NULL; - - memset(header, 0xcc, sz); /* fill whole space with int3 instructions */ - - header->pages = sz / PAGE_SIZE; - hole = min(sz - (proglen + sizeof(*header)), PAGE_SIZE - sizeof(*header)); - - /* insert a random number of int3 instructions before BPF code */ - *image_ptr = &header->image[prandom_u32() % hole]; - return header; -} - /* pick a register outside of BPF range for JIT internal work */ #define AUX_REG (MAX_BPF_REG + 1) @@ -206,6 +171,12 @@ static inline u8 add_2reg(u8 byte, u32 dst_reg, u32 src_reg) return byte + reg2hex[dst_reg] + (reg2hex[src_reg] << 3); } +static void jit_fill_hole(void *area, unsigned int size) +{ + /* fill whole space with int3 instructions */ + memset(area, 0xcc, size); +} + struct jit_context { unsigned int cleanup_addr; /* epilogue code offset */ bool seen_ld_abs; @@ -959,7 +930,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) if (proglen <= 0) { image = NULL; if (header) - module_free(NULL, header); + bpf_jit_binary_free(header); goto out; } if (image) { @@ -969,7 +940,8 @@ void bpf_int_jit_compile(struct bpf_prog *prog) break; } if (proglen == oldproglen) { - header = bpf_alloc_binary(proglen, &image); + header = bpf_jit_binary_alloc(proglen, &image, + 1, jit_fill_hole); if (!header) goto out; } @@ -998,7 +970,7 @@ void bpf_jit_free(struct bpf_prog *fp) goto free_filter; set_memory_rw(addr, header->pages); - module_free(NULL, header); + bpf_jit_binary_free(header); free_filter: bpf_prog_unlock_free(fp); diff --git a/include/linux/filter.h b/include/linux/filter.h index 8f82ef3f1cdd..868764fcffb8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -289,6 +289,11 @@ struct sock_fprog_kern { struct sock_filter *filter; }; +struct bpf_binary_header { + unsigned int pages; + u8 image[]; +}; + struct bpf_work_struct { struct bpf_prog *prog; struct work_struct work; @@ -358,6 +363,14 @@ struct bpf_prog *bpf_prog_realloc(struct bpf_prog *fp_old, unsigned int size, gfp_t gfp_extra_flags); void __bpf_prog_free(struct bpf_prog *fp); +typedef void (*bpf_jit_fill_hole_t)(void *area, unsigned int size); + +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns); +void bpf_jit_binary_free(struct bpf_binary_header *hdr); + static inline void bpf_prog_unlock_free(struct bpf_prog *fp) { bpf_prog_unlock_ro(fp); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 2c2bfaacce66..8ee520f0ec70 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -20,9 +20,12 @@ * Andi Kleen - Fix a few bad bugs and races. * Kris Katterjohn - Added many additional checks in bpf_check_classic() */ + #include #include #include +#include +#include #include /* Registers */ @@ -125,6 +128,42 @@ void __bpf_prog_free(struct bpf_prog *fp) } EXPORT_SYMBOL_GPL(__bpf_prog_free); +struct bpf_binary_header * +bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, + unsigned int alignment, + bpf_jit_fill_hole_t bpf_fill_ill_insns) +{ + struct bpf_binary_header *hdr; + unsigned int size, hole, start; + + /* Most of BPF filters are really small, but if some of them + * fill a page, allow at least 128 extra bytes to insert a + * random section of illegal instructions. + */ + size = round_up(proglen + sizeof(*hdr) + 128, PAGE_SIZE); + hdr = module_alloc(size); + if (hdr == NULL) + return NULL; + + /* Fill space with illegal/arch-dep instructions. */ + bpf_fill_ill_insns(hdr, size); + + hdr->pages = size / PAGE_SIZE; + hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), + PAGE_SIZE - sizeof(*hdr)); + start = (prandom_u32() % hole) & ~(alignment - 1); + + /* Leave a random number of instructions before BPF code. */ + *image_ptr = &hdr->image[start]; + + return hdr; +} + +void bpf_jit_binary_free(struct bpf_binary_header *hdr) +{ + module_free(NULL, hdr); +} + /* Base function for offset calculation. Needs to go into .text section, * therefore keeping it non-static as well; will also be used by JITs * anyway later on, so do not let the compiler omit it. -- cgit v1.2.3 From 55309dd3d4cd7420376a3de0526d6ed24ff8fa76 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:48 +0200 Subject: net: bpf: arm: address randomize and write protect JIT code This is the ARM variant for 314beb9bcab ("x86: bpf_jit_comp: secure bpf jit against spraying attacks"). It is now possible to implement it due to commits 75374ad47c64 ("ARM: mm: Define set_memory_* functions for ARM") and dca9aa92fc7c ("ARM: add DEBUG_SET_MODULE_RONX option to Kconfig") which added infrastructure for this facility. Thus, this patch makes sure the BPF generated JIT code is marked RO, as other kernel text sections, and also lets the generated JIT code start at a pseudo random offset instead on a page boundary. The holes are filled with illegal instructions. JIT tested on armv7hl with BPF test suite. Reference: http://mainisusuallyafunction.blogspot.com/2012/11/attacking-hardened-linux-systems-with.html Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Acked-by: Mircea Gherzan Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 32 ++++++++++++++++++++++++++------ 1 file changed, 26 insertions(+), 6 deletions(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index a76623bcf722..2d1a5b93d91c 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include #include @@ -174,6 +173,15 @@ static inline bool is_load_to_a(u16 inst) } } +static void jit_fill_hole(void *area, unsigned int size) +{ + /* Insert illegal UND instructions. */ + u32 *ptr, fill_ins = 0xe7ffffff; + /* We are guaranteed to have aligned memory. */ + for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) + *ptr++ = fill_ins; +} + static void build_prologue(struct jit_ctx *ctx) { u16 reg_set = saved_regs(ctx); @@ -859,9 +867,11 @@ b_epilogue: void bpf_jit_compile(struct bpf_prog *fp) { + struct bpf_binary_header *header; struct jit_ctx ctx; unsigned tmp_idx; unsigned alloc_size; + u8 *target_ptr; if (!bpf_jit_enable) return; @@ -897,13 +907,15 @@ void bpf_jit_compile(struct bpf_prog *fp) /* there's nothing after the epilogue on ARMv7 */ build_epilogue(&ctx); #endif - alloc_size = 4 * ctx.idx; - ctx.target = module_alloc(alloc_size); - if (unlikely(ctx.target == NULL)) + header = bpf_jit_binary_alloc(alloc_size, &target_ptr, + 4, jit_fill_hole); + if (header == NULL) goto out; + ctx.target = (u32 *) target_ptr; ctx.idx = 0; + build_prologue(&ctx); build_body(&ctx); build_epilogue(&ctx); @@ -919,6 +931,7 @@ void bpf_jit_compile(struct bpf_prog *fp) /* there are 2 passes here */ bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); + set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; fp->jited = 1; out: @@ -928,8 +941,15 @@ out: void bpf_jit_free(struct bpf_prog *fp) { - if (fp->jited) - module_free(NULL, fp->bpf_func); + unsigned long addr = (unsigned long)fp->bpf_func & PAGE_MASK; + struct bpf_binary_header *header = (void *)addr; + + if (!fp->jited) + goto free_filter; + + set_memory_rw(addr, header->pages); + bpf_jit_binary_free(header); +free_filter: bpf_prog_unlock_free(fp); } -- cgit v1.2.3 From 286aad3c4014ca825c447e07e24f8929e6d266d2 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 8 Sep 2014 08:04:49 +0200 Subject: net: bpf: be friendly to kmemcheck Reported by Mikulas Patocka, kmemcheck currently barks out a false positive since we don't have special kmemcheck annotation for bitfields used in bpf_prog structure. We currently have jited:1, len:31 and thus when accessing len while CONFIG_KMEMCHECK enabled, kmemcheck throws a warning that we're reading uninitialized memory. As we don't need the whole bit universe for pages member, we can just split it to u16 and use a bool flag for jited instead of a bitfield. Signed-off-by: Mikulas Patocka Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 2 +- arch/mips/net/bpf_jit.c | 2 +- arch/powerpc/net/bpf_jit_comp.c | 2 +- arch/s390/net/bpf_jit_comp.c | 2 +- arch/sparc/net/bpf_jit_comp.c | 2 +- arch/x86/net/bpf_jit_comp.c | 2 +- include/linux/filter.h | 6 +++--- net/core/filter.c | 2 +- 8 files changed, 10 insertions(+), 10 deletions(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 2d1a5b93d91c..6b45f649eff0 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -933,7 +933,7 @@ void bpf_jit_compile(struct bpf_prog *fp) set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); return; diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index cfa83cf2447d..0e97ccd29fe3 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -1417,7 +1417,7 @@ void bpf_jit_compile(struct bpf_prog *fp) bpf_jit_dump(fp->len, alloc_size, 2, ctx.target); fp->bpf_func = (void *)ctx.target; - fp->jited = 1; + fp->jited = true; out: kfree(ctx.offsets); diff --git a/arch/powerpc/net/bpf_jit_comp.c b/arch/powerpc/net/bpf_jit_comp.c index 40c53ff59124..cbae2dfd053c 100644 --- a/arch/powerpc/net/bpf_jit_comp.c +++ b/arch/powerpc/net/bpf_jit_comp.c @@ -686,7 +686,7 @@ void bpf_jit_compile(struct bpf_prog *fp) ((u64 *)image)[0] = (u64)code_base; ((u64 *)image)[1] = local_paca->kernel_toc; fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index b734f975c22e..555f5c7e83ab 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -842,7 +842,7 @@ void bpf_jit_compile(struct bpf_prog *fp) if (jit.start) { set_memory_ro((unsigned long)header, header->pages); fp->bpf_func = (void *) jit.start; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index f7a736b645e8..b2ad9dc5425e 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -801,7 +801,7 @@ cond_branch: f_offset = addrs[i + filter[i].jf]; if (image) { bpf_flush_icache(image, image + proglen); fp->bpf_func = (void *)image; - fp->jited = 1; + fp->jited = true; } out: kfree(addrs); diff --git a/arch/x86/net/bpf_jit_comp.c b/arch/x86/net/bpf_jit_comp.c index 9de0b5476b0c..d56cd1f515bd 100644 --- a/arch/x86/net/bpf_jit_comp.c +++ b/arch/x86/net/bpf_jit_comp.c @@ -955,7 +955,7 @@ void bpf_int_jit_compile(struct bpf_prog *prog) bpf_flush_icache(header, image + proglen); set_memory_ro((unsigned long)header, header->pages); prog->bpf_func = (void *)image; - prog->jited = 1; + prog->jited = true; } out: kfree(addrs); diff --git a/include/linux/filter.h b/include/linux/filter.h index 868764fcffb8..4b59edead908 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -300,9 +300,9 @@ struct bpf_work_struct { }; struct bpf_prog { - u32 pages; /* Number of allocated pages */ - u32 jited:1, /* Is our filter JIT'ed? */ - len:31; /* Number of filter blocks */ + u16 pages; /* Number of allocated pages */ + bool jited; /* Is our filter JIT'ed? */ + u32 len; /* Number of filter blocks */ struct sock_fprog_kern *orig_prog; /* Original BPF program */ struct bpf_work_struct *work; /* Deferred free work struct */ unsigned int (*bpf_func)(const struct sk_buff *skb, diff --git a/net/core/filter.c b/net/core/filter.c index fa5b7d0f77ac..dfc716ffa44b 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -972,7 +972,7 @@ static struct bpf_prog *bpf_prepare_filter(struct bpf_prog *fp) int err; fp->bpf_func = NULL; - fp->jited = 0; + fp->jited = false; err = bpf_check_classic(fp->insns, fp->len); if (err) { -- cgit v1.2.3 From 18ec91e1947f478f4fb7d7b1a2b0696d15f464c5 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Mon, 8 Sep 2014 17:14:49 +0000 Subject: ARM: dts: Add emac nodes to the rk3188 device tree This adds support for EMAC Rockchip driver on RK3188 SoCs. Signed-off-by: Romain Perier Signed-off-by: David S. Miller --- arch/arm/boot/dts/rk3188.dtsi | 22 ++++++++++++++++++++++ arch/arm/boot/dts/rk3xxx.dtsi | 17 +++++++++++++++++ 2 files changed, 39 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/rk3188.dtsi b/arch/arm/boot/dts/rk3188.dtsi index ee801a9c6b74..7f25bc51f2ee 100644 --- a/arch/arm/boot/dts/rk3188.dtsi +++ b/arch/arm/boot/dts/rk3188.dtsi @@ -147,6 +147,24 @@ bias-disable; }; + emac { + emac_xfer: emac-xfer { + rockchip,pins = , /* tx_en */ + , /* txd1 */ + , /* txd0 */ + , /* rxd0 */ + , /* rxd1 */ + , /* mac_clk */ + , /* rx_err */ + ; /* crs_dvalid */ + }; + + emac_mdio: emac-mdio { + rockchip,pins = , + ; + }; + }; + i2c0 { i2c0_xfer: i2c0-xfer { rockchip,pins = , @@ -323,6 +341,10 @@ }; }; +&emac { + compatible = "rockchip,rk3188-emac"; +}; + &global_timer { interrupts = ; }; diff --git a/arch/arm/boot/dts/rk3xxx.dtsi b/arch/arm/boot/dts/rk3xxx.dtsi index 8caf85d83901..208b1df6bcb0 100644 --- a/arch/arm/boot/dts/rk3xxx.dtsi +++ b/arch/arm/boot/dts/rk3xxx.dtsi @@ -91,6 +91,23 @@ status = "disabled"; }; + emac: ethernet@10204000 { + compatible = "snps,arc-emac"; + reg = <0x10204000 0x3c>; + interrupts = ; + #address-cells = <1>; + #size-cells = <0>; + + rockchip,grf = <&grf>; + + clocks = <&cru HCLK_EMAC>, <&cru SCLK_MAC>; + clock-names = "hclk", "macref"; + max-speed = <100>; + phy-mode = "rmii"; + + status = "disabled"; + }; + mmc0: dwmmc@10214000 { compatible = "rockchip,rk2928-dw-mshc"; reg = <0x10214000 0x1000>; -- cgit v1.2.3 From c6ec956b73dbeb23f1f58aaf4dbb73eb509779c0 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Mon, 8 Sep 2014 17:14:50 +0000 Subject: ARM: dts: Enable emac node on the rk3188-radxarock boards This enables EMAC Rockchip support on radxa rock boards. Signed-off-by: Romain Perier Signed-off-by: David S. Miller --- arch/arm/boot/dts/rk3188-radxarock.dts | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/rk3188-radxarock.dts b/arch/arm/boot/dts/rk3188-radxarock.dts index 5e4e3c238b2d..d63d685ea6cc 100644 --- a/arch/arm/boot/dts/rk3188-radxarock.dts +++ b/arch/arm/boot/dts/rk3188-radxarock.dts @@ -76,6 +76,22 @@ }; }; +&emac { + status = "okay"; + + pinctrl-names = "default"; + pinctrl-0 = <&emac_xfer>, <&emac_mdio>, <&phy_int>; + + phy = <&phy0>; + phy-supply = <&vcc_rmii>; + + phy0: ethernet-phy@0 { + reg = <0>; + interrupt-parent = <&gpio3>; + interrupts = <26 IRQ_TYPE_LEVEL_LOW>; + }; +}; + &i2c1 { status = "okay"; clock-frequency = <400000>; @@ -201,6 +217,12 @@ }; }; + lan8720a { + phy_int: phy-int { + rockchip,pins = ; + }; + }; + ir-receiver { ir_recv_pin: ir-recv-pin { rockchip,pins = ; -- cgit v1.2.3 From 233577a22089facf5271ab5e845b2262047c971f Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 12 Sep 2014 14:04:43 +0200 Subject: net: filter: constify detection of pkt_type_offset Currently we have 2 pkt_type_offset functions doing the same thing and spread across the architecture files. Remove those and replace them with a PKT_TYPE_OFFSET macro helper which gets the constant value from a zero sized sk_buff member right in front of the bitfield with offsetof. This new offset marker does not change size of struct sk_buff. Cc: Eric Dumazet Cc: Markos Chandras Cc: Martin Schwidefsky Cc: Daniel Borkmann Cc: Alexei Starovoitov Signed-off-by: Denis Kirjanov Signed-off-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- arch/mips/net/bpf_jit.c | 27 +-------------------------- arch/s390/net/bpf_jit_comp.c | 35 +---------------------------------- include/linux/skbuff.h | 10 ++++++++++ net/core/filter.c | 31 ++----------------------------- 4 files changed, 14 insertions(+), 89 deletions(-) (limited to 'arch') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 0e97ccd29fe3..7edc08398c4a 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -765,27 +765,6 @@ static u64 jit_get_skb_w(struct sk_buff *skb, unsigned offset) return (u64)err << 32 | ntohl(ret); } -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - u8 *ct = (u8 *)&skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - pr_err_once("Please fix pkt_type_offset(), as pkt_type couldn't be found\n"); - return -1; -} - static int build_body(struct jit_ctx *ctx) { void *load_func[] = {jit_get_skb_b, jit_get_skb_h, jit_get_skb_w}; @@ -1332,11 +1311,7 @@ jmp_cmp: case BPF_ANC | SKF_AD_PKTTYPE: ctx->flags |= SEEN_SKB; - off = pkt_type_offset(); - - if (off < 0) - return -1; - emit_load_byte(r_tmp, r_skb, off, ctx); + emit_load_byte(r_tmp, r_skb, PKT_TYPE_OFFSET(), ctx); /* Keep only the last 3 bits */ emit_andi(r_A, r_tmp, PKT_TYPE_MAX, ctx); #ifdef __BIG_ENDIAN_BITFIELD diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c index 555f5c7e83ab..c52ac77408ca 100644 --- a/arch/s390/net/bpf_jit_comp.c +++ b/arch/s390/net/bpf_jit_comp.c @@ -227,37 +227,6 @@ static void bpf_jit_epilogue(struct bpf_jit *jit) EMIT2(0x07fe); } -/* Helper to find the offset of pkt_type in sk_buff - * Make sure its still a 3bit field starting at the MSBs within a byte. - */ -#define PKT_TYPE_MAX 0xe0 -static int pkt_type_offset; - -static int __init bpf_pkt_type_offset_init(void) -{ - struct sk_buff skb_probe = { - .pkt_type = ~0, - }; - char *ct = (char *)&skb_probe; - int off; - - pkt_type_offset = -1; - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (!ct[off]) - continue; - if (ct[off] == PKT_TYPE_MAX) - pkt_type_offset = off; - else { - /* Found non matching bit pattern, fix needed. */ - WARN_ON_ONCE(1); - pkt_type_offset = -1; - return -1; - } - } - return 0; -} -device_initcall(bpf_pkt_type_offset_init); - /* * make sure we dont leak kernel information to user */ @@ -757,12 +726,10 @@ call_fn: /* lg %r1,(%r13) */ } break; case BPF_ANC | SKF_AD_PKTTYPE: - if (pkt_type_offset < 0) - goto out; /* lhi %r5,0 */ EMIT4(0xa7580000); /* ic %r5,(%r2) */ - EMIT4_DISP(0x43502000, pkt_type_offset); + EMIT4_DISP(0x43502000, PKT_TYPE_OFFSET()); /* srl %r5,5 */ EMIT4_DISP(0x88500000, 5); break; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 07c9fdd0c126..756e3d057e84 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -548,6 +548,16 @@ struct sk_buff { ip_summed:2, nohdr:1, nfctinfo:3; + +/* if you move pkt_type around you also must adapt those constants */ +#ifdef __BIG_ENDIAN_BITFIELD +#define PKT_TYPE_MAX (7 << 5) +#else +#define PKT_TYPE_MAX 7 +#endif +#define PKT_TYPE_OFFSET() offsetof(struct sk_buff, __pkt_type_offset) + + __u8 __pkt_type_offset[0]; __u8 pkt_type:3, fclone:2, ipvs_property:1, diff --git a/net/core/filter.c b/net/core/filter.c index dfc716ffa44b..601f28de7311 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -87,30 +87,6 @@ int sk_filter(struct sock *sk, struct sk_buff *skb) } EXPORT_SYMBOL(sk_filter); -/* Helper to find the offset of pkt_type in sk_buff structure. We want - * to make sure its still a 3bit field starting at a byte boundary; - * taken from arch/x86/net/bpf_jit_comp.c. - */ -#ifdef __BIG_ENDIAN_BITFIELD -#define PKT_TYPE_MAX (7 << 5) -#else -#define PKT_TYPE_MAX 7 -#endif -static unsigned int pkt_type_offset(void) -{ - struct sk_buff skb_probe = { .pkt_type = ~0, }; - u8 *ct = (u8 *) &skb_probe; - unsigned int off; - - for (off = 0; off < sizeof(struct sk_buff); off++) { - if (ct[off] == PKT_TYPE_MAX) - return off; - } - - pr_err_once("Please fix %s, as pkt_type couldn't be found!\n", __func__); - return -1; -} - static u64 __skb_get_pay_offset(u64 ctx, u64 a, u64 x, u64 r4, u64 r5) { return skb_get_poff((struct sk_buff *)(unsigned long) ctx); @@ -190,11 +166,8 @@ static bool convert_bpf_extensions(struct sock_filter *fp, break; case SKF_AD_OFF + SKF_AD_PKTTYPE: - *insn = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, - pkt_type_offset()); - if (insn->off < 0) - return false; - insn++; + *insn++ = BPF_LDX_MEM(BPF_B, BPF_REG_A, BPF_REG_CTX, + PKT_TYPE_OFFSET()); *insn = BPF_ALU32_IMM(BPF_AND, BPF_REG_A, PKT_TYPE_MAX); #ifdef __BIG_ENDIAN_BITFIELD insn++; -- cgit v1.2.3 From 0afdfe951989aec4528a88213b1e1b1b595feae0 Mon Sep 17 00:00:00 2001 From: Frank Li Date: Sat, 13 Sep 2014 05:00:57 +0800 Subject: ARM: dts: imx6sx: add multi-queue support enet Enable 3 queues suppport for ethernet Signed-off-by: Frank Li Signed-off-by: David S. Miller --- arch/arm/boot/dts/imx6sx.dtsi | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi index f4b9da65bc0f..0a03260c1707 100644 --- a/arch/arm/boot/dts/imx6sx.dtsi +++ b/arch/arm/boot/dts/imx6sx.dtsi @@ -776,6 +776,8 @@ <&clks IMX6SX_CLK_ENET_PTP>; clock-names = "ipg", "ahb", "ptp", "enet_clk_ref", "enet_out"; + fsl,num-tx-queues=<3>; + fsl,num-rx-queues=<3>; status = "disabled"; }; -- cgit v1.2.3 From b4d2394d01bc642e95b2cba956d908423c1bef77 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 15 Sep 2014 13:00:27 -0400 Subject: dsa: Replace mii_bus with a generic host device This change makes it so that instead of passing and storing a mii_bus we instead pass and store a host_dev. From there we can test to determine the exact type of device, and can verify it is the correct device for our switch. So for example it would be possible to pass a device pointer from a pci_dev and instead of checking for a PHY ID we could check for a vendor and/or device ID. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- arch/arm/plat-orion/common.c | 2 +- drivers/net/dsa/bcm_sf2.c | 2 +- drivers/net/dsa/mv88e6060.c | 13 +++++++++---- drivers/net/dsa/mv88e6123_61_65.c | 6 +++++- drivers/net/dsa/mv88e6131.c | 6 +++++- drivers/net/dsa/mv88e6171.c | 6 +++++- drivers/net/dsa/mv88e6xxx.c | 4 ++-- include/net/dsa.h | 9 +++++---- net/dsa/dsa.c | 24 ++++++++---------------- net/dsa/slave.c | 2 +- 10 files changed, 42 insertions(+), 32 deletions(-) (limited to 'arch') diff --git a/arch/arm/plat-orion/common.c b/arch/arm/plat-orion/common.c index 3ec6e8e8d368..f5b00f41c4f6 100644 --- a/arch/arm/plat-orion/common.c +++ b/arch/arm/plat-orion/common.c @@ -499,7 +499,7 @@ void __init orion_ge00_switch_init(struct dsa_platform_data *d, int irq) d->netdev = &orion_ge00.dev; for (i = 0; i < d->nr_chips; i++) - d->chip[i].mii_bus = &orion_ge00_shared.dev; + d->chip[i].host_dev = &orion_ge00_shared.dev; orion_switch_device.dev.platform_data = d; platform_device_register(&orion_switch_device); diff --git a/drivers/net/dsa/bcm_sf2.c b/drivers/net/dsa/bcm_sf2.c index e9918c7f1792..02d7db320d90 100644 --- a/drivers/net/dsa/bcm_sf2.c +++ b/drivers/net/dsa/bcm_sf2.c @@ -129,7 +129,7 @@ static int bcm_sf2_sw_get_sset_count(struct dsa_switch *ds) return BCM_SF2_STATS_SIZE; } -static char *bcm_sf2_sw_probe(struct mii_bus *bus, int sw_addr) +static char *bcm_sf2_sw_probe(struct device *host_dev, int sw_addr) { return "Broadcom Starfighter 2"; } diff --git a/drivers/net/dsa/mv88e6060.c b/drivers/net/dsa/mv88e6060.c index d8037c1055ac..776e965dc9f4 100644 --- a/drivers/net/dsa/mv88e6060.c +++ b/drivers/net/dsa/mv88e6060.c @@ -21,7 +21,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) { - return mdiobus_read(ds->master_mii_bus, ds->pd->sw_addr + addr, reg); + return mdiobus_read(to_mii_bus(ds->master_dev), + ds->pd->sw_addr + addr, reg); } #define REG_READ(addr, reg) \ @@ -37,8 +38,8 @@ static int reg_read(struct dsa_switch *ds, int addr, int reg) static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) { - return mdiobus_write(ds->master_mii_bus, ds->pd->sw_addr + addr, - reg, val); + return mdiobus_write(to_mii_bus(ds->master_dev), + ds->pd->sw_addr + addr, reg, val); } #define REG_WRITE(addr, reg, val) \ @@ -50,10 +51,14 @@ static int reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) return __ret; \ }) -static char *mv88e6060_probe(struct mii_bus *bus, int sw_addr) +static char *mv88e6060_probe(struct device *host_dev, int sw_addr) { + struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); int ret; + if (bus == NULL) + return NULL; + ret = mdiobus_read(bus, sw_addr + REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; diff --git a/drivers/net/dsa/mv88e6123_61_65.c b/drivers/net/dsa/mv88e6123_61_65.c index 975774f338c2..a332c53ff955 100644 --- a/drivers/net/dsa/mv88e6123_61_65.c +++ b/drivers/net/dsa/mv88e6123_61_65.c @@ -17,10 +17,14 @@ #include #include "mv88e6xxx.h" -static char *mv88e6123_61_65_probe(struct mii_bus *bus, int sw_addr) +static char *mv88e6123_61_65_probe(struct device *host_dev, int sw_addr) { + struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); int ret; + if (bus == NULL) + return NULL; + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { if (ret == 0x1212) diff --git a/drivers/net/dsa/mv88e6131.c b/drivers/net/dsa/mv88e6131.c index 35541f2ceca3..244c735014fa 100644 --- a/drivers/net/dsa/mv88e6131.c +++ b/drivers/net/dsa/mv88e6131.c @@ -22,10 +22,14 @@ #define ID_6095 0x0950 #define ID_6131 0x1060 -static char *mv88e6131_probe(struct mii_bus *bus, int sw_addr) +static char *mv88e6131_probe(struct device *host_dev, int sw_addr) { + struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); int ret; + if (bus == NULL) + return NULL; + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { ret &= 0xfff0; diff --git a/drivers/net/dsa/mv88e6171.c b/drivers/net/dsa/mv88e6171.c index 03a70069a8c6..6365e30138af 100644 --- a/drivers/net/dsa/mv88e6171.c +++ b/drivers/net/dsa/mv88e6171.c @@ -17,10 +17,14 @@ #include #include "mv88e6xxx.h" -static char *mv88e6171_probe(struct mii_bus *bus, int sw_addr) +static char *mv88e6171_probe(struct device *host_dev, int sw_addr) { + struct mii_bus *bus = dsa_host_dev_to_mii_bus(host_dev); int ret; + if (bus == NULL) + return NULL; + ret = __mv88e6xxx_reg_read(bus, sw_addr, REG_PORT(0), 0x03); if (ret >= 0) { if ((ret & 0xfff0) == 0x1710) diff --git a/drivers/net/dsa/mv88e6xxx.c b/drivers/net/dsa/mv88e6xxx.c index 901d2a9704ef..d6f6428b27dc 100644 --- a/drivers/net/dsa/mv88e6xxx.c +++ b/drivers/net/dsa/mv88e6xxx.c @@ -78,7 +78,7 @@ int mv88e6xxx_reg_read(struct dsa_switch *ds, int addr, int reg) int ret; mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_read(ds->master_mii_bus, + ret = __mv88e6xxx_reg_read(to_mii_bus(ds->master_dev), ds->pd->sw_addr, addr, reg); mutex_unlock(&ps->smi_mutex); @@ -122,7 +122,7 @@ int mv88e6xxx_reg_write(struct dsa_switch *ds, int addr, int reg, u16 val) int ret; mutex_lock(&ps->smi_mutex); - ret = __mv88e6xxx_reg_write(ds->master_mii_bus, + ret = __mv88e6xxx_reg_write(to_mii_bus(ds->master_dev), ds->pd->sw_addr, addr, reg, val); mutex_unlock(&ps->smi_mutex); diff --git a/include/net/dsa.h b/include/net/dsa.h index a55c4e6a4f0f..c779e9bba1b3 100644 --- a/include/net/dsa.h +++ b/include/net/dsa.h @@ -34,7 +34,7 @@ struct dsa_chip_data { /* * How to access the switch configuration registers. */ - struct device *mii_bus; + struct device *host_dev; int sw_addr; /* Device tree node pointer for this specific switch chip @@ -134,9 +134,9 @@ struct dsa_switch { struct dsa_switch_driver *drv; /* - * Reference to mii bus to use. + * Reference to host device to use. */ - struct mii_bus *master_mii_bus; + struct device *master_dev; /* * Slave mii_bus and devices for the individual ports. @@ -178,7 +178,7 @@ struct dsa_switch_driver { /* * Probing and setup. */ - char *(*probe)(struct mii_bus *bus, int sw_addr); + char *(*probe)(struct device *host_dev, int sw_addr); int (*setup)(struct dsa_switch *ds); int (*set_addr)(struct dsa_switch *ds, u8 *addr); @@ -213,6 +213,7 @@ struct dsa_switch_driver { void register_switch_driver(struct dsa_switch_driver *type); void unregister_switch_driver(struct dsa_switch_driver *type); +struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev); static inline void *ds_to_priv(struct dsa_switch *ds) { diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c index 1df0a7cf1e9e..b34d6978d773 100644 --- a/net/dsa/dsa.c +++ b/net/dsa/dsa.c @@ -43,7 +43,7 @@ void unregister_switch_driver(struct dsa_switch_driver *drv) EXPORT_SYMBOL_GPL(unregister_switch_driver); static struct dsa_switch_driver * -dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) +dsa_switch_probe(struct device *host_dev, int sw_addr, char **_name) { struct dsa_switch_driver *ret; struct list_head *list; @@ -58,7 +58,7 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) drv = list_entry(list, struct dsa_switch_driver, list); - name = drv->probe(bus, sw_addr); + name = drv->probe(host_dev, sw_addr); if (name != NULL) { ret = drv; break; @@ -75,7 +75,7 @@ dsa_switch_probe(struct mii_bus *bus, int sw_addr, char **_name) /* basic switch operations **************************************************/ static struct dsa_switch * dsa_switch_setup(struct dsa_switch_tree *dst, int index, - struct device *parent, struct mii_bus *bus) + struct device *parent, struct device *host_dev) { struct dsa_chip_data *pd = dst->pd->chip + index; struct dsa_switch_driver *drv; @@ -88,7 +88,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, /* * Probe for switch model. */ - drv = dsa_switch_probe(bus, pd->sw_addr, &name); + drv = dsa_switch_probe(host_dev, pd->sw_addr, &name); if (drv == NULL) { printk(KERN_ERR "%s[%d]: could not detect attached switch\n", dst->master_netdev->name, index); @@ -109,8 +109,7 @@ dsa_switch_setup(struct dsa_switch_tree *dst, int index, ds->index = index; ds->pd = dst->pd->chip + index; ds->drv = drv; - ds->master_mii_bus = bus; - + ds->master_dev = host_dev; /* * Validate supplied switch configuration. @@ -285,7 +284,7 @@ static struct device *dev_find_class(struct device *parent, char *class) return device_find_child(parent, class, dev_is_class); } -static struct mii_bus *dev_to_mii_bus(struct device *dev) +struct mii_bus *dsa_host_dev_to_mii_bus(struct device *dev) { struct device *d; @@ -301,6 +300,7 @@ static struct mii_bus *dev_to_mii_bus(struct device *dev) return NULL; } +EXPORT_SYMBOL_GPL(dsa_host_dev_to_mii_bus); static struct net_device *dev_to_net_device(struct device *dev) { @@ -566,17 +566,9 @@ static int dsa_probe(struct platform_device *pdev) dst->cpu_port = -1; for (i = 0; i < pd->nr_chips; i++) { - struct mii_bus *bus; struct dsa_switch *ds; - bus = dev_to_mii_bus(pd->chip[i].mii_bus); - if (bus == NULL) { - printk(KERN_ERR "%s[%d]: no mii bus found for " - "dsa switch\n", dev->name, i); - continue; - } - - ds = dsa_switch_setup(dst, i, &pdev->dev, bus); + ds = dsa_switch_setup(dst, i, &pdev->dev, pd->chip[i].host_dev); if (IS_ERR(ds)) { printk(KERN_ERR "%s[%d]: couldn't create dsa switch " "instance (error %ld)\n", dev->name, i, diff --git a/net/dsa/slave.c b/net/dsa/slave.c index e38a331111c0..90c9689ed362 100644 --- a/net/dsa/slave.c +++ b/net/dsa/slave.c @@ -44,7 +44,7 @@ void dsa_slave_mii_bus_init(struct dsa_switch *ds) ds->slave_mii_bus->write = dsa_slave_phy_write; snprintf(ds->slave_mii_bus->id, MII_BUS_ID_SIZE, "dsa-%d:%.2x", ds->index, ds->pd->sw_addr); - ds->slave_mii_bus->parent = &ds->master_mii_bus->dev; + ds->slave_mii_bus->parent = ds->master_dev; } -- cgit v1.2.3 From 709f6c58d4dcd2b300c914ec17b297acae4cb998 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 16 Sep 2014 12:35:35 -0700 Subject: sparc: bpf_jit: add SKF_AD_PKTTYPE support to JIT commit 233577a22089 ("net: filter: constify detection of pkt_type_offset") allows us to implement simple PKTTYPE support in sparc JIT Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/sparc/net/bpf_jit_comp.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'arch') diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index b2ad9dc5425e..2dde48bdcc42 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -579,16 +579,11 @@ void bpf_jit_compile(struct bpf_prog *fp) case BPF_ANC | SKF_AD_PROTOCOL: emit_skb_load16(protocol, r_A); break; -#if 0 - /* GCC won't let us take the address of - * a bit field even though we very much - * know what we are doing here. - */ case BPF_ANC | SKF_AD_PKTTYPE: - __emit_skb_load8(pkt_type, r_A); + __emit_skb_load8(__pkt_type_offset, r_A); + emit_andi(r_A, PKT_TYPE_MAX, r_A); emit_alu_K(SRL, 5); break; -#endif case BPF_ANC | SKF_AD_IFINDEX: emit_skb_loadptr(dev, r_A); emit_cmpi(r_A, 0); -- cgit v1.2.3 From e8b56d55a30afe588d905913d011678235dda437 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 19 Sep 2014 14:56:57 +0200 Subject: net: bpf: arm: make hole-faulting more robust Will Deacon pointed out, that the currently used opcode for filling holes, that is 0xe7ffffff, seems not robust enough ... $ echo 0xffffffe7 | xxd -r > test.bin $ arm-linux-gnueabihf-objdump -m arm -D -b binary test.bin ... 0: e7ffffff udf #65535 ; 0xffff ... while for Thumb, it ends up as ... 0: ffff e7ff vqshl.u64 q15, , #63 ... which is a bit fragile. The ARM specification defines some *permanently* guaranteed undefined instruction (UDF) space, for example for ARM in ARMv7-AR, section A5.4 and for Thumb in ARMv7-M, section A5.2.6. Similarly, ptrace, kprobes, kgdb, bug and uprobes make use of such instruction as well to trap. Given mentioned section from the specification, we can find such a universe as (where 'x' denotes 'don't care'): ARM: xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx Thumb: 1101 1110 xxxx xxxx We therefore should use a more robust opcode that fits both. Russell King suggested that we can even reuse a single 32-bit word, that is, 0xe7fddef1 which will fault if executed in ARM *or* Thumb mode as done in f928d4f2a86f ("ARM: poison the vectors page"). That will still hold our requirements: $ echo 0xf1defde7 | xxd -r > test.bin $ arm-unknown-linux-gnueabi-objdump -m arm -D -b binary test.bin ... 0: e7fddef1 udf #56801 ; 0xdde1 $ echo 0xf1defde7f1defde7f1defde7 | xxd -r > test.bin $ arm-unknown-linux-gnueabi-objdump -marm -Mforce-thumb -D -b binary test.bin ... 0: def1 udf #241 ; 0xf1 2: e7fd b.n 0x0 4: def1 udf #241 ; 0xf1 6: e7fd b.n 0x4 8: def1 udf #241 ; 0xf1 a: e7fd b.n 0x8 So on ARM 0xe7fddef1 conforms to the above UDF pattern, and the low 16 bit likewise correspond to UDF in Thumb case. The 0xe7fd part is an unconditional branch back to the UDF instruction. Signed-off-by: Daniel Borkmann Cc: Russell King Cc: Catalin Marinas Cc: Will Deacon Cc: Mircea Gherzan Cc: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/arm/net/bpf_jit_32.c | 6 +++--- arch/arm/net/bpf_jit_32.h | 14 ++++++++++++++ 2 files changed, 17 insertions(+), 3 deletions(-) (limited to 'arch') diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c index 6b45f649eff0..e1268f905026 100644 --- a/arch/arm/net/bpf_jit_32.c +++ b/arch/arm/net/bpf_jit_32.c @@ -16,6 +16,7 @@ #include #include #include + #include #include #include @@ -175,11 +176,10 @@ static inline bool is_load_to_a(u16 inst) static void jit_fill_hole(void *area, unsigned int size) { - /* Insert illegal UND instructions. */ - u32 *ptr, fill_ins = 0xe7ffffff; + u32 *ptr; /* We are guaranteed to have aligned memory. */ for (ptr = area; size >= sizeof(u32); size -= sizeof(u32)) - *ptr++ = fill_ins; + *ptr++ = __opcode_to_mem_arm(ARM_INST_UDF); } static void build_prologue(struct jit_ctx *ctx) diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h index afb84621ff6f..b2d7d92859d3 100644 --- a/arch/arm/net/bpf_jit_32.h +++ b/arch/arm/net/bpf_jit_32.h @@ -114,6 +114,20 @@ #define ARM_INST_UMULL 0x00800090 +/* + * Use a suitable undefined instruction to use for ARM/Thumb2 faulting. + * We need to be careful not to conflict with those used by other modules + * (BUG, kprobes, etc) and the register_undef_hook() system. + * + * The ARM architecture reference manual guarantees that the following + * instruction space will produce an undefined instruction exception on + * all CPUs: + * + * ARM: xxxx 0111 1111 xxxx xxxx xxxx 1111 xxxx ARMv7-AR, section A5.4 + * Thumb: 1101 1110 xxxx xxxx ARMv7-M, section A5.2.6 + */ +#define ARM_INST_UDF 0xe7fddef1 + /* register */ #define _AL3_R(op, rd, rn, rm) ((op ## _R) | (rd) << 12 | (rn) << 16 | (rm)) /* immediate */ -- cgit v1.2.3 From 749730ce42a2121e1c88350d69478bff3994b10a Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Fri, 26 Sep 2014 00:16:58 -0700 Subject: bpf: enable bpf syscall on x64 and i386 done as separate commit to ease conflict resolution Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/x86/syscalls/syscall_32.tbl | 1 + arch/x86/syscalls/syscall_64.tbl | 1 + include/linux/syscalls.h | 3 ++- include/uapi/asm-generic/unistd.h | 4 +++- kernel/sys_ni.c | 3 +++ 5 files changed, 10 insertions(+), 2 deletions(-) (limited to 'arch') diff --git a/arch/x86/syscalls/syscall_32.tbl b/arch/x86/syscalls/syscall_32.tbl index 028b78168d85..9fe1b5d002f0 100644 --- a/arch/x86/syscalls/syscall_32.tbl +++ b/arch/x86/syscalls/syscall_32.tbl @@ -363,3 +363,4 @@ 354 i386 seccomp sys_seccomp 355 i386 getrandom sys_getrandom 356 i386 memfd_create sys_memfd_create +357 i386 bpf sys_bpf diff --git a/arch/x86/syscalls/syscall_64.tbl b/arch/x86/syscalls/syscall_64.tbl index 35dd922727b9..281150b539a2 100644 --- a/arch/x86/syscalls/syscall_64.tbl +++ b/arch/x86/syscalls/syscall_64.tbl @@ -327,6 +327,7 @@ 318 common getrandom sys_getrandom 319 common memfd_create sys_memfd_create 320 common kexec_file_load sys_kexec_file_load +321 common bpf sys_bpf # # x32-specific system call numbers start at 512 to avoid cache impact diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 0f86d85a9ce4..bda9b81357cc 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -65,6 +65,7 @@ struct old_linux_dirent; struct perf_event_attr; struct file_handle; struct sigaltstack; +union bpf_attr; #include #include @@ -875,5 +876,5 @@ asmlinkage long sys_seccomp(unsigned int op, unsigned int flags, const char __user *uargs); asmlinkage long sys_getrandom(char __user *buf, size_t count, unsigned int flags); - +asmlinkage long sys_bpf(int cmd, union bpf_attr *attr, unsigned int size); #endif diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 11d11bc5c78f..22749c134117 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -705,9 +705,11 @@ __SYSCALL(__NR_seccomp, sys_seccomp) __SYSCALL(__NR_getrandom, sys_getrandom) #define __NR_memfd_create 279 __SYSCALL(__NR_memfd_create, sys_memfd_create) +#define __NR_bpf 280 +__SYSCALL(__NR_bpf, sys_bpf) #undef __NR_syscalls -#define __NR_syscalls 280 +#define __NR_syscalls 281 /* * All syscalls below here should go away really, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 391d4ddb6f4b..b4b5083f5f5e 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -218,3 +218,6 @@ cond_syscall(sys_kcmp); /* operate on Secure Computing state */ cond_syscall(sys_seccomp); + +/* access BPF programs and maps */ +cond_syscall(sys_bpf); -- cgit v1.2.3 From cec08315190a4461a369d47041a510d104a5d2a2 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 23 Sep 2014 14:36:11 -0700 Subject: sparc: bpf_jit: add support for BPF_LD(X) | BPF_LEN instructions BPF_LD | BPF_W | BPF_LEN instruction is occasionally used by tcpdump and present in 11 tests in lib/test_bpf.c Teach sparc JIT compiler to emit it. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- arch/sparc/net/bpf_jit_comp.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/sparc/net/bpf_jit_comp.c b/arch/sparc/net/bpf_jit_comp.c index c1c60c32f6dd..f33e7c7a3bf7 100644 --- a/arch/sparc/net/bpf_jit_comp.c +++ b/arch/sparc/net/bpf_jit_comp.c @@ -624,7 +624,12 @@ void bpf_jit_compile(struct bpf_prog *fp) emit_and(r_A, r_TMP, r_A); } break; - + case BPF_LD | BPF_W | BPF_LEN: + emit_skb_load32(len, r_A); + break; + case BPF_LDX | BPF_W | BPF_LEN: + emit_skb_load32(len, r_X); + break; case BPF_LD | BPF_IMM: emit_loadimm(K, r_A); break; -- cgit v1.2.3 From c9aaf87cd06fe161d96a814e719f7854cf4be8a9 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Mon, 29 Sep 2014 08:53:18 +0200 Subject: am33xx: define syscon control module device node Signed-off-by: Markus Pargmann Reviewed-by: Wolfram Sang Acked-by: Tony Lindgren Signed-off-by: David S. Miller --- arch/arm/boot/dts/am33xx.dtsi | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 3a0a161342ba..25e38b6ac376 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -132,6 +132,11 @@ }; }; + cm: syscon@44e10000 { + compatible = "ti,am33xx-controlmodule", "syscon"; + reg = <0x44e10000 0x800>; + }; + intc: interrupt-controller@48200000 { compatible = "ti,omap2-intc"; interrupt-controller; -- cgit v1.2.3 From fa5f4adf3a1594d55d730ba68c10afa1edebf58a Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Mon, 29 Sep 2014 08:53:19 +0200 Subject: arm: dts: am33xx, Add syscon phandle to cpsw node There are 2 MACIDs stored in the control module of the am33xx. These are read by the cpsw driver if no valid MACID was found in the devicetree. Signed-off-by: Markus Pargmann Reviewed-by: Wolfram Sang Acked-by: Tony Lindgren Signed-off-by: David S. Miller --- arch/arm/boot/dts/am33xx.dtsi | 1 + 1 file changed, 1 insertion(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/am33xx.dtsi b/arch/arm/boot/dts/am33xx.dtsi index 25e38b6ac376..13e44b0f5adc 100644 --- a/arch/arm/boot/dts/am33xx.dtsi +++ b/arch/arm/boot/dts/am33xx.dtsi @@ -701,6 +701,7 @@ */ interrupts = <40 41 42 43>; ranges; + syscon = <&cm>; status = "disabled"; davinci_mdio: mdio@4a101000 { -- cgit v1.2.3 From bdc06cd701fc9eed0e87293f5f12d1cedcaec9dd Mon Sep 17 00:00:00 2001 From: Antoine Ténart Date: Tue, 30 Sep 2014 16:28:15 +0200 Subject: ARM: dts: berlin: add the Ethernet node This patch adds the Ethernet node, enabling the network unit on Berlin BG2Q SoCs. Signed-off-by: Antoine Tenart Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- arch/arm/boot/dts/berlin2q.dtsi | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/berlin2q.dtsi b/arch/arm/boot/dts/berlin2q.dtsi index 400c40fceccc..891d56b03922 100644 --- a/arch/arm/boot/dts/berlin2q.dtsi +++ b/arch/arm/boot/dts/berlin2q.dtsi @@ -114,6 +114,23 @@ #interrupt-cells = <3>; }; + eth0: ethernet@b90000 { + compatible = "marvell,pxa168-eth"; + reg = <0xb90000 0x10000>; + clocks = <&chip CLKID_GETH0>; + interrupts = ; + /* set by bootloader */ + local-mac-address = [00 00 00 00 00 00]; + #address-cells = <1>; + #size-cells = <0>; + phy-handle = <ðphy0>; + status = "disabled"; + + ethphy0: ethernet-phy@0 { + reg = <0>; + }; + }; + cpu-ctrl@dd0000 { compatible = "marvell,berlin-cpu-ctrl"; reg = <0xdd0000 0x10000>; -- cgit v1.2.3 From 75215193b975d9b5663f21d885c9f794de787328 Mon Sep 17 00:00:00 2001 From: Antoine Ténart Date: Tue, 30 Sep 2014 16:28:16 +0200 Subject: ARM: dts: berlin: enable the Ethernet port on the BG2Q DMP This patch enables the Ethernet port on the Marvell Berlin2Q DMP board. Signed-off-by: Antoine Tenart Acked-by: Arnd Bergmann Signed-off-by: David S. Miller --- arch/arm/boot/dts/berlin2q-marvell-dmp.dts | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'arch') diff --git a/arch/arm/boot/dts/berlin2q-marvell-dmp.dts b/arch/arm/boot/dts/berlin2q-marvell-dmp.dts index a357ce02a64e..ea1f99b8eed6 100644 --- a/arch/arm/boot/dts/berlin2q-marvell-dmp.dts +++ b/arch/arm/boot/dts/berlin2q-marvell-dmp.dts @@ -45,3 +45,7 @@ &uart0 { status = "okay"; }; + +ð0 { + status = "okay"; +}; -- cgit v1.2.3 From 72d099e2571b3b1bcf8cddb917f0cde4e60c354d Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 30 Sep 2014 09:39:00 -0700 Subject: next: mips: bpf: Fix build failure Fix: arch/mips/net/bpf_jit.c: In function 'build_body': arch/mips/net/bpf_jit.c:762:6: error: unused variable 'tmp' cc1: all warnings being treated as errors make[2]: *** [arch/mips/net/bpf_jit.o] Error 1 Seen when building mips:allmodconfig in -next since next-20140924. Signed-off-by: Guenter Roeck Signed-off-by: David S. Miller --- arch/mips/net/bpf_jit.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch') diff --git a/arch/mips/net/bpf_jit.c b/arch/mips/net/bpf_jit.c index 64ecf9a4e526..7edc08398c4a 100644 --- a/arch/mips/net/bpf_jit.c +++ b/arch/mips/net/bpf_jit.c @@ -772,7 +772,6 @@ static int build_body(struct jit_ctx *ctx) const struct sock_filter *inst; unsigned int i, off, load_order, condt; u32 k, b_off __maybe_unused; - int tmp; for (i = 0; i < prog->len; i++) { u16 code; -- cgit v1.2.3 From e4defc775424a3501caf98d266a8d7616fa53819 Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Mon, 29 Sep 2014 19:47:59 -0400 Subject: sunvnet: upgrade to VIO protocol version 1.6 This patch upgrades the sunvnet driver to support VIO protocol version 1.6. In particular, it adds per-port MTU negotiation, allowing MTUs other than ETH_FRAMELEN with ports using newer VIO protocol versions. Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- arch/sparc/include/asm/vio.h | 44 +++++++++++++++- arch/sparc/kernel/viohs.c | 14 ++++- drivers/net/ethernet/sun/sunvnet.c | 104 ++++++++++++++++++++++++++++++------- drivers/net/ethernet/sun/sunvnet.h | 3 ++ 4 files changed, 143 insertions(+), 22 deletions(-) (limited to 'arch') diff --git a/arch/sparc/include/asm/vio.h b/arch/sparc/include/asm/vio.h index e0f6c399f1d0..6b135a8ab07b 100644 --- a/arch/sparc/include/asm/vio.h +++ b/arch/sparc/include/asm/vio.h @@ -65,6 +65,7 @@ struct vio_dring_register { u16 options; #define VIO_TX_DRING 0x0001 #define VIO_RX_DRING 0x0002 +#define VIO_RX_DRING_DATA 0x0004 u16 resv; u32 num_cookies; struct ldc_trans_cookie cookies[0]; @@ -80,6 +81,8 @@ struct vio_dring_unregister { #define VIO_PKT_MODE 0x01 /* Packet based transfer */ #define VIO_DESC_MODE 0x02 /* In-band descriptors */ #define VIO_DRING_MODE 0x03 /* Descriptor rings */ +/* in vers >= 1.2, VIO_DRING_MODE is 0x04 and transfer mode is a bitmask */ +#define VIO_NEW_DRING_MODE 0x04 struct vio_dring_data { struct vio_msg_tag tag; @@ -205,10 +208,20 @@ struct vio_net_attr_info { u8 addr_type; #define VNET_ADDR_ETHERMAC 0x01 u16 ack_freq; - u32 resv1; + u8 plnk_updt; +#define PHYSLINK_UPDATE_NONE 0x00 +#define PHYSLINK_UPDATE_STATE 0x01 +#define PHYSLINK_UPDATE_STATE_ACK 0x02 +#define PHYSLINK_UPDATE_STATE_NACK 0x03 + u8 options; + u16 resv1; u64 addr; u64 mtu; - u64 resv2[3]; + u16 cflags; +#define VNET_LSO_IPV4_CAPAB 0x0001 + u16 ipv4_lso_maxlen; + u32 resv2; + u64 resv3[2]; }; #define VNET_NUM_MCAST 7 @@ -366,6 +379,33 @@ struct vio_driver_state { struct vio_driver_ops *ops; }; +static inline bool vio_version_before(struct vio_driver_state *vio, + u16 major, u16 minor) +{ + u32 have = (u32)vio->ver.major << 16 | vio->ver.minor; + u32 want = (u32)major << 16 | minor; + + return have < want; +} + +static inline bool vio_version_after(struct vio_driver_state *vio, + u16 major, u16 minor) +{ + u32 have = (u32)vio->ver.major << 16 | vio->ver.minor; + u32 want = (u32)major << 16 | minor; + + return have > want; +} + +static inline bool vio_version_after_eq(struct vio_driver_state *vio, + u16 major, u16 minor) +{ + u32 have = (u32)vio->ver.major << 16 | vio->ver.minor; + u32 want = (u32)major << 16 | minor; + + return have >= want; +} + #define viodbg(TYPE, f, a...) \ do { if (vio->debug & VIO_DEBUG_##TYPE) \ printk(KERN_INFO "vio: ID[%lu] " f, \ diff --git a/arch/sparc/kernel/viohs.c b/arch/sparc/kernel/viohs.c index f8e7dd53e1c7..7ef081a185b1 100644 --- a/arch/sparc/kernel/viohs.c +++ b/arch/sparc/kernel/viohs.c @@ -426,6 +426,13 @@ static int process_dreg_info(struct vio_driver_state *vio, if (vio->dr_state & VIO_DR_STATE_RXREG) goto send_nack; + /* v1.6 and higher, ACK with desired, supported mode, or NACK */ + if (vio_version_after_eq(vio, 1, 6)) { + if (!(pkt->options & VIO_TX_DRING)) + goto send_nack; + pkt->options = VIO_TX_DRING; + } + BUG_ON(vio->desc_buf); vio->desc_buf = kzalloc(pkt->descr_size, GFP_ATOMIC); @@ -453,8 +460,11 @@ static int process_dreg_info(struct vio_driver_state *vio, pkt->tag.stype = VIO_SUBTYPE_ACK; pkt->dring_ident = ++dr->ident; - viodbg(HS, "SEND DRING_REG ACK ident[%llx]\n", - (unsigned long long) pkt->dring_ident); + viodbg(HS, "SEND DRING_REG ACK ident[%llx] " + "ndesc[%u] dsz[%u] opt[0x%x] ncookies[%u]\n", + (unsigned long long) pkt->dring_ident, + pkt->num_descr, pkt->descr_size, pkt->options, + pkt->num_cookies); len = (sizeof(*pkt) + (dr->ncookies * sizeof(struct ldc_trans_cookie))); diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index edb860947da4..b1abcadb42ff 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -41,6 +42,7 @@ static int __vnet_tx_trigger(struct vnet_port *port, u32 start); /* Ordered from largest major to lowest */ static struct vio_version vnet_versions[] = { + { .major = 1, .minor = 6 }, { .major = 1, .minor = 0 }, }; @@ -67,6 +69,7 @@ static int vnet_send_attr(struct vio_driver_state *vio) struct vnet_port *port = to_vnet_port(vio); struct net_device *dev = port->vp->dev; struct vio_net_attr_info pkt; + int framelen = ETH_FRAME_LEN; int i; memset(&pkt, 0, sizeof(pkt)); @@ -74,19 +77,41 @@ static int vnet_send_attr(struct vio_driver_state *vio) pkt.tag.stype = VIO_SUBTYPE_INFO; pkt.tag.stype_env = VIO_ATTR_INFO; pkt.tag.sid = vio_send_sid(vio); - pkt.xfer_mode = VIO_DRING_MODE; + if (vio_version_before(vio, 1, 2)) + pkt.xfer_mode = VIO_DRING_MODE; + else + pkt.xfer_mode = VIO_NEW_DRING_MODE; pkt.addr_type = VNET_ADDR_ETHERMAC; pkt.ack_freq = 0; for (i = 0; i < 6; i++) pkt.addr |= (u64)dev->dev_addr[i] << ((5 - i) * 8); - pkt.mtu = ETH_FRAME_LEN; + if (vio_version_after(vio, 1, 3)) { + if (port->rmtu) { + port->rmtu = min(VNET_MAXPACKET, port->rmtu); + pkt.mtu = port->rmtu; + } else { + port->rmtu = VNET_MAXPACKET; + pkt.mtu = port->rmtu; + } + if (vio_version_after_eq(vio, 1, 6)) + pkt.options = VIO_TX_DRING; + } else if (vio_version_before(vio, 1, 3)) { + pkt.mtu = framelen; + } else { /* v1.3 */ + pkt.mtu = framelen + VLAN_HLEN; + } + + pkt.plnk_updt = PHYSLINK_UPDATE_NONE; + pkt.cflags = 0; viodbg(HS, "SEND NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " - "ackfreq[%u] mtu[%llu]\n", + "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " + "cflags[0x%04x] lso_max[%u]\n", pkt.xfer_mode, pkt.addr_type, - (unsigned long long) pkt.addr, - pkt.ack_freq, - (unsigned long long) pkt.mtu); + (unsigned long long)pkt.addr, + pkt.ack_freq, pkt.plnk_updt, pkt.options, + (unsigned long long)pkt.mtu, pkt.cflags, pkt.ipv4_lso_maxlen); + return vio_ldc_send(vio, &pkt, sizeof(pkt)); } @@ -94,18 +119,52 @@ static int vnet_send_attr(struct vio_driver_state *vio) static int handle_attr_info(struct vio_driver_state *vio, struct vio_net_attr_info *pkt) { - viodbg(HS, "GOT NET ATTR INFO xmode[0x%x] atype[0x%x] addr[%llx] " - "ackfreq[%u] mtu[%llu]\n", + struct vnet_port *port = to_vnet_port(vio); + u64 localmtu; + u8 xfer_mode; + + viodbg(HS, "GOT NET ATTR xmode[0x%x] atype[0x%x] addr[%llx] " + "ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] mtu[%llu] " + " (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", pkt->xfer_mode, pkt->addr_type, - (unsigned long long) pkt->addr, - pkt->ack_freq, - (unsigned long long) pkt->mtu); + (unsigned long long)pkt->addr, + pkt->ack_freq, pkt->plnk_updt, pkt->options, + (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, + pkt->ipv4_lso_maxlen); pkt->tag.sid = vio_send_sid(vio); - if (pkt->xfer_mode != VIO_DRING_MODE || + xfer_mode = pkt->xfer_mode; + /* for version < 1.2, VIO_DRING_MODE = 0x3 and no bitmask */ + if (vio_version_before(vio, 1, 2) && xfer_mode == VIO_DRING_MODE) + xfer_mode = VIO_NEW_DRING_MODE; + + /* MTU negotiation: + * < v1.3 - ETH_FRAME_LEN exactly + * > v1.3 - MIN(pkt.mtu, VNET_MAXPACKET, port->rmtu) and change + * pkt->mtu for ACK + * = v1.3 - ETH_FRAME_LEN + VLAN_HLEN exactly + */ + if (vio_version_before(vio, 1, 3)) { + localmtu = ETH_FRAME_LEN; + } else if (vio_version_after(vio, 1, 3)) { + localmtu = port->rmtu ? port->rmtu : VNET_MAXPACKET; + localmtu = min(pkt->mtu, localmtu); + pkt->mtu = localmtu; + } else { /* v1.3 */ + localmtu = ETH_FRAME_LEN + VLAN_HLEN; + } + port->rmtu = localmtu; + + /* for version >= 1.6, ACK packet mode we support */ + if (vio_version_after_eq(vio, 1, 6)) { + pkt->xfer_mode = VIO_NEW_DRING_MODE; + pkt->options = VIO_TX_DRING; + } + + if (!(xfer_mode | VIO_NEW_DRING_MODE) || pkt->addr_type != VNET_ADDR_ETHERMAC || - pkt->mtu != ETH_FRAME_LEN) { + pkt->mtu != localmtu) { viodbg(HS, "SEND NET ATTR NACK\n"); pkt->tag.stype = VIO_SUBTYPE_NACK; @@ -114,7 +173,14 @@ static int handle_attr_info(struct vio_driver_state *vio, return -ECONNRESET; } else { - viodbg(HS, "SEND NET ATTR ACK\n"); + viodbg(HS, "SEND NET ATTR ACK xmode[0x%x] atype[0x%x] " + "addr[%llx] ackfreq[%u] plnk_updt[0x%02x] opts[0x%02x] " + "mtu[%llu] (rmtu[%llu]) cflags[0x%04x] lso_max[%u]\n", + pkt->xfer_mode, pkt->addr_type, + (unsigned long long)pkt->addr, + pkt->ack_freq, pkt->plnk_updt, pkt->options, + (unsigned long long)pkt->mtu, port->rmtu, pkt->cflags, + pkt->ipv4_lso_maxlen); pkt->tag.stype = VIO_SUBTYPE_ACK; @@ -210,7 +276,7 @@ static int vnet_rx_one(struct vnet_port *port, unsigned int len, int err; err = -EMSGSIZE; - if (unlikely(len < ETH_ZLEN || len > ETH_FRAME_LEN)) { + if (unlikely(len < ETH_ZLEN || len > port->rmtu)) { dev->stats.rx_length_errors++; goto out_dropped; } @@ -558,8 +624,10 @@ static void vnet_event(void *arg, int event) vio_link_state_change(vio, event); spin_unlock_irqrestore(&vio->lock, flags); - if (event == LDC_EVENT_RESET) + if (event == LDC_EVENT_RESET) { + port->rmtu = 0; vio_port_up(vio); + } return; } @@ -1051,8 +1119,8 @@ static int vnet_port_alloc_tx_bufs(struct vnet_port *port) void *dring; for (i = 0; i < VNET_TX_RING_SIZE; i++) { - void *buf = kzalloc(ETH_FRAME_LEN + 8, GFP_KERNEL); - int map_len = (ETH_FRAME_LEN + 7) & ~7; + void *buf = kzalloc(VNET_MAXPACKET + 8, GFP_KERNEL); + int map_len = (VNET_MAXPACKET + 7) & ~7; err = -ENOMEM; if (!buf) diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h index da4933750d06..986e04b9313d 100644 --- a/drivers/net/ethernet/sun/sunvnet.h +++ b/drivers/net/ethernet/sun/sunvnet.h @@ -11,6 +11,7 @@ */ #define VNET_TX_TIMEOUT (5 * HZ) +#define VNET_MAXPACKET 1518ULL /* ETH_FRAMELEN + VLAN_HDR */ #define VNET_TX_RING_SIZE 512 #define VNET_TX_WAKEUP_THRESH(dr) ((dr)->pending / 4) @@ -44,6 +45,8 @@ struct vnet_port { u32 stop_rx_idx; bool stop_rx; bool start_cons; + + u64 rmtu; }; static inline struct vnet_port *to_vnet_port(struct vio_driver_state *vio) -- cgit v1.2.3 From 42db672dca5b74f9c85c318b74b3b298e86cbe8e Mon Sep 17 00:00:00 2001 From: David L Stevens Date: Mon, 29 Sep 2014 19:48:18 -0400 Subject: sunvnet: allow admin to set sunvnet MTU This patch allows an admin to set the MTU on a sunvnet device to arbitrary values between the minimum (68) and maximum (65535) IPv4 packet sizes. Signed-off-by: David L Stevens Signed-off-by: David S. Miller --- arch/sparc/kernel/ldc.c | 2 +- drivers/net/ethernet/sun/sunvnet.c | 7 +++++-- drivers/net/ethernet/sun/sunvnet.h | 6 ++++-- 3 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch') diff --git a/arch/sparc/kernel/ldc.c b/arch/sparc/kernel/ldc.c index 66dacd56bb10..0af28b984695 100644 --- a/arch/sparc/kernel/ldc.c +++ b/arch/sparc/kernel/ldc.c @@ -2159,7 +2159,7 @@ int ldc_map_single(struct ldc_channel *lp, state.pte_idx = (base - iommu->page_table); state.nc = 0; fill_cookies(&state, (pa & PAGE_MASK), (pa & ~PAGE_MASK), len); - BUG_ON(state.nc != 1); + BUG_ON(state.nc > ncookies); return state.nc; } diff --git a/drivers/net/ethernet/sun/sunvnet.c b/drivers/net/ethernet/sun/sunvnet.c index 8f5f4e3291a4..39804715bf22 100644 --- a/drivers/net/ethernet/sun/sunvnet.c +++ b/drivers/net/ethernet/sun/sunvnet.c @@ -913,6 +913,9 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) if (unlikely(!skb)) goto out_dropped; + if (skb->len > port->rmtu) + goto out_dropped; + spin_lock_irqsave(&port->vio.lock, flags); dr = &port->vio.drings[VIO_DRIVER_TX_RING]; @@ -944,7 +947,7 @@ static int vnet_start_xmit(struct sk_buff *skb, struct net_device *dev) skb = NULL; err = ldc_map_single(port->vio.lp, start, nlen, - port->tx_bufs[txi].cookies, 2, + port->tx_bufs[txi].cookies, VNET_MAXCOOKIES, (LDC_MAP_SHADOW | LDC_MAP_DIRECT | LDC_MAP_RW)); if (err < 0) { netdev_info(dev, "tx buffer map error %d\n", err); @@ -1182,7 +1185,7 @@ static void vnet_set_rx_mode(struct net_device *dev) static int vnet_change_mtu(struct net_device *dev, int new_mtu) { - if (new_mtu != ETH_DATA_LEN) + if (new_mtu < 68 || new_mtu > 65535) return -EINVAL; dev->mtu = new_mtu; diff --git a/drivers/net/ethernet/sun/sunvnet.h b/drivers/net/ethernet/sun/sunvnet.h index 02f507db9951..c91104542619 100644 --- a/drivers/net/ethernet/sun/sunvnet.h +++ b/drivers/net/ethernet/sun/sunvnet.h @@ -16,7 +16,7 @@ */ #define VNET_CLEAN_TIMEOUT ((HZ/100)+1) -#define VNET_MAXPACKET 1518ULL /* ETH_FRAMELEN + VLAN_HDR */ +#define VNET_MAXPACKET (65535ULL + ETH_HLEN + VLAN_HLEN) #define VNET_TX_RING_SIZE 512 #define VNET_TX_WAKEUP_THRESH(dr) ((dr)->pending / 4) @@ -26,10 +26,12 @@ */ #define VNET_PACKET_SKIP 6 +#define VNET_MAXCOOKIES (VNET_MAXPACKET/PAGE_SIZE + 1) + struct vnet_tx_entry { struct sk_buff *skb; unsigned int ncookies; - struct ldc_trans_cookie cookies[2]; + struct ldc_trans_cookie cookies[VNET_MAXCOOKIES]; }; struct vnet; -- cgit v1.2.3