From b7552e1bccbe3da9c8e7386c6188e8ea4667c8e7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 18 May 2016 14:14:28 +0200 Subject: bpf: rather use get_random_int for randomizations Start address randomization and blinding in BPF currently use prandom_u32(). prandom_u32() values are not exposed to unpriviledged user space to my knowledge, but given other kernel facilities such as ASLR, stack canaries, etc make use of stronger get_random_int(), we better make use of it here as well given blinding requests successively new random values. get_random_int() has minimal entropy pool depletion, is not cryptographically secure, but doesn't need to be for our use cases here. Suggested-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- kernel/bpf/core.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index f1e8a0def99b..b94a36550591 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -231,7 +231,7 @@ bpf_jit_binary_alloc(unsigned int proglen, u8 **image_ptr, hdr->pages = size / PAGE_SIZE; hole = min_t(unsigned int, size - (proglen + sizeof(*hdr)), PAGE_SIZE - sizeof(*hdr)); - start = (prandom_u32() % hole) & ~(alignment - 1); + start = (get_random_int() % hole) & ~(alignment - 1); /* Leave a random number of instructions before BPF code. */ *image_ptr = &hdr->image[start]; @@ -251,7 +251,7 @@ static int bpf_jit_blind_insn(const struct bpf_insn *from, struct bpf_insn *to_buff) { struct bpf_insn *to = to_buff; - u32 imm_rnd = prandom_u32(); + u32 imm_rnd = get_random_int(); s16 off; BUILD_BUG_ON(BPF_REG_AX + 1 != MAX_BPF_JIT_REG); -- cgit v1.2.3 From e27f4a942a0ee4b84567a3c6cfa84f273e55cbb7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 20 May 2016 17:22:48 -0500 Subject: bpf: Use mount_nodev not mount_ns to mount the bpf filesystem While reviewing the filesystems that set FS_USERNS_MOUNT I spotted the bpf filesystem. Looking at the code I saw a broken usage of mount_ns with current->nsproxy->mnt_ns. As the code does not acquire a reference to the mount namespace it can not possibly be correct to store the mount namespace on the superblock as it does. Replace mount_ns with mount_nodev so that each mount of the bpf filesystem returns a distinct instance, and the code is not buggy. In discussion with Hannes Frederic Sowa it was reported that the use of mount_ns was an attempt to have one bpf instance per mount namespace, in an attempt to keep resources that pin resources from hiding. That intent simply does not work, the vfs is not built to allow that kind of behavior. Which means that the bpf filesystem really is buggy both semantically and in it's implemenation as it does not nor can it implement the original intent. This change is userspace visible, but my experience with similar filesystems leads me to believe nothing will break with a model of each mount of the bpf filesystem is distinct from all others. Fixes: b2197755b263 ("bpf: add support for persistent maps/progs") Cc: Hannes Frederic Sowa Acked-by: Daniel Borkmann Signed-off-by: "Eric W. Biederman" Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- kernel/bpf/inode.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c index 71b75d9c81da..04be7021f848 100644 --- a/kernel/bpf/inode.c +++ b/kernel/bpf/inode.c @@ -357,7 +357,7 @@ static int bpf_fill_super(struct super_block *sb, void *data, int silent) static struct dentry *bpf_mount(struct file_system_type *type, int flags, const char *dev_name, void *data) { - return mount_ns(type, flags, current->nsproxy->mnt_ns, bpf_fill_super); + return mount_nodev(type, flags, data, bpf_fill_super); } static struct file_system_type bpf_fs_type = { -- cgit v1.2.3 From d91b28ed42de99217efb2e8cb0357263d6fb737c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 19 May 2016 18:17:13 -0700 Subject: bpf: support decreasing order in direct packet access when packet headers are accessed in 'decreasing' order (like TCP port may be fetched before the program reads IP src) the llvm may generate the following code: [...] // R7=pkt(id=0,off=22,r=70) r2 = *(u32 *)(r7 +0) // good access [...] r7 += 40 // R7=pkt(id=0,off=62,r=70) r8 = *(u32 *)(r7 +0) // good access [...] r1 = *(u32 *)(r7 -20) // this one will fail though it's within a safe range // it's doing *(u32*)(skb->data + 42) Fix verifier to recognize such code pattern Alos turned out that 'off > range' condition is not a verifier bug. It's a buggy program that may do something like: if (ptr + 50 > data_end) return 0; ptr += 60; *(u32*)ptr; in such case emit "invalid access to packet, off=0 size=4, R1(id=0,off=60,r=50)" error message, so all information is available for the program author to fix the program. Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 12 ++++-------- 1 file changed, 4 insertions(+), 8 deletions(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index a08d66215245..d54e34874579 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -683,15 +683,11 @@ static int check_packet_access(struct verifier_env *env, u32 regno, int off, { struct reg_state *regs = env->cur_state.regs; struct reg_state *reg = ®s[regno]; - int linear_size = (int) reg->range - (int) reg->off; - if (linear_size < 0 || linear_size >= MAX_PACKET_OFF) { - verbose("verifier bug\n"); - return -EFAULT; - } - if (off < 0 || off + size > linear_size) { - verbose("invalid access to packet, off=%d size=%d, allowed=%d\n", - off, size, linear_size); + off += reg->off; + if (off < 0 || off + size > reg->range) { + verbose("invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n", + off, size, regno, reg->id, reg->off, reg->range); return -EACCES; } return 0; -- cgit v1.2.3 From 1b9b69ecb3a5236d4d3da0f0fa11af916371841e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 19 May 2016 18:17:14 -0700 Subject: bpf: teach verifier to recognize imm += ptr pattern Humans don't write C code like: u8 *ptr = skb->data; int imm = 4; imm += ptr; but from llvm backend point of view 'imm' and 'ptr' are registers and imm += ptr may be preferred vs ptr += imm depending which register value will be used further in the code, while verifier can only recognize ptr += imm. That caused small unrelated changes in the C code of the bpf program to trigger rejection by the verifier. Therefore teach the verifier to recognize both ptr += imm and imm += ptr. For example: when R6=pkt(id=0,off=0,r=62) R7=imm22 after r7 += r6 instruction will be R6=pkt(id=0,off=0,r=62) R7=pkt(id=0,off=22,r=62) Fixes: 969bf05eb3ce ("bpf: direct packet access") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- kernel/bpf/verifier.c | 18 +++++++++++++++++- 1 file changed, 17 insertions(+), 1 deletion(-) (limited to 'kernel') diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index d54e34874579..668e07903c8f 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -1245,6 +1245,7 @@ static int check_packet_ptr_add(struct verifier_env *env, struct bpf_insn *insn) struct reg_state *regs = env->cur_state.regs; struct reg_state *dst_reg = ®s[insn->dst_reg]; struct reg_state *src_reg = ®s[insn->src_reg]; + struct reg_state tmp_reg; s32 imm; if (BPF_SRC(insn->code) == BPF_K) { @@ -1267,6 +1268,19 @@ add_imm: */ dst_reg->off += imm; } else { + if (src_reg->type == PTR_TO_PACKET) { + /* R6=pkt(id=0,off=0,r=62) R7=imm22; r7 += r6 */ + tmp_reg = *dst_reg; /* save r7 state */ + *dst_reg = *src_reg; /* copy pkt_ptr state r6 into r7 */ + src_reg = &tmp_reg; /* pretend it's src_reg state */ + /* if the checks below reject it, the copy won't matter, + * since we're rejecting the whole program. If all ok, + * then imm22 state will be added to r7 + * and r7 will be pkt(id=0,off=22,r=62) while + * r6 will stay as pkt(id=0,off=0,r=62) + */ + } + if (src_reg->type == CONST_IMM) { /* pkt_ptr += reg where reg is known constant */ imm = src_reg->imm; @@ -1565,7 +1579,9 @@ static int check_alu_op(struct verifier_env *env, struct bpf_insn *insn) return 0; } else if (opcode == BPF_ADD && BPF_CLASS(insn->code) == BPF_ALU64 && - dst_reg->type == PTR_TO_PACKET) { + (dst_reg->type == PTR_TO_PACKET || + (BPF_SRC(insn->code) == BPF_X && + regs[insn->src_reg].type == PTR_TO_PACKET))) { /* ptr_to_packet += K|X */ return check_packet_ptr_add(env, insn); } else if (BPF_CLASS(insn->code) == BPF_ALU64 && -- cgit v1.2.3