diff options
Diffstat (limited to 'net')
-rw-r--r-- | net/core/filter.c | 442 | ||||
-rw-r--r-- | net/ipv4/af_inet.c | 71 | ||||
-rw-r--r-- | net/ipv4/tcp_ipv4.c | 16 | ||||
-rw-r--r-- | net/ipv4/udp.c | 14 | ||||
-rw-r--r-- | net/ipv6/af_inet6.c | 66 | ||||
-rw-r--r-- | net/ipv6/tcp_ipv6.c | 16 | ||||
-rw-r--r-- | net/ipv6/udp.c | 20 |
7 files changed, 582 insertions, 63 deletions
diff --git a/net/core/filter.c b/net/core/filter.c index e989bf313195..d31aff93270d 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -33,6 +33,7 @@ #include <linux/if_packet.h> #include <linux/if_arp.h> #include <linux/gfp.h> +#include <net/inet_common.h> #include <net/ip.h> #include <net/protocol.h> #include <net/netlink.h> @@ -3656,6 +3657,52 @@ static const struct bpf_func_proto bpf_sock_ops_cb_flags_set_proto = { .arg2_type = ARG_ANYTHING, }; +const struct ipv6_bpf_stub *ipv6_bpf_stub __read_mostly; +EXPORT_SYMBOL_GPL(ipv6_bpf_stub); + +BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr, + int, addr_len) +{ +#ifdef CONFIG_INET + struct sock *sk = ctx->sk; + int err; + + /* Binding to port can be expensive so it's prohibited in the helper. + * Only binding to IP is supported. + */ + err = -EINVAL; + if (addr->sa_family == AF_INET) { + if (addr_len < sizeof(struct sockaddr_in)) + return err; + if (((struct sockaddr_in *)addr)->sin_port != htons(0)) + return err; + return __inet_bind(sk, addr, addr_len, true, false); +#if IS_ENABLED(CONFIG_IPV6) + } else if (addr->sa_family == AF_INET6) { + if (addr_len < SIN6_LEN_RFC2133) + return err; + if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0)) + return err; + /* ipv6_bpf_stub cannot be NULL, since it's called from + * bpf_cgroup_inet6_connect hook and ipv6 is already loaded + */ + return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false); +#endif /* CONFIG_IPV6 */ + } +#endif /* CONFIG_INET */ + + return -EAFNOSUPPORT; +} + +static const struct bpf_func_proto bpf_bind_proto = { + .func = bpf_bind, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_PTR_TO_MEM, + .arg3_type = ARG_CONST_SIZE, +}; + static const struct bpf_func_proto * bpf_base_func_proto(enum bpf_func_id func_id) { @@ -3685,7 +3732,7 @@ bpf_base_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -sock_filter_func_proto(enum bpf_func_id func_id) +sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { /* inet and inet6 sockets are created in a process @@ -3699,7 +3746,29 @@ sock_filter_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -sk_filter_func_proto(enum bpf_func_id func_id) +sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) +{ + switch (func_id) { + /* inet and inet6 sockets are created in a process + * context so there is always a valid uid/gid + */ + case BPF_FUNC_get_current_uid_gid: + return &bpf_get_current_uid_gid_proto; + case BPF_FUNC_bind: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + return &bpf_bind_proto; + default: + return NULL; + } + default: + return bpf_base_func_proto(func_id); + } +} + +static const struct bpf_func_proto * +sk_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: @@ -3714,7 +3783,7 @@ sk_filter_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -tc_cls_act_func_proto(enum bpf_func_id func_id) +tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_store_bytes: @@ -3781,7 +3850,7 @@ tc_cls_act_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -xdp_func_proto(enum bpf_func_id func_id) +xdp_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_perf_event_output: @@ -3804,7 +3873,7 @@ xdp_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -lwt_inout_func_proto(enum bpf_func_id func_id) +lwt_inout_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_load_bytes: @@ -3831,7 +3900,7 @@ lwt_inout_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * - sock_ops_func_proto(enum bpf_func_id func_id) +sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_setsockopt: @@ -3847,7 +3916,8 @@ static const struct bpf_func_proto * } } -static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id) +static const struct bpf_func_proto * +sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_msg_redirect_map: @@ -3863,7 +3933,8 @@ static const struct bpf_func_proto *sk_msg_func_proto(enum bpf_func_id func_id) } } -static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) +static const struct bpf_func_proto * +sk_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_store_bytes: @@ -3888,7 +3959,7 @@ static const struct bpf_func_proto *sk_skb_func_proto(enum bpf_func_id func_id) } static const struct bpf_func_proto * -lwt_xmit_func_proto(enum bpf_func_id func_id) +lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_skb_get_tunnel_key: @@ -3918,11 +3989,12 @@ lwt_xmit_func_proto(enum bpf_func_id func_id) case BPF_FUNC_set_hash_invalid: return &bpf_set_hash_invalid_proto; default: - return lwt_inout_func_proto(func_id); + return lwt_inout_func_proto(func_id, prog); } } static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); @@ -3966,6 +4038,7 @@ static bool bpf_skb_is_valid_access(int off, int size, enum bpf_access_type type static bool sk_filter_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { @@ -3986,11 +4059,12 @@ static bool sk_filter_is_valid_access(int off, int size, } } - return bpf_skb_is_valid_access(off, size, type, info); + return bpf_skb_is_valid_access(off, size, type, prog, info); } static bool lwt_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { @@ -4020,32 +4094,83 @@ static bool lwt_is_valid_access(int off, int size, break; } - return bpf_skb_is_valid_access(off, size, type, info); + return bpf_skb_is_valid_access(off, size, type, prog, info); } -static bool sock_filter_is_valid_access(int off, int size, - enum bpf_access_type type, - struct bpf_insn_access_aux *info) + +/* Attach type specific accesses */ +static bool __sock_filter_check_attach_type(int off, + enum bpf_access_type access_type, + enum bpf_attach_type attach_type) { - if (type == BPF_WRITE) { - switch (off) { - case offsetof(struct bpf_sock, bound_dev_if): - case offsetof(struct bpf_sock, mark): - case offsetof(struct bpf_sock, priority): - break; + switch (off) { + case offsetof(struct bpf_sock, bound_dev_if): + case offsetof(struct bpf_sock, mark): + case offsetof(struct bpf_sock, priority): + switch (attach_type) { + case BPF_CGROUP_INET_SOCK_CREATE: + goto full_access; + default: + return false; + } + case bpf_ctx_range(struct bpf_sock, src_ip4): + switch (attach_type) { + case BPF_CGROUP_INET4_POST_BIND: + goto read_only; + default: + return false; + } + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): + switch (attach_type) { + case BPF_CGROUP_INET6_POST_BIND: + goto read_only; default: return false; } + case bpf_ctx_range(struct bpf_sock, src_port): + switch (attach_type) { + case BPF_CGROUP_INET4_POST_BIND: + case BPF_CGROUP_INET6_POST_BIND: + goto read_only; + default: + return false; + } + } +read_only: + return access_type == BPF_READ; +full_access: + return true; +} + +static bool __sock_filter_check_size(int off, int size, + struct bpf_insn_access_aux *info) +{ + const int size_default = sizeof(__u32); + + switch (off) { + case bpf_ctx_range(struct bpf_sock, src_ip4): + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): + bpf_ctx_record_field_size(info, size_default); + return bpf_ctx_narrow_access_ok(off, size, size_default); } - if (off < 0 || off + size > sizeof(struct bpf_sock)) + return size == size_default; +} + +static bool sock_filter_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + if (off < 0 || off >= sizeof(struct bpf_sock)) return false; - /* The verifier guarantees that size > 0. */ if (off % size != 0) return false; - if (size != sizeof(__u32)) + if (!__sock_filter_check_attach_type(off, type, + prog->expected_attach_type)) + return false; + if (!__sock_filter_check_size(off, size, info)) return false; - return true; } @@ -4096,6 +4221,7 @@ static int tc_cls_act_prologue(struct bpf_insn *insn_buf, bool direct_write, static bool tc_cls_act_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) { @@ -4125,7 +4251,7 @@ static bool tc_cls_act_is_valid_access(int off, int size, return false; } - return bpf_skb_is_valid_access(off, size, type, info); + return bpf_skb_is_valid_access(off, size, type, prog, info); } static bool __is_valid_xdp_access(int off, int size) @@ -4142,6 +4268,7 @@ static bool __is_valid_xdp_access(int off, int size) static bool xdp_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) @@ -4172,8 +4299,74 @@ void bpf_warn_invalid_xdp_action(u32 act) } EXPORT_SYMBOL_GPL(bpf_warn_invalid_xdp_action); +static bool sock_addr_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + const int size_default = sizeof(__u32); + + if (off < 0 || off >= sizeof(struct bpf_sock_addr)) + return false; + if (off % size != 0) + return false; + + /* Disallow access to IPv6 fields from IPv4 contex and vise + * versa. + */ + switch (off) { + case bpf_ctx_range(struct bpf_sock_addr, user_ip4): + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_BIND: + case BPF_CGROUP_INET4_CONNECT: + break; + default: + return false; + } + break; + case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET6_BIND: + case BPF_CGROUP_INET6_CONNECT: + break; + default: + return false; + } + break; + } + + switch (off) { + case bpf_ctx_range(struct bpf_sock_addr, user_ip4): + case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): + /* Only narrow read access allowed for now. */ + if (type == BPF_READ) { + bpf_ctx_record_field_size(info, size_default); + if (!bpf_ctx_narrow_access_ok(off, size, size_default)) + return false; + } else { + if (size != size_default) + return false; + } + break; + case bpf_ctx_range(struct bpf_sock_addr, user_port): + if (size != size_default) + return false; + break; + default: + if (type == BPF_READ) { + if (size != size_default) + return false; + } else { + return false; + } + } + + return true; +} + static bool sock_ops_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { const int size_default = sizeof(__u32); @@ -4220,6 +4413,7 @@ static int sk_skb_prologue(struct bpf_insn *insn_buf, bool direct_write, static bool sk_skb_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { switch (off) { @@ -4249,11 +4443,12 @@ static bool sk_skb_is_valid_access(int off, int size, break; } - return bpf_skb_is_valid_access(off, size, type, info); + return bpf_skb_is_valid_access(off, size, type, prog, info); } static bool sk_msg_is_valid_access(int off, int size, enum bpf_access_type type, + const struct bpf_prog *prog, struct bpf_insn_access_aux *info) { if (type == BPF_WRITE) @@ -4583,6 +4778,7 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, struct bpf_prog *prog, u32 *target_size) { struct bpf_insn *insn = insn_buf; + int off; switch (si->off) { case offsetof(struct bpf_sock, bound_dev_if): @@ -4638,6 +4834,43 @@ static u32 sock_filter_convert_ctx_access(enum bpf_access_type type, *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_PROTO_SHIFT); break; + + case offsetof(struct bpf_sock, src_ip4): + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_rcv_saddr, + FIELD_SIZEOF(struct sock_common, + skc_rcv_saddr), + target_size)); + break; + + case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]): +#if IS_ENABLED(CONFIG_IPV6) + off = si->off; + off -= offsetof(struct bpf_sock, src_ip6[0]); + *insn++ = BPF_LDX_MEM( + BPF_SIZE(si->code), si->dst_reg, si->src_reg, + bpf_target_off( + struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0], + FIELD_SIZEOF(struct sock_common, + skc_v6_rcv_saddr.s6_addr32[0]), + target_size) + off); +#else + (void)off; + *insn++ = BPF_MOV32_IMM(si->dst_reg, 0); +#endif + break; + + case offsetof(struct bpf_sock, src_port): + *insn++ = BPF_LDX_MEM( + BPF_FIELD_SIZEOF(struct sock_common, skc_num), + si->dst_reg, si->src_reg, + bpf_target_off(struct sock_common, skc_num, + FIELD_SIZEOF(struct sock_common, + skc_num), + target_size)); + break; } return insn - insn_buf; @@ -4713,6 +4946,152 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type, return insn - insn_buf; } +/* SOCK_ADDR_LOAD_NESTED_FIELD() loads Nested Field S.F.NF where S is type of + * context Structure, F is Field in context structure that contains a pointer + * to Nested Structure of type NS that has the field NF. + * + * SIZE encodes the load size (BPF_B, BPF_H, etc). It's up to caller to make + * sure that SIZE is not greater than actual size of S.F.NF. + * + * If offset OFF is provided, the load happens from that offset relative to + * offset of NF. + */ +#define SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF) \ + do { \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), si->dst_reg, \ + si->src_reg, offsetof(S, F)); \ + *insn++ = BPF_LDX_MEM( \ + SIZE, si->dst_reg, si->dst_reg, \ + bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ + target_size) \ + + OFF); \ + } while (0) + +#define SOCK_ADDR_LOAD_NESTED_FIELD(S, NS, F, NF) \ + SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, \ + BPF_FIELD_SIZEOF(NS, NF), 0) + +/* SOCK_ADDR_STORE_NESTED_FIELD_OFF() has semantic similar to + * SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF() but for store operation. + * + * It doesn't support SIZE argument though since narrow stores are not + * supported for now. + * + * In addition it uses Temporary Field TF (member of struct S) as the 3rd + * "register" since two registers available in convert_ctx_access are not + * enough: we can't override neither SRC, since it contains value to store, nor + * DST since it contains pointer to context that may be used by later + * instructions. But we need a temporary place to save pointer to nested + * structure whose field we want to store to. + */ +#define SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, TF) \ + do { \ + int tmp_reg = BPF_REG_9; \ + if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ + --tmp_reg; \ + if (si->src_reg == tmp_reg || si->dst_reg == tmp_reg) \ + --tmp_reg; \ + *insn++ = BPF_STX_MEM(BPF_DW, si->dst_reg, tmp_reg, \ + offsetof(S, TF)); \ + *insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(S, F), tmp_reg, \ + si->dst_reg, offsetof(S, F)); \ + *insn++ = BPF_STX_MEM( \ + BPF_FIELD_SIZEOF(NS, NF), tmp_reg, si->src_reg, \ + bpf_target_off(NS, NF, FIELD_SIZEOF(NS, NF), \ + target_size) \ + + OFF); \ + *insn++ = BPF_LDX_MEM(BPF_DW, tmp_reg, si->dst_reg, \ + offsetof(S, TF)); \ + } while (0) + +#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(S, NS, F, NF, SIZE, OFF, \ + TF) \ + do { \ + if (type == BPF_WRITE) { \ + SOCK_ADDR_STORE_NESTED_FIELD_OFF(S, NS, F, NF, OFF, \ + TF); \ + } else { \ + SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( \ + S, NS, F, NF, SIZE, OFF); \ + } \ + } while (0) + +#define SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(S, NS, F, NF, TF) \ + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( \ + S, NS, F, NF, BPF_FIELD_SIZEOF(NS, NF), 0, TF) + +static u32 sock_addr_convert_ctx_access(enum bpf_access_type type, + const struct bpf_insn *si, + struct bpf_insn *insn_buf, + struct bpf_prog *prog, u32 *target_size) +{ + struct bpf_insn *insn = insn_buf; + int off; + + switch (si->off) { + case offsetof(struct bpf_sock_addr, user_family): + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sockaddr, uaddr, sa_family); + break; + + case offsetof(struct bpf_sock_addr, user_ip4): + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct sockaddr_in, uaddr, + sin_addr, BPF_SIZE(si->code), 0, tmp_reg); + break; + + case bpf_ctx_range_till(struct bpf_sock_addr, user_ip6[0], user_ip6[3]): + off = si->off; + off -= offsetof(struct bpf_sock_addr, user_ip6[0]); + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr, + sin6_addr.s6_addr32[0], BPF_SIZE(si->code), off, + tmp_reg); + break; + + case offsetof(struct bpf_sock_addr, user_port): + /* To get port we need to know sa_family first and then treat + * sockaddr as either sockaddr_in or sockaddr_in6. + * Though we can simplify since port field has same offset and + * size in both structures. + * Here we check this invariant and use just one of the + * structures if it's true. + */ + BUILD_BUG_ON(offsetof(struct sockaddr_in, sin_port) != + offsetof(struct sockaddr_in6, sin6_port)); + BUILD_BUG_ON(FIELD_SIZEOF(struct sockaddr_in, sin_port) != + FIELD_SIZEOF(struct sockaddr_in6, sin6_port)); + SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sockaddr_in6, uaddr, + sin6_port, tmp_reg); + break; + + case offsetof(struct bpf_sock_addr, family): + SOCK_ADDR_LOAD_NESTED_FIELD(struct bpf_sock_addr_kern, + struct sock, sk, sk_family); + break; + + case offsetof(struct bpf_sock_addr, type): + SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct sock, sk, + __sk_flags_offset, BPF_W, 0); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_TYPE_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, SK_FL_TYPE_SHIFT); + break; + + case offsetof(struct bpf_sock_addr, protocol): + SOCK_ADDR_LOAD_NESTED_FIELD_SIZE_OFF( + struct bpf_sock_addr_kern, struct sock, sk, + __sk_flags_offset, BPF_W, 0); + *insn++ = BPF_ALU32_IMM(BPF_AND, si->dst_reg, SK_FL_PROTO_MASK); + *insn++ = BPF_ALU32_IMM(BPF_RSH, si->dst_reg, + SK_FL_PROTO_SHIFT); + break; + } + + return insn - insn_buf; +} + static u32 sock_ops_convert_ctx_access(enum bpf_access_type type, const struct bpf_insn *si, struct bpf_insn *insn_buf, @@ -5170,6 +5549,15 @@ const struct bpf_verifier_ops cg_sock_verifier_ops = { const struct bpf_prog_ops cg_sock_prog_ops = { }; +const struct bpf_verifier_ops cg_sock_addr_verifier_ops = { + .get_func_proto = sock_addr_func_proto, + .is_valid_access = sock_addr_is_valid_access, + .convert_ctx_access = sock_addr_convert_ctx_access, +}; + +const struct bpf_prog_ops cg_sock_addr_prog_ops = { +}; + const struct bpf_verifier_ops sock_ops_verifier_ops = { .get_func_proto = sock_ops_func_proto, .is_valid_access = sock_ops_is_valid_access, diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c index e8c7fad8c329..142d4c35b493 100644 --- a/net/ipv4/af_inet.c +++ b/net/ipv4/af_inet.c @@ -432,23 +432,37 @@ EXPORT_SYMBOL(inet_release); int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; struct sock *sk = sock->sk; - struct inet_sock *inet = inet_sk(sk); - struct net *net = sock_net(sk); - unsigned short snum; - int chk_addr_ret; - u32 tb_id = RT_TABLE_LOCAL; int err; /* If the socket has its own bind function then use it. (RAW) */ if (sk->sk_prot->bind) { - err = sk->sk_prot->bind(sk, uaddr, addr_len); - goto out; + return sk->sk_prot->bind(sk, uaddr, addr_len); } - err = -EINVAL; if (addr_len < sizeof(struct sockaddr_in)) - goto out; + return -EINVAL; + + /* BPF prog is run before any checks are done so that if the prog + * changes context in a wrong way it will be caught. + */ + err = BPF_CGROUP_RUN_PROG_INET4_BIND(sk, uaddr); + if (err) + return err; + + return __inet_bind(sk, uaddr, addr_len, false, true); +} +EXPORT_SYMBOL(inet_bind); + +int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock) +{ + struct sockaddr_in *addr = (struct sockaddr_in *)uaddr; + struct inet_sock *inet = inet_sk(sk); + struct net *net = sock_net(sk); + unsigned short snum; + int chk_addr_ret; + u32 tb_id = RT_TABLE_LOCAL; + int err; if (addr->sin_family != AF_INET) { /* Compatibility games : accept AF_UNSPEC (mapped to AF_INET) @@ -492,7 +506,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) * would be illegal to use them (multicast/broadcast) in * which case the sending device address is used. */ - lock_sock(sk); + if (with_lock) + lock_sock(sk); /* Check these errors (active socket, double bind). */ err = -EINVAL; @@ -504,11 +519,18 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_saddr = 0; /* Use device */ /* Make sure we are allowed to bind here. */ - if ((snum || !inet->bind_address_no_port) && - sk->sk_prot->get_port(sk, snum)) { - inet->inet_saddr = inet->inet_rcv_saddr = 0; - err = -EADDRINUSE; - goto out_release_sock; + if (snum || !(inet->bind_address_no_port || + force_bind_address_no_port)) { + if (sk->sk_prot->get_port(sk, snum)) { + inet->inet_saddr = inet->inet_rcv_saddr = 0; + err = -EADDRINUSE; + goto out_release_sock; + } + err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk); + if (err) { + inet->inet_saddr = inet->inet_rcv_saddr = 0; + goto out_release_sock; + } } if (inet->inet_rcv_saddr) @@ -521,22 +543,29 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk_dst_reset(sk); err = 0; out_release_sock: - release_sock(sk); + if (with_lock) + release_sock(sk); out: return err; } -EXPORT_SYMBOL(inet_bind); int inet_dgram_connect(struct socket *sock, struct sockaddr *uaddr, int addr_len, int flags) { struct sock *sk = sock->sk; + int err; if (addr_len < sizeof(uaddr->sa_family)) return -EINVAL; if (uaddr->sa_family == AF_UNSPEC) return sk->sk_prot->disconnect(sk, flags); + if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { + err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + if (err) + return err; + } + if (!inet_sk(sk)->inet_num && inet_autobind(sk)) return -EAGAIN; return sk->sk_prot->connect(sk, uaddr, addr_len); @@ -617,6 +646,12 @@ int __inet_stream_connect(struct socket *sock, struct sockaddr *uaddr, if (sk->sk_state != TCP_CLOSE) goto out; + if (BPF_CGROUP_PRE_CONNECT_ENABLED(sk)) { + err = sk->sk_prot->pre_connect(sk, uaddr, addr_len); + if (err) + goto out; + } + err = sk->sk_prot->connect(sk, uaddr, addr_len); if (err < 0) goto out; diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c index 2c6aec2643e8..3c11d992d784 100644 --- a/net/ipv4/tcp_ipv4.c +++ b/net/ipv4/tcp_ipv4.c @@ -140,6 +140,21 @@ int tcp_twsk_unique(struct sock *sk, struct sock *sktw, void *twp) } EXPORT_SYMBOL_GPL(tcp_twsk_unique); +static int tcp_v4_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from tcp_v4_connect() and intended to + * prevent BPF program called below from accessing bytes that are out + * of the bound specified by user in addr_len. + */ + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + sock_owned_by_me(sk); + + return BPF_CGROUP_RUN_PROG_INET4_CONNECT(sk, uaddr); +} + /* This will initiate an outgoing connection. */ int tcp_v4_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -2409,6 +2424,7 @@ struct proto tcp_prot = { .name = "TCP", .owner = THIS_MODULE, .close = tcp_close, + .pre_connect = tcp_v4_pre_connect, .connect = tcp_v4_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c index 908fc02fb4f8..9c6c77fec963 100644 --- a/net/ipv4/udp.c +++ b/net/ipv4/udp.c @@ -1658,6 +1658,19 @@ csum_copy_err: goto try_again; } +int udp_pre_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) +{ + /* This check is replicated from __ip4_datagram_connect() and + * intended to prevent BPF program called below from accessing bytes + * that are out of the bound specified by user in addr_len. + */ + if (addr_len < sizeof(struct sockaddr_in)) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET4_CONNECT_LOCK(sk, uaddr); +} +EXPORT_SYMBOL(udp_pre_connect); + int __udp_disconnect(struct sock *sk, int flags) { struct inet_sock *inet = inet_sk(sk); @@ -2530,6 +2543,7 @@ struct proto udp_prot = { .name = "UDP", .owner = THIS_MODULE, .close = udp_lib_close, + .pre_connect = udp_pre_connect, .connect = ip4_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c index dbbe04018813..41f50472679d 100644 --- a/net/ipv6/af_inet6.c +++ b/net/ipv6/af_inet6.c @@ -277,15 +277,7 @@ out_rcu_unlock: /* bind for INET6 API */ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) { - struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; struct sock *sk = sock->sk; - struct inet_sock *inet = inet_sk(sk); - struct ipv6_pinfo *np = inet6_sk(sk); - struct net *net = sock_net(sk); - __be32 v4addr = 0; - unsigned short snum; - bool saved_ipv6only; - int addr_type = 0; int err = 0; /* If the socket has its own bind function then use it. */ @@ -295,11 +287,35 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) if (addr_len < SIN6_LEN_RFC2133) return -EINVAL; + /* BPF prog is run before any checks are done so that if the prog + * changes context in a wrong way it will be caught. + */ + err = BPF_CGROUP_RUN_PROG_INET6_BIND(sk, uaddr); + if (err) + return err; + + return __inet6_bind(sk, uaddr, addr_len, false, true); +} +EXPORT_SYMBOL(inet6_bind); + +int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len, + bool force_bind_address_no_port, bool with_lock) +{ + struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr; + struct inet_sock *inet = inet_sk(sk); + struct ipv6_pinfo *np = inet6_sk(sk); + struct net *net = sock_net(sk); + __be32 v4addr = 0; + unsigned short snum; + bool saved_ipv6only; + int addr_type = 0; + int err = 0; + if (addr->sin6_family != AF_INET6) return -EAFNOSUPPORT; addr_type = ipv6_addr_type(&addr->sin6_addr); - if ((addr_type & IPV6_ADDR_MULTICAST) && sock->type == SOCK_STREAM) + if ((addr_type & IPV6_ADDR_MULTICAST) && sk->sk_type == SOCK_STREAM) return -EINVAL; snum = ntohs(addr->sin6_port); @@ -307,7 +323,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE)) return -EACCES; - lock_sock(sk); + if (with_lock) + lock_sock(sk); /* Check these errors (active socket, double bind). */ if (sk->sk_state != TCP_CLOSE || inet->inet_num) { @@ -395,12 +412,20 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) sk->sk_ipv6only = 1; /* Make sure we are allowed to bind here. */ - if ((snum || !inet->bind_address_no_port) && - sk->sk_prot->get_port(sk, snum)) { - sk->sk_ipv6only = saved_ipv6only; - inet_reset_saddr(sk); - err = -EADDRINUSE; - goto out; + if (snum || !(inet->bind_address_no_port || + force_bind_address_no_port)) { + if (sk->sk_prot->get_port(sk, snum)) { + sk->sk_ipv6only = saved_ipv6only; + inet_reset_saddr(sk); + err = -EADDRINUSE; + goto out; + } + err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk); + if (err) { + sk->sk_ipv6only = saved_ipv6only; + inet_reset_saddr(sk); + goto out; + } } if (addr_type != IPV6_ADDR_ANY) @@ -411,13 +436,13 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len) inet->inet_dport = 0; inet->inet_daddr = 0; out: - release_sock(sk); + if (with_lock) + release_sock(sk); return err; out_unlock: rcu_read_unlock(); goto out; } -EXPORT_SYMBOL(inet6_bind); int inet6_release(struct socket *sock) { @@ -869,6 +894,10 @@ static const struct ipv6_stub ipv6_stub_impl = { .nd_tbl = &nd_tbl, }; +static const struct ipv6_bpf_stub ipv6_bpf_stub_impl = { + .inet6_bind = __inet6_bind, +}; + static int __init inet6_init(void) { struct list_head *r; @@ -1025,6 +1054,7 @@ static int __init inet6_init(void) /* ensure that ipv6 stubs are visible only after ipv6 is ready */ wmb(); ipv6_stub = &ipv6_stub_impl; + ipv6_bpf_stub = &ipv6_bpf_stub_impl; out: return err; diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c index 5425d7b100ee..6469b741cf5a 100644 --- a/net/ipv6/tcp_ipv6.c +++ b/net/ipv6/tcp_ipv6.c @@ -117,6 +117,21 @@ static u32 tcp_v6_init_ts_off(const struct net *net, const struct sk_buff *skb) ipv6_hdr(skb)->saddr.s6_addr32); } +static int tcp_v6_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* This check is replicated from tcp_v6_connect() and intended to + * prevent BPF program called below from accessing bytes that are out + * of the bound specified by user in addr_len. + */ + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + sock_owned_by_me(sk); + + return BPF_CGROUP_RUN_PROG_INET6_CONNECT(sk, uaddr); +} + static int tcp_v6_connect(struct sock *sk, struct sockaddr *uaddr, int addr_len) { @@ -1925,6 +1940,7 @@ struct proto tcpv6_prot = { .name = "TCPv6", .owner = THIS_MODULE, .close = tcp_close, + .pre_connect = tcp_v6_pre_connect, .connect = tcp_v6_connect, .disconnect = tcp_disconnect, .accept = inet_csk_accept, diff --git a/net/ipv6/udp.c b/net/ipv6/udp.c index ad30f5e31969..6861ed479469 100644 --- a/net/ipv6/udp.c +++ b/net/ipv6/udp.c @@ -957,6 +957,25 @@ static void udp_v6_flush_pending_frames(struct sock *sk) } } +static int udpv6_pre_connect(struct sock *sk, struct sockaddr *uaddr, + int addr_len) +{ + /* The following checks are replicated from __ip6_datagram_connect() + * and intended to prevent BPF program called below from accessing + * bytes that are out of the bound specified by user in addr_len. + */ + if (uaddr->sa_family == AF_INET) { + if (__ipv6_only_sock(sk)) + return -EAFNOSUPPORT; + return udp_pre_connect(sk, uaddr, addr_len); + } + + if (addr_len < SIN6_LEN_RFC2133) + return -EINVAL; + + return BPF_CGROUP_RUN_PROG_INET6_CONNECT_LOCK(sk, uaddr); +} + /** * udp6_hwcsum_outgoing - handle outgoing HW checksumming * @sk: socket we are sending on @@ -1512,6 +1531,7 @@ struct proto udpv6_prot = { .name = "UDPv6", .owner = THIS_MODULE, .close = udp_lib_close, + .pre_connect = udpv6_pre_connect, .connect = ip6_datagram_connect, .disconnect = udp_disconnect, .ioctl = udp_ioctl, |