From 0456ea170cd665ddbb9503be92e39f96055dd5fa Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Mon, 20 Apr 2020 10:46:10 -0700 Subject: bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT} Currently the following prog types don't fall back to bpf_base_func_proto() (instead they have cgroup_base_func_proto which has a limited set of helpers from bpf_base_func_proto): * BPF_PROG_TYPE_CGROUP_DEVICE * BPF_PROG_TYPE_CGROUP_SYSCTL * BPF_PROG_TYPE_CGROUP_SOCKOPT I don't see any specific reason why we shouldn't use bpf_base_func_proto(), every other type of program (except bpf-lirc and, understandably, tracing) use it, so let's fall back to bpf_base_func_proto for those prog types as well. This basically boils down to adding access to the following helpers: * BPF_FUNC_get_prandom_u32 * BPF_FUNC_get_smp_processor_id * BPF_FUNC_get_numa_node_id * BPF_FUNC_tail_call * BPF_FUNC_ktime_get_ns * BPF_FUNC_spin_lock (CAP_SYS_ADMIN) * BPF_FUNC_spin_unlock (CAP_SYS_ADMIN) * BPF_FUNC_jiffies64 (CAP_SYS_ADMIN) I've also added bpf_perf_event_output() because it's really handy for logging and debugging. Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200420174610.77494-1-sdf@google.com --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index fd2b2322412d..25da6ff2a880 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1523,6 +1523,7 @@ extern const struct bpf_func_proto bpf_strtoul_proto; extern const struct bpf_func_proto bpf_tcp_sock_proto; extern const struct bpf_func_proto bpf_jiffies64_proto; extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto; +extern const struct bpf_func_proto bpf_event_output_data_proto; const struct bpf_func_proto *bpf_tracing_func_proto( enum bpf_func_id func_id, const struct bpf_prog *prog); -- cgit v1.2.3 From 6890896bd765b0504761c61901c9804fca23bfb2 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Fri, 24 Apr 2020 16:59:41 -0700 Subject: bpf: Fix missing bpf_base_func_proto in cgroup_base_func_proto for CGROUP_NET=n linux-next build bot reported compile issue [1] with one of its configs. It looks like when we have CONFIG_NET=n and CONFIG_BPF{,_SYSCALL}=y, we are missing the bpf_base_func_proto definition (from net/core/filter.c) in cgroup_base_func_proto. I'm reshuffling the code a bit to make it work. The common helpers are moved into kernel/bpf/helpers.c and the bpf_base_func_proto is exported from there. Also, bpf_get_raw_cpu_id goes into kernel/bpf/core.c akin to existing bpf_user_rnd_u32. [1] https://lore.kernel.org/linux-next/CAKH8qBsBvKHswiX1nx40LgO+BGeTmb1NX8tiTttt_0uu6T3dCA@mail.gmail.com/T/#mff8b0c083314c68c2e2ef0211cb11bc20dc13c72 Fixes: 0456ea170cd6 ("bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT}") Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Cc: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20200424235941.58382-1-sdf@google.com --- include/linux/bpf.h | 8 ++++++ include/linux/filter.h | 2 -- kernel/bpf/core.c | 5 ++++ kernel/bpf/helpers.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++ net/core/filter.c | 78 +------------------------------------------------- 5 files changed, 87 insertions(+), 79 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 25da6ff2a880..5147e11e53ff 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1215,6 +1215,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog, struct bpf_prog *bpf_prog_by_id(u32 id); +const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id); #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -1365,6 +1366,12 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id) { return ERR_PTR(-ENOTSUPP); } + +static inline const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id) +{ + return NULL; +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -1531,6 +1538,7 @@ const struct bpf_func_proto *bpf_tracing_func_proto( /* Shared helpers among cBPF and eBPF. */ void bpf_user_rnd_init_once(void); u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); +u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); #if defined(CONFIG_NET) bool bpf_sock_common_is_valid_access(int off, int size, diff --git a/include/linux/filter.h b/include/linux/filter.h index 9b5aa5c483cc..af37318bb1c5 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -863,8 +863,6 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); -const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_attach_bpf(u32 ufd, struct sock *sk); diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 916f5132a984..0cc91805069a 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2136,6 +2136,11 @@ BPF_CALL_0(bpf_user_rnd_u32) return res; } +BPF_CALL_0(bpf_get_raw_cpu_id) +{ + return raw_smp_processor_id(); +} + /* Weak definitions of helper functions in case we don't have bpf syscall. */ const struct bpf_func_proto bpf_map_lookup_elem_proto __weak; const struct bpf_func_proto bpf_map_update_elem_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index bafc53ddd350..dbba4f41d508 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -562,3 +562,76 @@ const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = { .arg3_type = ARG_PTR_TO_UNINIT_MEM, .arg4_type = ARG_CONST_SIZE, }; + +static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { + .func = bpf_get_raw_cpu_id, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + +BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, + u64, flags, void *, data, u64, size) +{ + if (unlikely(flags & ~(BPF_F_INDEX_MASK))) + return -EINVAL; + + return bpf_event_output(map, flags, data, size, NULL, 0, NULL); +} + +const struct bpf_func_proto bpf_event_output_data_proto = { + .func = bpf_event_output_data, + .gpl_only = true, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_CONST_MAP_PTR, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE_OR_ZERO, +}; + +const struct bpf_func_proto * +bpf_base_func_proto(enum bpf_func_id func_id) +{ + switch (func_id) { + case BPF_FUNC_map_lookup_elem: + return &bpf_map_lookup_elem_proto; + case BPF_FUNC_map_update_elem: + return &bpf_map_update_elem_proto; + case BPF_FUNC_map_delete_elem: + return &bpf_map_delete_elem_proto; + case BPF_FUNC_map_push_elem: + return &bpf_map_push_elem_proto; + case BPF_FUNC_map_pop_elem: + return &bpf_map_pop_elem_proto; + case BPF_FUNC_map_peek_elem: + return &bpf_map_peek_elem_proto; + case BPF_FUNC_get_prandom_u32: + return &bpf_get_prandom_u32_proto; + case BPF_FUNC_get_smp_processor_id: + return &bpf_get_raw_smp_processor_id_proto; + case BPF_FUNC_get_numa_node_id: + return &bpf_get_numa_node_id_proto; + case BPF_FUNC_tail_call: + return &bpf_tail_call_proto; + case BPF_FUNC_ktime_get_ns: + return &bpf_ktime_get_ns_proto; + default: + break; + } + + if (!capable(CAP_SYS_ADMIN)) + return NULL; + + switch (func_id) { + case BPF_FUNC_spin_lock: + return &bpf_spin_lock_proto; + case BPF_FUNC_spin_unlock: + return &bpf_spin_unlock_proto; + case BPF_FUNC_trace_printk: + return bpf_get_trace_printk_proto(); + case BPF_FUNC_jiffies64: + return &bpf_jiffies64_proto; + default: + return NULL; + } +} diff --git a/net/core/filter.c b/net/core/filter.c index a943df3ad8b0..a605626142b6 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -256,17 +256,6 @@ BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb, offset); } -BPF_CALL_0(bpf_get_raw_cpu_id) -{ - return raw_smp_processor_id(); -} - -static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = { - .func = bpf_get_raw_cpu_id, - .gpl_only = false, - .ret_type = RET_INTEGER, -}; - static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg, struct bpf_insn *insn_buf) { @@ -4205,26 +4194,6 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags, - void *, data, u64, size) -{ - if (unlikely(flags & ~(BPF_F_INDEX_MASK))) - return -EINVAL; - - return bpf_event_output(map, flags, data, size, NULL, 0, NULL); -} - -const struct bpf_func_proto bpf_event_output_data_proto = { - .func = bpf_event_output_data, - .gpl_only = true, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_CONST_MAP_PTR, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE_OR_ZERO, -}; - BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, int, level, int, optname, char *, optval, int, optlen) { @@ -5983,52 +5952,7 @@ bool bpf_helper_changes_pkt_data(void *func) return false; } -const struct bpf_func_proto * -bpf_base_func_proto(enum bpf_func_id func_id) -{ - switch (func_id) { - case BPF_FUNC_map_lookup_elem: - return &bpf_map_lookup_elem_proto; - case BPF_FUNC_map_update_elem: - return &bpf_map_update_elem_proto; - case BPF_FUNC_map_delete_elem: - return &bpf_map_delete_elem_proto; - case BPF_FUNC_map_push_elem: - return &bpf_map_push_elem_proto; - case BPF_FUNC_map_pop_elem: - return &bpf_map_pop_elem_proto; - case BPF_FUNC_map_peek_elem: - return &bpf_map_peek_elem_proto; - case BPF_FUNC_get_prandom_u32: - return &bpf_get_prandom_u32_proto; - case BPF_FUNC_get_smp_processor_id: - return &bpf_get_raw_smp_processor_id_proto; - case BPF_FUNC_get_numa_node_id: - return &bpf_get_numa_node_id_proto; - case BPF_FUNC_tail_call: - return &bpf_tail_call_proto; - case BPF_FUNC_ktime_get_ns: - return &bpf_ktime_get_ns_proto; - default: - break; - } - - if (!capable(CAP_SYS_ADMIN)) - return NULL; - - switch (func_id) { - case BPF_FUNC_spin_lock: - return &bpf_spin_lock_proto; - case BPF_FUNC_spin_unlock: - return &bpf_spin_unlock_proto; - case BPF_FUNC_trace_printk: - return bpf_get_trace_printk_proto(); - case BPF_FUNC_jiffies64: - return &bpf_jiffies64_proto; - default: - return NULL; - } -} +const struct bpf_func_proto bpf_event_output_data_proto __weak; static const struct bpf_func_proto * sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) -- cgit v1.2.3 From 71d19214776e61b33da48f7c1b46e522c7f78221 Mon Sep 17 00:00:00 2001 From: Maciej Żenczykowski Date: Sun, 26 Apr 2020 09:15:25 -0700 Subject: bpf: add bpf_ktime_get_boot_ns() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit On a device like a cellphone which is constantly suspending and resuming CLOCK_MONOTONIC is not particularly useful for keeping track of or reacting to external network events. Instead you want to use CLOCK_BOOTTIME. Hence add bpf_ktime_get_boot_ns() as a mirror of bpf_ktime_get_ns() based around CLOCK_BOOTTIME instead of CLOCK_MONOTONIC. Signed-off-by: Maciej Żenczykowski Signed-off-by: Alexei Starovoitov --- drivers/media/rc/bpf-lirc.c | 2 ++ include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 13 ++++++++++++- kernel/bpf/core.c | 1 + kernel/bpf/helpers.c | 14 ++++++++++++++ kernel/trace/bpf_trace.c | 2 ++ tools/include/uapi/linux/bpf.h | 13 ++++++++++++- 7 files changed, 44 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c index 0f3417d161b8..069c42f22a8c 100644 --- a/drivers/media/rc/bpf-lirc.c +++ b/drivers/media/rc/bpf-lirc.c @@ -103,6 +103,8 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_peek_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_ktime_get_boot_ns: + return &bpf_ktime_get_boot_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_get_prandom_u32: diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5147e11e53ff..10960cfabea4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1509,6 +1509,7 @@ extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_get_numa_node_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; extern const struct bpf_func_proto bpf_ktime_get_ns_proto; +extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto; extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto; extern const struct bpf_func_proto bpf_get_current_uid_gid_proto; extern const struct bpf_func_proto bpf_get_current_comm_proto; diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7bbf1b65be10..4a6c47f3febe 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -652,6 +652,8 @@ union bpf_attr { * u64 bpf_ktime_get_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. + * Does not include time the system was suspended. + * See: clock_gettime(CLOCK_MONOTONIC) * Return * Current *ktime*. * @@ -3025,6 +3027,14 @@ union bpf_attr { * * **-EOPNOTSUPP** Unsupported operation, for example a * call from outside of TC ingress. * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * + * u64 bpf_ktime_get_boot_ns(void) + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Does include the time the system was suspended. + * See: clock_gettime(CLOCK_BOOTTIME) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3151,7 +3161,8 @@ union bpf_attr { FN(xdp_output), \ FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), + FN(sk_assign), \ + FN(ktime_get_boot_ns), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c index 0cc91805069a..6aa11de67315 100644 --- a/kernel/bpf/core.c +++ b/kernel/bpf/core.c @@ -2156,6 +2156,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto __weak; const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak; const struct bpf_func_proto bpf_get_numa_node_id_proto __weak; const struct bpf_func_proto bpf_ktime_get_ns_proto __weak; +const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak; const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak; const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak; diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 9a6b23387d02..5c0290e0696e 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -155,6 +155,18 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = { .ret_type = RET_INTEGER, }; +BPF_CALL_0(bpf_ktime_get_boot_ns) +{ + /* NMI safe access to clock boottime */ + return ktime_get_boot_fast_ns(); +} + +const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = { + .func = bpf_ktime_get_boot_ns, + .gpl_only = false, + .ret_type = RET_INTEGER, +}; + BPF_CALL_0(bpf_get_current_pid_tgid) { struct task_struct *task = current; @@ -615,6 +627,8 @@ bpf_base_func_proto(enum bpf_func_id func_id) return &bpf_tail_call_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_ktime_get_boot_ns: + return &bpf_ktime_get_boot_ns_proto; default: break; } diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index ca1796747a77..e875c95d3ced 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -797,6 +797,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_map_peek_elem_proto; case BPF_FUNC_ktime_get_ns: return &bpf_ktime_get_ns_proto; + case BPF_FUNC_ktime_get_boot_ns: + return &bpf_ktime_get_boot_ns_proto; case BPF_FUNC_tail_call: return &bpf_tail_call_proto; case BPF_FUNC_get_current_pid_tgid: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7bbf1b65be10..4a6c47f3febe 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -652,6 +652,8 @@ union bpf_attr { * u64 bpf_ktime_get_ns(void) * Description * Return the time elapsed since system boot, in nanoseconds. + * Does not include time the system was suspended. + * See: clock_gettime(CLOCK_MONOTONIC) * Return * Current *ktime*. * @@ -3025,6 +3027,14 @@ union bpf_attr { * * **-EOPNOTSUPP** Unsupported operation, for example a * call from outside of TC ingress. * * **-ESOCKTNOSUPPORT** Socket type not supported (reuseport). + * + * u64 bpf_ktime_get_boot_ns(void) + * Description + * Return the time elapsed since system boot, in nanoseconds. + * Does include the time the system was suspended. + * See: clock_gettime(CLOCK_BOOTTIME) + * Return + * Current *ktime*. */ #define __BPF_FUNC_MAPPER(FN) \ FN(unspec), \ @@ -3151,7 +3161,8 @@ union bpf_attr { FN(xdp_output), \ FN(get_netns_cookie), \ FN(get_current_ancestor_cgroup_id), \ - FN(sk_assign), + FN(sk_assign), \ + FN(ktime_get_boot_ns), /* integer value in 'imm' field of BPF_CALL instruction selects which helper * function eBPF program intends to call -- cgit v1.2.3 From 26363af5643490a817272e1cc6f1d3f1d550a699 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:35 +0200 Subject: mm: remove watermark_boost_factor_sysctl_handler watermark_boost_factor_sysctl_handler is just a pointless wrapper for proc_dointvec_minmax, so remove it and use proc_dointvec_minmax directly. Signed-off-by: Christoph Hellwig Acked-by: David Rientjes Signed-off-by: Al Viro --- include/linux/mmzone.h | 2 -- kernel/sysctl.c | 2 +- mm/page_alloc.c | 12 ------------ 3 files changed, 1 insertion(+), 15 deletions(-) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1b9de7d220fb..f37bb8f187fc 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -911,8 +911,6 @@ static inline int is_highmem(struct zone *zone) struct ctl_table; int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int watermark_boost_factor_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 8a176d8727a3..99d27acf4646 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -1491,7 +1491,7 @@ static struct ctl_table vm_table[] = { .data = &watermark_boost_factor, .maxlen = sizeof(watermark_boost_factor), .mode = 0644, - .proc_handler = watermark_boost_factor_sysctl_handler, + .proc_handler = proc_dointvec_minmax, .extra1 = SYSCTL_ZERO, }, { diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 69827d4fa052..62c1550cd43e 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -7978,18 +7978,6 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, return 0; } -int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) -{ - int rc; - - rc = proc_dointvec_minmax(table, write, buffer, length, ppos); - if (rc) - return rc; - - return 0; -} - int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos) { -- cgit v1.2.3 From 2374c09b1c8a883bb9b4b2fc3756703eeb618f4a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:36 +0200 Subject: sysctl: remove all extern declaration from sysctl.c Extern declarations in .c files are a bad style and can lead to mismatches. Use existing definitions in headers where they exist, and otherwise move the external declarations to suitable header files. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/coredump.h | 4 ++++ include/linux/file.h | 2 ++ include/linux/mm.h | 2 ++ include/linux/mmzone.h | 2 ++ include/linux/pid.h | 3 +++ include/linux/sysctl.h | 8 ++++++++ kernel/sysctl.c | 45 +++------------------------------------------ 7 files changed, 24 insertions(+), 42 deletions(-) (limited to 'include') diff --git a/include/linux/coredump.h b/include/linux/coredump.h index abf4b4e65dbb..7a899e83835d 100644 --- a/include/linux/coredump.h +++ b/include/linux/coredump.h @@ -22,4 +22,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo); static inline void do_coredump(const kernel_siginfo_t *siginfo) {} #endif +extern int core_uses_pid; +extern char core_pattern[]; +extern unsigned int core_pipe_limit; + #endif /* _LINUX_COREDUMP_H */ diff --git a/include/linux/file.h b/include/linux/file.h index 142d102f285e..122f80084a3e 100644 --- a/include/linux/file.h +++ b/include/linux/file.h @@ -94,4 +94,6 @@ extern void fd_install(unsigned int fd, struct file *file); extern void flush_delayed_fput(void); extern void __fput_sync(struct file *); +extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; + #endif /* __LINUX_FILE_H */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 5a323422d783..9c4e7e76dedd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -3140,5 +3140,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping, pgoff_t first_index, pgoff_t nr); #endif +extern int sysctl_nr_trim_pages; + #endif /* __KERNEL__ */ #endif /* _LINUX_MM_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index f37bb8f187fc..b2af594ef0f7 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -909,6 +909,7 @@ static inline int is_highmem(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; + int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, @@ -925,6 +926,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, extern int numa_zonelist_order_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); +extern int percpu_pagelist_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 diff --git a/include/linux/pid.h b/include/linux/pid.h index cc896f0fc4e3..93543cbc0e6b 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -108,6 +108,9 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new, struct pid_namespace; extern struct pid_namespace init_pid_ns; +extern int pid_max; +extern int pid_max_min, pid_max_max; + /* * look up a PID in the hash table. Must be called with the tasklist_lock * or rcu_read_lock() held. diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 02fa84493f23..36143ca40b56 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -207,7 +207,15 @@ void unregister_sysctl_table(struct ctl_table_header * table); extern int sysctl_init(void); +extern int pwrsw_enabled; +extern int unaligned_enabled; +extern int unaligned_dump_stack; +extern int no_unaligned_warning; + extern struct ctl_table sysctl_mount_point[]; +extern struct ctl_table random_table[]; +extern struct ctl_table firmware_config_table[]; +extern struct ctl_table epoll_table[]; #else /* CONFIG_SYSCTL */ static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 99d27acf4646..31b934865ebc 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -68,6 +68,9 @@ #include #include #include +#include +#include +#include #include "../lib/kstrtox.h" @@ -103,22 +106,6 @@ #if defined(CONFIG_SYSCTL) -/* External variables not in a header file. */ -extern int suid_dumpable; -#ifdef CONFIG_COREDUMP -extern int core_uses_pid; -extern char core_pattern[]; -extern unsigned int core_pipe_limit; -#endif -extern int pid_max; -extern int pid_max_min, pid_max_max; -extern int percpu_pagelist_fraction; -extern int latencytop_enabled; -extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max; -#ifndef CONFIG_MMU -extern int sysctl_nr_trim_pages; -#endif - /* Constants used for minimum and maximum */ #ifdef CONFIG_LOCKUP_DETECTOR static int sixty = 60; @@ -160,24 +147,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ); #ifdef CONFIG_INOTIFY_USER #include #endif -#ifdef CONFIG_SPARC -#endif - -#ifdef CONFIG_PARISC -extern int pwrsw_enabled; -#endif - -#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW -extern int unaligned_enabled; -#endif - -#ifdef CONFIG_IA64 -extern int unaligned_dump_stack; -#endif - -#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN -extern int no_unaligned_warning; -#endif #ifdef CONFIG_PROC_SYSCTL @@ -243,14 +212,6 @@ static struct ctl_table vm_table[]; static struct ctl_table fs_table[]; static struct ctl_table debug_table[]; static struct ctl_table dev_table[]; -extern struct ctl_table random_table[]; -#ifdef CONFIG_EPOLL -extern struct ctl_table epoll_table[]; -#endif - -#ifdef CONFIG_FW_LOADER_USER_HELPER -extern struct ctl_table firmware_config_table[]; -#endif #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \ defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT) -- cgit v1.2.3 From 32927393dc1ccd60fb2bdc05b9e8e88753761469 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 24 Apr 2020 08:43:38 +0200 Subject: sysctl: pass kernel pointers to ->proc_handler Instead of having all the sysctl handlers deal with user pointers, which is rather hairy in terms of the BPF interaction, copy the input to and from userspace in common code. This also means that the strings are always NUL-terminated by the common code, making the API a little bit safer. As most handler just pass through the data to one of the common handlers a lot of the changes are mechnical. Signed-off-by: Christoph Hellwig Acked-by: Andrey Ignatov Signed-off-by: Al Viro --- arch/arm64/kernel/armv8_deprecated.c | 2 +- arch/arm64/kernel/fpsimd.c | 3 +- arch/mips/lasat/sysctl.c | 13 +- arch/s390/appldata/appldata_base.c | 11 +- arch/s390/kernel/debug.c | 2 +- arch/s390/kernel/topology.c | 2 +- arch/s390/mm/cmm.c | 12 +- arch/x86/kernel/itmt.c | 3 +- drivers/cdrom/cdrom.c | 2 +- drivers/char/random.c | 2 +- drivers/macintosh/mac_hid.c | 3 +- drivers/parport/procfs.c | 39 +++--- fs/dcache.c | 2 +- fs/drop_caches.c | 2 +- fs/file_table.c | 4 +- fs/fscache/main.c | 3 +- fs/inode.c | 2 +- fs/proc/proc_sysctl.c | 47 ++++--- fs/quota/dquot.c | 2 +- fs/xfs/xfs_sysctl.c | 4 +- include/linux/bpf-cgroup.h | 9 +- include/linux/compaction.h | 2 +- include/linux/fs.h | 6 +- include/linux/ftrace.h | 3 +- include/linux/hugetlb.h | 15 +- include/linux/kprobes.h | 2 +- include/linux/latencytop.h | 4 +- include/linux/mm.h | 12 +- include/linux/mmzone.h | 23 ++- include/linux/nmi.h | 15 +- include/linux/perf_event.h | 13 +- include/linux/printk.h | 2 +- include/linux/sched/sysctl.h | 44 ++---- include/linux/security.h | 2 +- include/linux/sysctl.h | 53 +++---- include/linux/timer.h | 3 +- include/linux/vmstat.h | 8 +- include/linux/writeback.h | 28 ++-- ipc/ipc_sysctl.c | 10 +- ipc/mq_sysctl.c | 4 +- kernel/bpf/cgroup.c | 35 ++--- kernel/events/callchain.c | 2 +- kernel/events/core.c | 6 +- kernel/kprobes.c | 2 +- kernel/latencytop.c | 4 +- kernel/pid_namespace.c | 2 +- kernel/printk/printk.c | 2 +- kernel/sched/core.c | 9 +- kernel/sched/fair.c | 3 +- kernel/sched/rt.c | 10 +- kernel/sched/topology.c | 2 +- kernel/seccomp.c | 2 +- kernel/sysctl.c | 239 ++++++++++++-------------------- kernel/time/timer.c | 3 +- kernel/trace/trace.c | 2 +- kernel/umh.c | 2 +- kernel/utsname_sysctl.c | 2 +- kernel/watchdog.c | 12 +- mm/compaction.c | 2 +- mm/hugetlb.c | 9 +- mm/page-writeback.c | 16 +-- mm/page_alloc.c | 30 ++-- mm/util.c | 10 +- mm/vmstat.c | 4 +- net/bridge/br_netfilter_hooks.c | 2 +- net/core/neighbour.c | 28 ++-- net/core/sysctl_net_core.c | 27 ++-- net/decnet/dn_dev.c | 7 +- net/decnet/sysctl_net_decnet.c | 27 ++-- net/ipv4/devinet.c | 9 +- net/ipv4/route.c | 3 +- net/ipv4/sysctl_net_ipv4.c | 38 ++--- net/ipv6/addrconf.c | 33 ++--- net/ipv6/ndisc.c | 3 +- net/ipv6/route.c | 5 +- net/ipv6/sysctl_net_ipv6.c | 3 +- net/mpls/af_mpls.c | 5 +- net/netfilter/ipvs/ip_vs_ctl.c | 6 +- net/netfilter/nf_conntrack_standalone.c | 2 +- net/netfilter/nf_log.c | 2 +- net/phonet/sysctl.c | 3 +- net/rds/tcp.c | 6 +- net/sctp/sysctl.c | 32 ++--- net/sunrpc/sysctl.c | 29 ++-- net/sunrpc/xprtrdma/svc_rdma.c | 7 +- security/apparmor/lsm.c | 2 +- security/min_addr.c | 2 +- security/yama/yama_lsm.c | 2 +- 88 files changed, 458 insertions(+), 653 deletions(-) (limited to 'include') diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c index c19aa81ddc8c..7364de008bab 100644 --- a/arch/arm64/kernel/armv8_deprecated.c +++ b/arch/arm64/kernel/armv8_deprecated.c @@ -203,7 +203,7 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops) } static int emulation_proc_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c index 94289d126993..35cb5e66c504 100644 --- a/arch/arm64/kernel/fpsimd.c +++ b/arch/arm64/kernel/fpsimd.c @@ -341,8 +341,7 @@ static unsigned int find_supported_vector_length(unsigned int vl) #ifdef CONFIG_SYSCTL static int sve_proc_do_default_vl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; int vl = sve_default_vl; diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c index e666fe26c50d..2119541a5b8b 100644 --- a/arch/mips/lasat/sysctl.c +++ b/arch/mips/lasat/sysctl.c @@ -95,16 +95,15 @@ int proc_lasat_ip(struct ctl_table *table, int write, len = 0; p = buffer; while (len < *lenp) { - if (get_user(c, p++)) - return -EFAULT; + c = *p; + p++; if (c == 0 || c == '\n') break; len++; } if (len >= sizeof(ipbuf)-1) len = sizeof(ipbuf) - 1; - if (copy_from_user(ipbuf, buffer, len)) - return -EFAULT; + memcpy(ipbuf, buffer, len); ipbuf[len] = 0; *ppos += *lenp; /* Now see if we can convert it to a valid IP */ @@ -122,11 +121,9 @@ int proc_lasat_ip(struct ctl_table *table, int write, if (len > *lenp) len = *lenp; if (len) - if (copy_to_user(buffer, ipbuf, len)) - return -EFAULT; + memcpy(buffer, ipbuf, len); if (len < *lenp) { - if (put_user('\n', ((char *) buffer) + len)) - return -EFAULT; + *((char *)buffer + len) = '\n'; len++; } *lenp = len; diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c index aa738cad1338..d74a4c7d5df6 100644 --- a/arch/s390/appldata/appldata_base.c +++ b/arch/s390/appldata/appldata_base.c @@ -51,10 +51,9 @@ static struct platform_device *appldata_pdev; */ static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata"; static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table_header *appldata_sysctl_header; static struct ctl_table appldata_table[] = { @@ -217,7 +216,7 @@ static void __appldata_vtimer_setup(int cmd) */ static int appldata_timer_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int timer_active = appldata_timer_active; int rc; @@ -250,7 +249,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write, */ static int appldata_interval_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int interval = appldata_interval; int rc; @@ -280,7 +279,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write, */ static int appldata_generic_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct appldata_ops *ops = NULL, *tmp_ops; struct list_head *lh; diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c index 6d321f5f101d..636446003a06 100644 --- a/arch/s390/kernel/debug.c +++ b/arch/s390/kernel/debug.c @@ -867,7 +867,7 @@ static int debug_active = 1; * if debug_active is already off */ static int s390dbf_procactive(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!write || debug_stoppable || !debug_active) return proc_dointvec(table, write, buffer, lenp, ppos); diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c index 5f70cefc13e4..332b542548cd 100644 --- a/arch/s390/kernel/topology.c +++ b/arch/s390/kernel/topology.c @@ -594,7 +594,7 @@ static int __init topology_setup(char *str) early_param("topology", topology_setup); static int topology_ctl_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int enabled = topology_is_enabled(); int new_mode; diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c index ae989b740376..36bce727897b 100644 --- a/arch/s390/mm/cmm.c +++ b/arch/s390/mm/cmm.c @@ -245,7 +245,7 @@ static int cmm_skip_blanks(char *cp, char **endp) } static int cmm_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_pages(); struct ctl_table ctl_entry = { @@ -264,7 +264,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { long nr = cmm_get_timed_pages(); @@ -284,7 +284,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write, } static int cmm_timeout_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char buf[64], *p; long nr, seconds; @@ -297,8 +297,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, if (write) { len = min(*lenp, sizeof(buf)); - if (copy_from_user(buf, buffer, len)) - return -EFAULT; + memcpy(buf, buffer, len); buf[len - 1] = '\0'; cmm_skip_blanks(buf, &p); nr = simple_strtoul(p, &p, 0); @@ -311,8 +310,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write, cmm_timeout_pages, cmm_timeout_seconds); if (len > *lenp) len = *lenp; - if (copy_to_user(buffer, buf, len)) - return -EFAULT; + memcpy(buffer, buf, len); *lenp = len; *ppos += len; } diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c index 1cb3ca9bba49..1afbdd1dd777 100644 --- a/arch/x86/kernel/itmt.c +++ b/arch/x86/kernel/itmt.c @@ -39,8 +39,7 @@ static bool __read_mostly sched_itmt_capable; unsigned int __read_mostly sysctl_sched_itmt_enabled; static int sched_itmt_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { unsigned int old_sysctl; int ret; diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c index faca0f346fff..e3bbe108eb54 100644 --- a/drivers/cdrom/cdrom.c +++ b/drivers/cdrom/cdrom.c @@ -3631,7 +3631,7 @@ static void cdrom_update_settings(void) } static int cdrom_sysctl_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/drivers/char/random.c b/drivers/char/random.c index 0d10e31fd342..1e0db78b83ba 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -2057,7 +2057,7 @@ static char sysctl_bootid[16]; * sysctl system call, as 16 bytes of binary data. */ static int proc_do_uuid(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table fake_table; unsigned char buf[64], tmp_uuid[16], *uuid; diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c index 7af0c536d568..28b8581b44dd 100644 --- a/drivers/macintosh/mac_hid.c +++ b/drivers/macintosh/mac_hid.c @@ -183,8 +183,7 @@ static void mac_hid_stop_emulation(void) } static int mac_hid_toggle_emumouse(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; int old_val = *valp; diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c index 48804049d697..ee7b5daabfd4 100644 --- a/drivers/parport/procfs.c +++ b/drivers/parport/procfs.c @@ -34,7 +34,7 @@ #define PARPORT_MAX_SPINTIME_VALUE 1000 static int do_active_device(struct ctl_table *table, int write, - void __user *result, size_t *lenp, loff_t *ppos) + void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[256]; @@ -65,13 +65,13 @@ static int do_active_device(struct ctl_table *table, int write, *lenp = len; *ppos += len; - - return copy_to_user(result, buffer, len) ? -EFAULT : 0; + memcpy(result, buffer, len); + return 0; } #ifdef CONFIG_PARPORT_1284 static int do_autoprobe(struct ctl_table *table, int write, - void __user *result, size_t *lenp, loff_t *ppos) + void *result, size_t *lenp, loff_t *ppos) { struct parport_device_info *info = table->extra2; const char *str; @@ -108,13 +108,13 @@ static int do_autoprobe(struct ctl_table *table, int write, *ppos += len; - return copy_to_user (result, buffer, len) ? -EFAULT : 0; + memcpy(result, buffer, len); + return 0; } #endif /* IEEE1284.3 support. */ static int do_hardware_base_addr(struct ctl_table *table, int write, - void __user *result, - size_t *lenp, loff_t *ppos) + void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[20]; @@ -136,13 +136,12 @@ static int do_hardware_base_addr(struct ctl_table *table, int write, *lenp = len; *ppos += len; - - return copy_to_user(result, buffer, len) ? -EFAULT : 0; + memcpy(result, buffer, len); + return 0; } static int do_hardware_irq(struct ctl_table *table, int write, - void __user *result, - size_t *lenp, loff_t *ppos) + void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[20]; @@ -164,13 +163,12 @@ static int do_hardware_irq(struct ctl_table *table, int write, *lenp = len; *ppos += len; - - return copy_to_user(result, buffer, len) ? -EFAULT : 0; + memcpy(result, buffer, len); + return 0; } static int do_hardware_dma(struct ctl_table *table, int write, - void __user *result, - size_t *lenp, loff_t *ppos) + void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[20]; @@ -192,13 +190,12 @@ static int do_hardware_dma(struct ctl_table *table, int write, *lenp = len; *ppos += len; - - return copy_to_user(result, buffer, len) ? -EFAULT : 0; + memcpy(result, buffer, len); + return 0; } static int do_hardware_modes(struct ctl_table *table, int write, - void __user *result, - size_t *lenp, loff_t *ppos) + void *result, size_t *lenp, loff_t *ppos) { struct parport *port = (struct parport *)table->extra1; char buffer[40]; @@ -231,8 +228,8 @@ static int do_hardware_modes(struct ctl_table *table, int write, *lenp = len; *ppos += len; - - return copy_to_user(result, buffer, len) ? -EFAULT : 0; + memcpy(result, buffer, len); + return 0; } #define PARPORT_PORT_DIR(CHILD) { .procname = NULL, .mode = 0555, .child = CHILD } diff --git a/fs/dcache.c b/fs/dcache.c index b280e07e162b..8dd4d8d7bd0b 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -165,7 +165,7 @@ static long get_nr_dentry_negative(void) return sum < 0 ? 0 : sum; } -int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer, +int proc_nr_dentry(struct ctl_table *table, int write, void *buffer, size_t *lenp, loff_t *ppos) { dentry_stat.nr_dentry = get_nr_dentry(); diff --git a/fs/drop_caches.c b/fs/drop_caches.c index dc1a1d5d825b..f00fcc4a4f72 100644 --- a/fs/drop_caches.c +++ b/fs/drop_caches.c @@ -47,7 +47,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused) } int drop_caches_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { int ret; diff --git a/fs/file_table.c b/fs/file_table.c index 30d55c9a1744..3b612535391f 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -80,14 +80,14 @@ EXPORT_SYMBOL_GPL(get_max_files); */ #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS) int proc_nr_files(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { files_stat.nr_files = get_nr_files(); return proc_doulongvec_minmax(table, write, buffer, lenp, ppos); } #else int proc_nr_files(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } diff --git a/fs/fscache/main.c b/fs/fscache/main.c index 59c2494efda3..c1e6cc9091aa 100644 --- a/fs/fscache/main.c +++ b/fs/fscache/main.c @@ -51,8 +51,7 @@ static unsigned fscache_op_max_active = 2; static struct ctl_table_header *fscache_sysctl_header; static int fscache_max_active_sysctl(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct workqueue_struct **wqp = table->extra1; unsigned int *datap = table->data; diff --git a/fs/inode.c b/fs/inode.c index 93d9252a00ab..cc6e701b7e5d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -108,7 +108,7 @@ long get_nr_dirty_inodes(void) */ #ifdef CONFIG_SYSCTL int proc_nr_inodes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { inodes_stat.nr_inodes = get_nr_inodes(); inodes_stat.nr_unused = get_nr_inodes_unused(); diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c index b6f5d459b087..df2143e05c57 100644 --- a/fs/proc/proc_sysctl.c +++ b/fs/proc/proc_sysctl.c @@ -539,13 +539,13 @@ out: return err; } -static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, +static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf, size_t count, loff_t *ppos, int write) { struct inode *inode = file_inode(filp); struct ctl_table_header *head = grab_header(inode); struct ctl_table *table = PROC_I(inode)->sysctl_entry; - void *new_buf = NULL; + void *kbuf; ssize_t error; if (IS_ERR(head)) @@ -564,27 +564,38 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf, if (!table->proc_handler) goto out; - error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count, - ppos, &new_buf); + if (write) { + kbuf = memdup_user_nul(ubuf, count); + if (IS_ERR(kbuf)) { + error = PTR_ERR(kbuf); + goto out; + } + } else { + error = -ENOMEM; + kbuf = kzalloc(count, GFP_KERNEL); + if (!kbuf) + goto out; + } + + error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count, + ppos); if (error) - goto out; + goto out_free_buf; /* careful: calling conventions are nasty here */ - if (new_buf) { - mm_segment_t old_fs; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - error = table->proc_handler(table, write, (void __user *)new_buf, - &count, ppos); - set_fs(old_fs); - kfree(new_buf); - } else { - error = table->proc_handler(table, write, buf, &count, ppos); + error = table->proc_handler(table, write, kbuf, &count, ppos); + if (error) + goto out_free_buf; + + if (!write) { + error = -EFAULT; + if (copy_to_user(ubuf, kbuf, count)) + goto out_free_buf; } - if (!error) - error = count; + error = count; +out_free_buf: + kfree(kbuf); out: sysctl_head_finish(head); diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c index b6a4f692d345..7b4bac91146b 100644 --- a/fs/quota/dquot.c +++ b/fs/quota/dquot.c @@ -2841,7 +2841,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = { EXPORT_SYMBOL(dquot_quotactl_sysfile_ops); static int do_proc_dqstats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { unsigned int type = (unsigned long *)table->data - dqstats.stat; s64 value = percpu_counter_sum(&dqstats.counter[type]); diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c index 31b3bdbd2eba..021ef96d0542 100644 --- a/fs/xfs/xfs_sysctl.c +++ b/fs/xfs/xfs_sysctl.c @@ -13,7 +13,7 @@ STATIC int xfs_stats_clear_proc_handler( struct ctl_table *ctl, int write, - void __user *buffer, + void *buffer, size_t *lenp, loff_t *ppos) { @@ -33,7 +33,7 @@ STATIC int xfs_panic_mask_proc_handler( struct ctl_table *ctl, int write, - void __user *buffer, + void *buffer, size_t *lenp, loff_t *ppos) { diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index c11b413d5b1a..0b41fd5fc96b 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -138,8 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void __user *buf, size_t *pcount, - loff_t *ppos, void **new_buf, + void **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type); int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level, @@ -302,12 +301,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key, }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf) \ +#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos) \ ({ \ int __ret = 0; \ if (cgroup_bpf_enabled) \ __ret = __cgroup_bpf_run_filter_sysctl(head, table, write, \ - buf, count, pos, nbuf, \ + buf, count, pos, \ BPF_CGROUP_SYSCTL); \ __ret; \ }) @@ -429,7 +428,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map, #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) -#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; }) #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; }) #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \ optlen, max_optlen, retval) ({ retval; }) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 4b898cdbdf05..a0eabfbeb0e1 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -86,7 +86,7 @@ static inline unsigned long compact_gap(unsigned int order) #ifdef CONFIG_COMPACTION extern int sysctl_compact_memory; extern int sysctl_compaction_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos); + void *buffer, size_t *length, loff_t *ppos); extern int sysctl_extfrag_threshold; extern int sysctl_compact_unevictable_allowed; diff --git a/include/linux/fs.h b/include/linux/fs.h index 4f6f59b4f22a..9b028d260649 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3536,11 +3536,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf, struct ctl_table; int proc_nr_files(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_dentry(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int proc_nr_inodes(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); int __init get_filesystem_list(char *buf); #define __FMODE_EXEC ((__force int) FMODE_EXEC) diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index db95244a62d4..ddfc377de0d2 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -1005,8 +1005,7 @@ extern void disable_trace_on_warning(void); extern int __disable_trace_on_warning; int tracepoint_printk_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #else /* CONFIG_TRACING */ static inline void disable_trace_on_warning(void) { } diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 43a1cef8f0f1..92c21c5ccc58 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -105,14 +105,13 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages, void hugepage_put_subpool(struct hugepage_subpool *spool); void reset_vma_resv_huge_pages(struct vm_area_struct *vma); -int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); -int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *); - -#ifdef CONFIG_NUMA -int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -#endif +int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *); +int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *); long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *, diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 04bdaf01112c..594265bfd390 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -312,7 +312,7 @@ DEFINE_INSN_CACHE_OPS(optinsn); #ifdef CONFIG_SYSCTL extern int sysctl_kprobes_optimization; extern int proc_kprobes_optimization_handler(struct ctl_table *table, - int write, void __user *buffer, + int write, void *buffer, size_t *length, loff_t *ppos); #endif extern void wait_for_kprobe_optimizer(void); diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h index 9022f0c2e2e4..abe3d95f795b 100644 --- a/include/linux/latencytop.h +++ b/include/linux/latencytop.h @@ -38,8 +38,8 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter) void clear_tsk_latency_tracing(struct task_struct *p); -extern int sysctl_latencytop(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #else diff --git a/include/linux/mm.h b/include/linux/mm.h index 9c4e7e76dedd..a7b1ef8ed970 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -201,10 +201,10 @@ extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; -extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *, - size_t *, loff_t *); -extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *, - size_t *, loff_t *); +int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n)) @@ -2957,8 +2957,8 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm); #ifdef CONFIG_SYSCTL extern int sysctl_drop_caches; -int drop_caches_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); #endif void drop_slab(void); diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b2af594ef0f7..93cf20f41e26 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -910,22 +910,21 @@ static inline int is_highmem(struct zone *zone) /* These two functions are used to setup the per zone pages min values */ struct ctl_table; -int min_free_kbytes_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *, + loff_t *); +int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *, + size_t *, loff_t *); extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES]; -int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *, + size_t *, loff_t *); int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); - -extern int numa_zonelist_order_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *, size_t *, loff_t *); +int numa_zonelist_order_handler(struct ctl_table *, int, + void *, size_t *, loff_t *); extern int percpu_pagelist_fraction; extern char numa_zonelist_order[]; #define NUMA_ZONELIST_ORDER_LEN 16 diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 9003e29cde46..750c7f395ca9 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -202,16 +202,11 @@ static inline void watchdog_update_hrtimer_threshold(u64 period) { } #endif struct ctl_table; -extern int proc_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_nmi_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_soft_watchdog(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_watchdog_thresh(struct ctl_table *, int , - void __user *, size_t *, loff_t *); -extern int proc_watchdog_cpumask(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *); +int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *); #ifdef CONFIG_HAVE_ACPI_APEI_NMI #include diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 9c3e7619c929..347ea379622a 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1280,15 +1280,12 @@ extern int sysctl_perf_cpu_time_max_percent; extern void perf_sample_event_took(u64 sample_len_ns); -extern int perf_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - +int perf_proc_update_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int perf_event_max_stack_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); /* Access to perf_event_open(2) syscall. */ #define PERF_SECURITY_OPEN 0 diff --git a/include/linux/printk.h b/include/linux/printk.h index e061635e0409..fcde0772ec98 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -189,7 +189,7 @@ extern int printk_delay_msec; extern int dmesg_restrict; extern int -devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, +devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf, size_t *lenp, loff_t *ppos); extern void wake_up_klogd(void); diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index d4f6215ee03f..7b4d3a49b6c5 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -12,9 +12,8 @@ extern unsigned int sysctl_hung_task_panic; extern unsigned long sysctl_hung_task_timeout_secs; extern unsigned long sysctl_hung_task_check_interval_secs; extern int sysctl_hung_task_warnings; -extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos); +int proc_dohung_task_timeout_secs(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); #else /* Avoid need for ifdefs elsewhere in the code */ enum { sysctl_hung_task_timeout_secs = 0 }; @@ -43,8 +42,7 @@ extern __read_mostly unsigned int sysctl_sched_migration_cost; extern __read_mostly unsigned int sysctl_sched_nr_migrate; int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos); + void *buffer, size_t *length, loff_t *ppos); #endif /* @@ -72,33 +70,21 @@ extern unsigned int sysctl_sched_autogroup_enabled; extern int sysctl_sched_rr_timeslice; extern int sched_rr_timeslice; -extern int sched_rr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -#ifdef CONFIG_UCLAMP_TASK -extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -#endif - -extern int sysctl_numa_balancing(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); - -extern int sysctl_schedstats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int sched_rr_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sched_rt_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL) extern unsigned int sysctl_sched_energy_aware; -extern int sched_energy_aware_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int sched_energy_aware_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); #endif #endif /* _LINUX_SCHED_SYSCTL_H */ diff --git a/include/linux/security.h b/include/linux/security.h index a8d9310472df..6aa229b252ce 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -211,7 +211,7 @@ struct request_sock; #ifdef CONFIG_MMU extern int mmap_min_addr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif /* security_inode_init_security callback function to write xattrs */ diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index 36143ca40b56..f2401e45a3c2 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -44,35 +44,26 @@ struct ctl_dir; extern const int sysctl_vals[]; -typedef int proc_handler (struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -extern int proc_dostring(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_douintvec(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_minmax(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_douintvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int proc_dointvec_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_dointvec_ms_jiffies(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_minmax(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, - void __user *, size_t *, loff_t *); -extern int proc_do_large_bitmap(struct ctl_table *, int, - void __user *, size_t *, loff_t *); -extern int proc_do_static_key(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos); + +int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); +int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *, + loff_t *); +int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *, + loff_t *); +int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *, + size_t *, loff_t *); +int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *); +int proc_do_static_key(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); /* * Register a set of sysctl names by calling register_sysctl_table @@ -246,7 +237,7 @@ static inline void setup_sysctl_set(struct ctl_table_set *p, #endif /* CONFIG_SYSCTL */ -int sysctl_max_threads(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int sysctl_max_threads(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos); #endif /* _LINUX_SYSCTL_H */ diff --git a/include/linux/timer.h b/include/linux/timer.h index 0dc19a8c39c9..07910ae5ddd9 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -201,8 +201,7 @@ struct ctl_table; extern unsigned int sysctl_timer_migration; int timer_migration_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); #endif unsigned long __round_jiffies(unsigned long j, int cpu); diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 292485f3d24d..cb507151710f 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -16,8 +16,8 @@ extern int sysctl_stat_interval; #define DISABLE_NUMA_STAT 0 extern int sysctl_vm_numa_stat; DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); -extern int sysctl_vm_numa_stat_handler(struct ctl_table *table, - int write, void __user *buffer, size_t *length, loff_t *ppos); +int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, + void *buffer, size_t *length, loff_t *ppos); #endif struct reclaim_stat { @@ -274,8 +274,8 @@ void cpu_vm_stats_fold(int cpu); void refresh_zone_stat_thresholds(void); struct ctl_table; -int vmstat_refresh(struct ctl_table *, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); +int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp, + loff_t *ppos); void drain_zonestat(struct zone *zone, struct per_cpu_pageset *); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a19d845dd7eb..f8a7e1a850fb 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -362,24 +362,18 @@ extern int vm_highmem_is_dirtyable; extern int block_dump; extern int laptop_mode; -extern int dirty_background_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_background_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -extern int dirty_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); +int dirty_background_ratio_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_background_bytes_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_ratio_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_bytes_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); int dirtytime_interval_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); - -struct ctl_table; -int dirty_writeback_centisecs_handler(struct ctl_table *, int, - void __user *, size_t *, loff_t *); + void *buffer, size_t *lenp, loff_t *ppos); +int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, + void *buffer, size_t *lenp, loff_t *ppos); void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty); unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh); diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c index affd66537e87..d1b8644bfb88 100644 --- a/ipc/ipc_sysctl.c +++ b/ipc/ipc_sysctl.c @@ -24,7 +24,7 @@ static void *get_ipc(struct ctl_table *table) #ifdef CONFIG_PROC_SYSCTL static int proc_ipc_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -35,7 +35,7 @@ static int proc_ipc_dointvec(struct ctl_table *table, int write, } static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; @@ -46,7 +46,7 @@ static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write, } static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ipc_namespace *ns = current->nsproxy->ipc_ns; int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos); @@ -59,7 +59,7 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write, } static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; memcpy(&ipc_table, table, sizeof(ipc_table)); @@ -70,7 +70,7 @@ static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write, } static int proc_ipc_auto_msgmni(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table ipc_table; int dummy = 0; diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c index 7c00f28923a8..72a92a08c848 100644 --- a/ipc/mq_sysctl.c +++ b/ipc/mq_sysctl.c @@ -19,7 +19,7 @@ static void *get_mq(struct ctl_table *table) } static int proc_mq_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); @@ -29,7 +29,7 @@ static int proc_mq_dointvec(struct ctl_table *table, int write, } static int proc_mq_dointvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table mq_table; memcpy(&mq_table, table, sizeof(mq_table)); diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index cb305e71e7de..977bc69bb1c5 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -1137,16 +1137,13 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { * @head: sysctl table header * @table: sysctl table * @write: sysctl is being read (= 0) or written (= 1) - * @buf: pointer to buffer passed by user space + * @buf: pointer to buffer (in and out) * @pcount: value-result argument: value is size of buffer pointed to by @buf, * result is size of @new_buf if program set new value, initial value * otherwise * @ppos: value-result argument: value is position at which read from or write * to sysctl is happening, result is new position if program overrode it, * initial value otherwise - * @new_buf: pointer to pointer to new buffer that will be allocated if program - * overrides new value provided by user space on sysctl write - * NOTE: it's caller responsibility to free *new_buf if it was set * @type: type of program to be executed * * Program is run when sysctl is being accessed, either read or written, and @@ -1157,8 +1154,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = { */ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, struct ctl_table *table, int write, - void __user *buf, size_t *pcount, - loff_t *ppos, void **new_buf, + void **buf, size_t *pcount, loff_t *ppos, enum bpf_attach_type type) { struct bpf_sysctl_kern ctx = { @@ -1173,36 +1169,28 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, .new_updated = 0, }; struct cgroup *cgrp; + loff_t pos = 0; int ret; ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL); - if (ctx.cur_val) { - mm_segment_t old_fs; - loff_t pos = 0; - - old_fs = get_fs(); - set_fs(KERNEL_DS); - if (table->proc_handler(table, 0, (void __user *)ctx.cur_val, - &ctx.cur_len, &pos)) { - /* Let BPF program decide how to proceed. */ - ctx.cur_len = 0; - } - set_fs(old_fs); - } else { + if (!ctx.cur_val || + table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) { /* Let BPF program decide how to proceed. */ ctx.cur_len = 0; } - if (write && buf && *pcount) { + if (write && *buf && *pcount) { /* BPF program should be able to override new value with a * buffer bigger than provided by user. */ ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL); ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount); - if (!ctx.new_val || - copy_from_user(ctx.new_val, buf, ctx.new_len)) + if (ctx.new_val) { + memcpy(ctx.new_val, *buf, ctx.new_len); + } else { /* Let BPF program decide how to proceed. */ ctx.new_len = 0; + } } rcu_read_lock(); @@ -1213,7 +1201,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head, kfree(ctx.cur_val); if (ret == 1 && ctx.new_updated) { - *new_buf = ctx.new_val; + kfree(*buf); + *buf = ctx.new_val; *pcount = ctx.new_len; } else { kfree(ctx.new_val); diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c index c2b41a263166..bdb1533ada81 100644 --- a/kernel/events/callchain.c +++ b/kernel/events/callchain.c @@ -236,7 +236,7 @@ exit_put: * sysctl_perf_event_max_contexts_per_stack. */ int perf_event_max_stack_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int *value = table->data; int new_value = *value, ret; diff --git a/kernel/events/core.c b/kernel/events/core.c index bc9b98a9af9a..f86d46f2c4d9 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -437,8 +437,7 @@ static void update_perf_cpu_limits(void) static bool perf_rotate_context(struct perf_cpu_context *cpuctx); int perf_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; int perf_cpu = sysctl_perf_cpu_time_max_percent; @@ -462,8 +461,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write, int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT; int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); diff --git a/kernel/kprobes.c b/kernel/kprobes.c index 2625c241ac00..ffbe03a45c16 100644 --- a/kernel/kprobes.c +++ b/kernel/kprobes.c @@ -892,7 +892,7 @@ static void unoptimize_all_kprobes(void) static DEFINE_MUTEX(kprobe_sysctl_mutex); int sysctl_kprobes_optimization; int proc_kprobes_optimization_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, + void *buffer, size_t *length, loff_t *ppos) { int ret; diff --git a/kernel/latencytop.c b/kernel/latencytop.c index 8d1c15832e55..166d7bf49666 100644 --- a/kernel/latencytop.c +++ b/kernel/latencytop.c @@ -269,8 +269,8 @@ static int __init init_lstats_procfs(void) return 0; } -int sysctl_latencytop(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_latencytop(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int err; diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c index 01f8ba32cc0c..3ccaba5f15c0 100644 --- a/kernel/pid_namespace.c +++ b/kernel/pid_namespace.c @@ -263,7 +263,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns) #ifdef CONFIG_CHECKPOINT_RESTORE static int pid_ns_ctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct pid_namespace *pid_ns = task_active_pid_ns(current); struct ctl_table tmp = *table; diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c index 9a9b6156270b..471f649b5868 100644 --- a/kernel/printk/printk.c +++ b/kernel/printk/printk.c @@ -173,7 +173,7 @@ __setup("printk.devkmsg=", control_devkmsg); char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit"; int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char old_str[DEVKMSG_STR_MAX_SIZE]; unsigned int old; diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 3a61a3b8eaa9..5c589a2e4d19 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -1110,8 +1110,7 @@ static void uclamp_update_root_tg(void) { } #endif int sysctl_sched_uclamp_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { bool update_root_tg = false; int old_min, old_max; @@ -2723,7 +2722,7 @@ void set_numabalancing_state(bool enabled) #ifdef CONFIG_PROC_SYSCTL int sysctl_numa_balancing(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; int err; @@ -2797,8 +2796,8 @@ static void __init init_schedstats(void) } #ifdef CONFIG_PROC_SYSCTL -int sysctl_schedstats(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int sysctl_schedstats(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table t; int err; diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c index 02f323b85b6d..b6077fd5b32f 100644 --- a/kernel/sched/fair.c +++ b/kernel/sched/fair.c @@ -645,8 +645,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq) */ int sched_proc_update_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos); unsigned int factor = get_update_sysctl_factor(); diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c index df11d88c9895..45da29de3ecc 100644 --- a/kernel/sched/rt.c +++ b/kernel/sched/rt.c @@ -2714,9 +2714,8 @@ static void sched_rt_do_global(void) def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period()); } -int sched_rt_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +int sched_rt_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int old_period, old_runtime; static DEFINE_MUTEX(mutex); @@ -2754,9 +2753,8 @@ undo: return ret; } -int sched_rr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +int sched_rr_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int ret; static DEFINE_MUTEX(mutex); diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c index 8344757bba6e..fa64b2ee9fe6 100644 --- a/kernel/sched/topology.c +++ b/kernel/sched/topology.c @@ -209,7 +209,7 @@ bool sched_energy_update; #ifdef CONFIG_PROC_SYSCTL int sched_energy_aware_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret, state; diff --git a/kernel/seccomp.c b/kernel/seccomp.c index 55a6184f5990..d653d8426de9 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -1776,7 +1776,7 @@ static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged, } static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index 3fafca3ced98..e961286d0e14 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -208,12 +208,10 @@ static int max_extfrag_threshold = 1000; #ifdef CONFIG_PROC_SYSCTL static int _proc_do_string(char *data, int maxlen, int write, - char __user *buffer, - size_t *lenp, loff_t *ppos) + char *buffer, size_t *lenp, loff_t *ppos) { size_t len; - char __user *p; - char c; + char c, *p; if (!data || !maxlen || !*lenp) { *lenp = 0; @@ -238,8 +236,7 @@ static int _proc_do_string(char *data, int maxlen, int write, *ppos += *lenp; p = buffer; while ((p - buffer) < *lenp && len < maxlen - 1) { - if (get_user(c, p++)) - return -EFAULT; + c = *(p++); if (c == 0 || c == '\n') break; data[len++] = c; @@ -261,11 +258,9 @@ static int _proc_do_string(char *data, int maxlen, int write, if (len > *lenp) len = *lenp; if (len) - if (copy_to_user(buffer, data, len)) - return -EFAULT; + memcpy(buffer, data, len); if (len < *lenp) { - if (put_user('\n', buffer + len)) - return -EFAULT; + buffer[len] = '\n'; len++; } *lenp = len; @@ -326,13 +321,13 @@ static bool proc_first_pos_non_zero_ignore(loff_t *ppos, * Returns 0 on success. */ int proc_dostring(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (write) proc_first_pos_non_zero_ignore(ppos, table); - return _proc_do_string((char *)(table->data), table->maxlen, write, - (char __user *)buffer, lenp, ppos); + return _proc_do_string(table->data, table->maxlen, write, buffer, lenp, + ppos); } static size_t proc_skip_spaces(char **buf) @@ -463,11 +458,10 @@ static int proc_get_long(char **buf, size_t *size, * @val: the integer to be converted * @neg: sign of the number, %TRUE for negative * - * In case of success %0 is returned and @buf and @size are updated with - * the amount of bytes written. + * In case of success @buf and @size are updated with the amount of bytes + * written. */ -static int proc_put_long(void __user **buf, size_t *size, unsigned long val, - bool neg) +static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg) { int len; char tmp[TMPBUFLEN], *p = tmp; @@ -476,24 +470,22 @@ static int proc_put_long(void __user **buf, size_t *size, unsigned long val, len = strlen(tmp); if (len > *size) len = *size; - if (copy_to_user(*buf, tmp, len)) - return -EFAULT; + memcpy(*buf, tmp, len); *size -= len; *buf += len; - return 0; } #undef TMPBUFLEN -static int proc_put_char(void __user **buf, size_t *size, char c) +static void proc_put_char(void **buf, size_t *size, char c) { if (*size) { - char __user **buffer = (char __user **)buf; - if (put_user(c, *buffer)) - return -EFAULT; - (*size)--, (*buffer)++; + char **buffer = (char **)buf; + **buffer = c; + + (*size)--; + (*buffer)++; *buf = *buffer; } - return 0; } static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp, @@ -541,7 +533,7 @@ static int do_proc_douintvec_conv(unsigned long *lvalp, static const char proc_wspace_sep[] = { ' ', '\t', '\n' }; static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, - int write, void __user *buffer, + int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), @@ -549,7 +541,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, { int *i, vleft, first = 1, err = 0; size_t left; - char *kbuf = NULL, *p; + char *p; if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) { *lenp = 0; @@ -569,9 +561,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - p = kbuf = memdup_user_nul(buffer, left); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); + p = buffer; } for (; left && vleft--; i++, first=0) { @@ -598,24 +588,17 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table, break; } if (!first) - err = proc_put_char(&buffer, &left, '\t'); - if (err) - break; - err = proc_put_long(&buffer, &left, lval, neg); - if (err) - break; + proc_put_char(&buffer, &left, '\t'); + proc_put_long(&buffer, &left, lval, neg); } } if (!write && !first && left && !err) - err = proc_put_char(&buffer, &left, '\n'); + proc_put_char(&buffer, &left, '\n'); if (write && !err && left) left -= proc_skip_spaces(&p); - if (write) { - kfree(kbuf); - if (first) - return err ? : -EINVAL; - } + if (write && first) + return err ? : -EINVAL; *lenp -= left; out: *ppos += *lenp; @@ -623,7 +606,7 @@ out: } static int do_proc_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos, + void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(bool *negp, unsigned long *lvalp, int *valp, int write, void *data), void *data) @@ -634,7 +617,7 @@ static int do_proc_dointvec(struct ctl_table *table, int write, static int do_proc_douintvec_w(unsigned int *tbl_data, struct ctl_table *table, - void __user *buffer, + void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, @@ -645,7 +628,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, int err = 0; size_t left; bool neg; - char *kbuf = NULL, *p; + char *p = buffer; left = *lenp; @@ -655,10 +638,6 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - p = kbuf = memdup_user_nul(buffer, left); - if (IS_ERR(kbuf)) - return -EINVAL; - left -= proc_skip_spaces(&p); if (!left) { err = -EINVAL; @@ -682,7 +661,6 @@ static int do_proc_douintvec_w(unsigned int *tbl_data, left -= proc_skip_spaces(&p); out_free: - kfree(kbuf); if (err) return -EINVAL; @@ -694,7 +672,7 @@ bail_early: return err; } -static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer, +static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, @@ -712,11 +690,11 @@ static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer, goto out; } - err = proc_put_long(&buffer, &left, lval, false); - if (err || !left) + proc_put_long(&buffer, &left, lval, false); + if (!left) goto out; - err = proc_put_char(&buffer, &left, '\n'); + proc_put_char(&buffer, &left, '\n'); out: *lenp -= left; @@ -726,7 +704,7 @@ out: } static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, - int write, void __user *buffer, + int write, void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, @@ -762,7 +740,7 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table, } static int do_proc_douintvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos, + void *buffer, size_t *lenp, loff_t *ppos, int (*conv)(unsigned long *lvalp, unsigned int *valp, int write, void *data), @@ -785,16 +763,15 @@ static int do_proc_douintvec(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int proc_dointvec(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL); } #ifdef CONFIG_COMPACTION static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, - int write, void __user *buffer, - size_t *lenp, loff_t *ppos) + int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret, old; @@ -826,8 +803,8 @@ static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table, * * Returns 0 on success. */ -int proc_douintvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int proc_douintvec(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { return do_proc_douintvec(table, write, buffer, lenp, ppos, do_proc_douintvec_conv, NULL); @@ -838,7 +815,7 @@ int proc_douintvec(struct ctl_table *table, int write, * This means we can safely use a temporary. */ static int proc_taint(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; unsigned long tmptaint = get_taint(); @@ -870,7 +847,7 @@ static int proc_taint(struct ctl_table *table, int write, #ifdef CONFIG_PRINTK static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (write && !capable(CAP_SYS_ADMIN)) return -EPERM; @@ -936,7 +913,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp, * Returns 0 on success or -EINVAL on write when the range check fails. */ int proc_dointvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_dointvec_minmax_conv_param param = { .min = (int *) table->extra1, @@ -1005,7 +982,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp, * Returns 0 on success or -ERANGE on write when the range check fails. */ int proc_douintvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct do_proc_douintvec_minmax_conv_param param = { .min = (unsigned int *) table->extra1, @@ -1036,7 +1013,7 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp, } static int proc_dopipe_max_size(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_douintvec(table, write, buffer, lenp, ppos, do_proc_dopipe_max_size_conv, NULL); @@ -1057,7 +1034,7 @@ static void validate_coredump_safety(void) } static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos); if (!error) @@ -1067,7 +1044,7 @@ static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write, #ifdef CONFIG_COREDUMP static int proc_dostring_coredump(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int error = proc_dostring(table, write, buffer, lenp, ppos); if (!error) @@ -1078,7 +1055,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write, #ifdef CONFIG_MAGIC_SYSRQ static int sysrq_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int tmp, ret; @@ -1096,16 +1073,14 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write, } #endif -static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos, - unsigned long convmul, - unsigned long convdiv) +static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, + int write, void *buffer, size_t *lenp, loff_t *ppos, + unsigned long convmul, unsigned long convdiv) { unsigned long *i, *min, *max; int vleft, first = 1, err = 0; size_t left; - char *kbuf = NULL, *p; + char *p; if (!data || !table->maxlen || !*lenp || (*ppos && !write)) { *lenp = 0; @@ -1124,9 +1099,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int if (left > PAGE_SIZE - 1) left = PAGE_SIZE - 1; - p = kbuf = memdup_user_nul(buffer, left); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); + p = buffer; } for (; left && vleft--; i++, first = 0) { @@ -1154,26 +1127,18 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int *i = val; } else { val = convdiv * (*i) / convmul; - if (!first) { - err = proc_put_char(&buffer, &left, '\t'); - if (err) - break; - } - err = proc_put_long(&buffer, &left, val, false); - if (err) - break; + if (!first) + proc_put_char(&buffer, &left, '\t'); + proc_put_long(&buffer, &left, val, false); } } if (!write && !first && left && !err) - err = proc_put_char(&buffer, &left, '\n'); + proc_put_char(&buffer, &left, '\n'); if (write && !err) left -= proc_skip_spaces(&p); - if (write) { - kfree(kbuf); - if (first) - return err ? : -EINVAL; - } + if (write && first) + return err ? : -EINVAL; *lenp -= left; out: *ppos += *lenp; @@ -1181,10 +1146,8 @@ out: } static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos, - unsigned long convmul, - unsigned long convdiv) + void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul, + unsigned long convdiv) { return __do_proc_doulongvec_minmax(table->data, table, write, buffer, lenp, ppos, convmul, convdiv); @@ -1207,7 +1170,7 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write, * Returns 0 on success. */ int proc_doulongvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l); } @@ -1230,8 +1193,7 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write, * Returns 0 on success. */ int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, HZ, 1000l); @@ -1325,7 +1287,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp, * Returns 0 on success. */ int proc_dointvec_jiffies(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_jiffies_conv,NULL); @@ -1347,7 +1309,7 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write, * Returns 0 on success. */ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table,write,buffer,lenp,ppos, do_proc_dointvec_userhz_jiffies_conv,NULL); @@ -1369,15 +1331,15 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, * * Returns 0 on success. */ -int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { return do_proc_dointvec(table, write, buffer, lenp, ppos, do_proc_dointvec_ms_jiffies_conv, NULL); } -static int proc_do_cad_pid(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct pid *new_pid; pid_t tmp; @@ -1416,7 +1378,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write, * Returns 0 on success. */ int proc_do_large_bitmap(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int err = 0; bool first = 1; @@ -1432,7 +1394,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, } if (write) { - char *kbuf, *p; + char *p = buffer; size_t skipped = 0; if (left > PAGE_SIZE - 1) { @@ -1441,15 +1403,9 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, skipped = *lenp - left; } - p = kbuf = memdup_user_nul(buffer, left); - if (IS_ERR(kbuf)) - return PTR_ERR(kbuf); - tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL); - if (!tmp_bitmap) { - kfree(kbuf); + if (!tmp_bitmap) return -ENOMEM; - } proc_skip_char(&p, &left, '\n'); while (!err && left) { unsigned long val_a, val_b; @@ -1513,7 +1469,6 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, first = 0; proc_skip_char(&p, &left, '\n'); } - kfree(kbuf); left += skipped; } else { unsigned long bit_a, bit_b = 0; @@ -1525,27 +1480,17 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, bit_b = find_next_zero_bit(bitmap, bitmap_len, bit_a + 1) - 1; - if (!first) { - err = proc_put_char(&buffer, &left, ','); - if (err) - break; - } - err = proc_put_long(&buffer, &left, bit_a, false); - if (err) - break; + if (!first) + proc_put_char(&buffer, &left, ','); + proc_put_long(&buffer, &left, bit_a, false); if (bit_a != bit_b) { - err = proc_put_char(&buffer, &left, '-'); - if (err) - break; - err = proc_put_long(&buffer, &left, bit_b, false); - if (err) - break; + proc_put_char(&buffer, &left, '-'); + proc_put_long(&buffer, &left, bit_b, false); } first = 0; bit_b++; } - if (!err) - err = proc_put_char(&buffer, &left, '\n'); + proc_put_char(&buffer, &left, '\n'); } if (!err) { @@ -1566,68 +1511,67 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, #else /* CONFIG_PROC_SYSCTL */ int proc_dostring(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_douintvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_douintvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_jiffies(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { - return -ENOSYS; + return -ENOSYS; } int proc_do_large_bitmap(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return -ENOSYS; } @@ -1636,8 +1580,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write, #if defined(CONFIG_SYSCTL) int proc_do_static_key(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct static_key *key = (struct static_key *)table->data; static DEFINE_MUTEX(static_key_mutex); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index a5221abb4594..398e6eadb861 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -249,8 +249,7 @@ void timers_update_nohz(void) } int timer_migration_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c index 8d2b98812625..167a74a15b1a 100644 --- a/kernel/trace/trace.c +++ b/kernel/trace/trace.c @@ -2661,7 +2661,7 @@ static void output_printk(struct trace_event_buffer *fbuffer) } int tracepoint_printk_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int save_tracepoint_printk; diff --git a/kernel/umh.c b/kernel/umh.c index 7f255b5a8845..9788ed481a6a 100644 --- a/kernel/umh.c +++ b/kernel/umh.c @@ -630,7 +630,7 @@ int call_usermodehelper(const char *path, char **argv, char **envp, int wait) EXPORT_SYMBOL(call_usermodehelper); static int proc_cap_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table t; unsigned long cap_array[_KERNEL_CAPABILITY_U32S]; diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c index 3732c888a949..4ca61d49885b 100644 --- a/kernel/utsname_sysctl.c +++ b/kernel/utsname_sysctl.c @@ -30,7 +30,7 @@ static void *get_uts(struct ctl_table *table) * to observe. Should this be in kernel/sys.c ???? */ static int proc_do_uts_string(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table uts_table; int r; diff --git a/kernel/watchdog.c b/kernel/watchdog.c index b6b1f54a7837..53ff2c81b084 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -661,7 +661,7 @@ static void proc_watchdog_update(void) * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED */ static int proc_watchdog_common(int which, struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int err, old, *param = table->data; @@ -688,7 +688,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write, * /proc/sys/kernel/watchdog */ int proc_watchdog(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); @@ -698,7 +698,7 @@ int proc_watchdog(struct ctl_table *table, int write, * /proc/sys/kernel/nmi_watchdog */ int proc_nmi_watchdog(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!nmi_watchdog_available && write) return -ENOTSUPP; @@ -710,7 +710,7 @@ int proc_nmi_watchdog(struct ctl_table *table, int write, * /proc/sys/kernel/soft_watchdog */ int proc_soft_watchdog(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { return proc_watchdog_common(SOFT_WATCHDOG_ENABLED, table, write, buffer, lenp, ppos); @@ -720,7 +720,7 @@ int proc_soft_watchdog(struct ctl_table *table, int write, * /proc/sys/kernel/watchdog_thresh */ int proc_watchdog_thresh(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int err, old; @@ -743,7 +743,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write, * been brought online, if desired. */ int proc_watchdog_cpumask(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int err; diff --git a/mm/compaction.c b/mm/compaction.c index 46f0fcc93081..d8cfb7b99a83 100644 --- a/mm/compaction.c +++ b/mm/compaction.c @@ -2463,7 +2463,7 @@ int sysctl_compact_memory; * /proc/sys/vm/compact_memory */ int sysctl_compaction_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { if (write) compact_nodes(); diff --git a/mm/hugetlb.c b/mm/hugetlb.c index cd459155d28a..2277c5728b1f 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3352,7 +3352,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array) #ifdef CONFIG_SYSCTL static int hugetlb_sysctl_handler_common(bool obey_mempolicy, struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; unsigned long tmp = h->max_huge_pages; @@ -3375,7 +3375,7 @@ out: } int hugetlb_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { return hugetlb_sysctl_handler_common(false, table, write, @@ -3384,7 +3384,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write, #ifdef CONFIG_NUMA int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { return hugetlb_sysctl_handler_common(true, table, write, buffer, length, ppos); @@ -3392,8 +3392,7 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write, #endif /* CONFIG_NUMA */ int hugetlb_overcommit_handler(struct ctl_table *table, int write, - void __user *buffer, - size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { struct hstate *h = &default_hstate; unsigned long tmp; diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7326b54ab728..d3ee4c4dafac 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -512,8 +512,7 @@ bool node_dirty_ok(struct pglist_data *pgdat) } int dirty_background_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -524,8 +523,7 @@ int dirty_background_ratio_handler(struct ctl_table *table, int write, } int dirty_background_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -535,9 +533,8 @@ int dirty_background_bytes_handler(struct ctl_table *table, int write, return ret; } -int dirty_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int old_ratio = vm_dirty_ratio; int ret; @@ -551,8 +548,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write, } int dirty_bytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { unsigned long old_bytes = vm_dirty_bytes; int ret; @@ -1972,7 +1968,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb) * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs */ int dirty_writeback_centisecs_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { unsigned int old_interval = dirty_writeback_interval; int ret; diff --git a/mm/page_alloc.c b/mm/page_alloc.c index 62c1550cd43e..0c43e9ae5004 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -5546,21 +5546,11 @@ char numa_zonelist_order[] = "Node"; * sysctl handler for numa_zonelist_order */ int numa_zonelist_order_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, - loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { - char *str; - int ret; - - if (!write) - return proc_dostring(table, write, buffer, length, ppos); - str = memdup_user_nul(buffer, 16); - if (IS_ERR(str)) - return PTR_ERR(str); - - ret = __parse_numa_zonelist_order(str); - kfree(str); - return ret; + if (write) + return __parse_numa_zonelist_order(buffer); + return proc_dostring(table, write, buffer, length, ppos); } @@ -7963,7 +7953,7 @@ core_initcall(init_per_zone_wmark_min) * changes. */ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -7979,7 +7969,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write, } int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -8009,7 +7999,7 @@ static void setup_min_unmapped_ratio(void) int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -8036,7 +8026,7 @@ static void setup_min_slab_ratio(void) } int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { int rc; @@ -8060,7 +8050,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write, * if in function of the boot time zone sizes. */ int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { proc_dointvec_minmax(table, write, buffer, length, ppos); setup_per_zone_lowmem_reserve(); @@ -8082,7 +8072,7 @@ static void __zone_pcp_update(struct zone *zone) * pagelist can have before it gets flushed back to buddy allocator. */ int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { struct zone *zone; int old_percpu_pagelist_fraction; diff --git a/mm/util.c b/mm/util.c index 988d11e6c17c..8defc8ec141f 100644 --- a/mm/util.c +++ b/mm/util.c @@ -717,9 +717,8 @@ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT; unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */ unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */ -int overcommit_ratio_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int ret; @@ -729,9 +728,8 @@ int overcommit_ratio_handler(struct ctl_table *table, int write, return ret; } -int overcommit_kbytes_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int ret; diff --git a/mm/vmstat.c b/mm/vmstat.c index 96d21a792b57..c03a8c914922 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -76,7 +76,7 @@ static void invalid_numa_statistics(void) static DEFINE_MUTEX(vm_numa_stat_lock); int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *length, loff_t *ppos) + void *buffer, size_t *length, loff_t *ppos) { int ret, oldval; @@ -1751,7 +1751,7 @@ static void refresh_vm_stats(struct work_struct *work) } int vmstat_refresh(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { long val; int err; diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c index 59980ecfc962..04c3f9a82650 100644 --- a/net/bridge/br_netfilter_hooks.c +++ b/net/bridge/br_netfilter_hooks.c @@ -1027,7 +1027,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net, #ifdef CONFIG_SYSCTL static int brnf_sysctl_call_tables(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/net/core/neighbour.c b/net/core/neighbour.c index 39d37d0ef575..3f2263e79e4b 100644 --- a/net/core/neighbour.c +++ b/net/core/neighbour.c @@ -3379,7 +3379,7 @@ EXPORT_SYMBOL(neigh_app_ns); static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN); static int proc_unres_qlen(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int size, ret; struct ctl_table tmp = *ctl; @@ -3443,8 +3443,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write) } static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, + loff_t *ppos) { struct ctl_table tmp = *ctl; int ret; @@ -3457,8 +3457,8 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write, return ret; } -int neigh_proc_dointvec(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos) { int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); @@ -3467,8 +3467,7 @@ int neigh_proc_dointvec(struct ctl_table *ctl, int write, } EXPORT_SYMBOL(neigh_proc_dointvec); -int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, - void __user *buffer, +int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos); @@ -3479,8 +3478,8 @@ int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, EXPORT_SYMBOL(neigh_proc_dointvec_jiffies); static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, + loff_t *ppos) { int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos); @@ -3489,8 +3488,7 @@ static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write, } int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos); @@ -3500,8 +3498,8 @@ int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write, EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies); static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, + loff_t *ppos) { int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos); @@ -3510,8 +3508,8 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write, } static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, + loff_t *ppos) { struct neigh_parms *p = ctl->extra2; int ret; diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c index 9f9e00ba3ad7..0ddb13a6282b 100644 --- a/net/core/sysctl_net_core.c +++ b/net/core/sysctl_net_core.c @@ -45,7 +45,7 @@ EXPORT_SYMBOL(sysctl_devconf_inherit_init_net); #ifdef CONFIG_RPS static int rps_sock_flow_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { unsigned int orig_size, size; int ret, i; @@ -115,8 +115,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write, static DEFINE_MUTEX(flow_limit_update_mutex); static int flow_limit_cpu_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct sd_flow_limit *cur; struct softnet_data *sd; @@ -180,10 +179,7 @@ write_unlock: } if (len < *lenp) kbuf[len++] = '\n'; - if (copy_to_user(buffer, kbuf, len)) { - ret = -EFAULT; - goto done; - } + memcpy(buffer, kbuf, len); *lenp = len; *ppos += len; } @@ -194,8 +190,7 @@ done: } static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { unsigned int old, *ptr; int ret; @@ -217,7 +212,7 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write, #ifdef CONFIG_NET_SCHED static int set_default_qdisc(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char id[IFNAMSIZ]; struct ctl_table tbl = { @@ -236,7 +231,7 @@ static int set_default_qdisc(struct ctl_table *table, int write, #endif static int proc_do_dev_weight(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; @@ -251,7 +246,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write, } static int proc_do_rss_key(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table fake_table; char buf[NETDEV_RSS_KEY_LEN * 3]; @@ -264,7 +259,7 @@ static int proc_do_rss_key(struct ctl_table *table, int write, #ifdef CONFIG_BPF_JIT static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int ret, jit_enable = *(int *)table->data; @@ -291,8 +286,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write, # ifdef CONFIG_HAVE_EBPF_JIT static int proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; @@ -303,8 +297,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write, static int proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!capable(CAP_SYS_ADMIN)) return -EPERM; diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c index cca7ae712995..65abcf1b3210 100644 --- a/net/decnet/dn_dev.c +++ b/net/decnet/dn_dev.c @@ -160,8 +160,8 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU static int min_priority[1]; static int max_priority[] = { 127 }; /* From DECnet spec */ -static int dn_forwarding_proc(struct ctl_table *, int, - void __user *, size_t *, loff_t *); +static int dn_forwarding_proc(struct ctl_table *, int, void *, size_t *, + loff_t *); static struct dn_dev_sysctl_table { struct ctl_table_header *sysctl_header; struct ctl_table dn_dev_vars[5]; @@ -245,8 +245,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms) } static int dn_forwarding_proc(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { #ifdef CONFIG_DECNET_ROUTER struct net_device *dev = table->extra1; diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c index 55bf64a22b59..deae519bdeec 100644 --- a/net/decnet/sysctl_net_decnet.c +++ b/net/decnet/sysctl_net_decnet.c @@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str) } static int dn_node_address_handler(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char addr[DN_ASCBUF_LEN]; size_t len; @@ -148,10 +147,7 @@ static int dn_node_address_handler(struct ctl_table *table, int write, if (write) { len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1); - - if (copy_from_user(addr, buffer, len)) - return -EFAULT; - + memcpy(addr, buffer, len); addr[len] = 0; strip_it(addr); @@ -173,11 +169,9 @@ static int dn_node_address_handler(struct ctl_table *table, int write, len = strlen(addr); addr[len++] = '\n'; - if (len > *lenp) len = *lenp; - - if (copy_to_user(buffer, addr, len)) - return -EFAULT; - + if (len > *lenp) + len = *lenp; + memcpy(buffer, addr, len); *lenp = len; *ppos += len; @@ -185,8 +179,7 @@ static int dn_node_address_handler(struct ctl_table *table, int write, } static int dn_def_dev_handler(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { size_t len; struct net_device *dev; @@ -201,9 +194,7 @@ static int dn_def_dev_handler(struct ctl_table *table, int write, if (*lenp > 16) return -E2BIG; - if (copy_from_user(devname, buffer, *lenp)) - return -EFAULT; - + memcpy(devname, buffer, *lenp); devname[*lenp] = 0; strip_it(devname); @@ -238,9 +229,7 @@ static int dn_def_dev_handler(struct ctl_table *table, int write, if (len > *lenp) len = *lenp; - if (copy_to_user(buffer, devname, len)) - return -EFAULT; - + memcpy(buffer, devname, len); *lenp = len; *ppos += len; diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c index 30fa42f5997d..a118978d222c 100644 --- a/net/ipv4/devinet.c +++ b/net/ipv4/devinet.c @@ -2361,8 +2361,7 @@ static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf) } static int devinet_conf_proc(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int old_value = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); @@ -2414,8 +2413,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write, } static int devinet_sysctl_forward(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -2458,8 +2456,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write, } static int ipv4_doint_and_flush(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; diff --git a/net/ipv4/route.c b/net/ipv4/route.c index 788c69d9bfe0..041f4dcac440 100644 --- a/net/ipv4/route.c +++ b/net/ipv4/route.c @@ -3336,8 +3336,7 @@ static int ip_rt_gc_elasticity __read_mostly = 8; static int ip_min_valid_pmtu __read_mostly = IPV4_MIN_MTU; static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = (struct net *)__ctl->extra1; diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c index 81b267e990a1..868e317cc324 100644 --- a/net/ipv4/sysctl_net_ipv4.c +++ b/net/ipv4/sysctl_net_ipv4.c @@ -71,8 +71,7 @@ static void set_local_port_range(struct net *net, int range[2]) /* Validate changes from /proc interface. */ static int ipv4_local_port_range(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(table->data, struct net, ipv4.ip_local_ports.range); @@ -107,7 +106,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write, /* Validate changes from /proc interface. */ static int ipv4_privileged_ports(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(table->data, struct net, ipv4.sysctl_ip_prot_sock); @@ -168,8 +167,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig /* Validate changes from /proc interface. */ static int ipv4_ping_group_range(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct user_namespace *user_ns = current_user_ns(); int ret; @@ -204,8 +202,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write, } static int ipv4_fwd_update_priority(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int ret; @@ -221,7 +218,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write, } static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(ctl->data, struct net, ipv4.tcp_congestion_control); @@ -241,9 +238,8 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write, } static int proc_tcp_available_congestion_control(struct ctl_table *ctl, - int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, }; int ret; @@ -258,9 +254,8 @@ static int proc_tcp_available_congestion_control(struct ctl_table *ctl, } static int proc_allowed_congestion_control(struct ctl_table *ctl, - int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX }; int ret; @@ -296,8 +291,7 @@ static int sscanf_key(char *buf, __le32 *key) } static int proc_tcp_fastopen_key(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(table->data, struct net, ipv4.sysctl_tcp_fastopen); @@ -399,7 +393,7 @@ static void proc_configure_early_demux(int enabled, int protocol) } static int proc_tcp_early_demux(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; @@ -415,7 +409,7 @@ static int proc_tcp_early_demux(struct ctl_table *table, int write, } static int proc_udp_early_demux(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; @@ -431,8 +425,7 @@ static int proc_udp_early_demux(struct ctl_table *table, int write, } static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, - int write, - void __user *buffer, + int write, void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(table->data, struct net, @@ -447,8 +440,7 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table, } static int proc_tcp_available_ulp(struct ctl_table *ctl, - int write, - void __user *buffer, size_t *lenp, + int write, void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, }; @@ -466,7 +458,7 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl, #ifdef CONFIG_IP_ROUTE_MULTIPATH static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = container_of(table->data, struct net, diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c index 24e319dfb510..9d0e89bccb90 100644 --- a/net/ipv6/addrconf.c +++ b/net/ipv6/addrconf.c @@ -6108,9 +6108,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp) #ifdef CONFIG_SYSCTL -static -int addrconf_sysctl_forward(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_forward(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -6134,9 +6133,8 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { struct inet6_dev *idev = ctl->extra1; int min_mtu = IPV6_MIN_MTU; @@ -6206,9 +6204,8 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf) return 0; } -static -int addrconf_sysctl_disable(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_disable(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; @@ -6232,9 +6229,8 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write, return ret; } -static -int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int ret; @@ -6275,7 +6271,7 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write, } static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int ret = 0; @@ -6337,7 +6333,7 @@ out: } static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, + void *buffer, size_t *lenp, loff_t *ppos) { int err; @@ -6404,8 +6400,7 @@ out: static int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl, - int write, - void __user *buffer, + int write, void *buffer, size_t *lenp, loff_t *ppos) { @@ -6505,10 +6500,8 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val) return 0; } -static -int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) +static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = ctl->data; int val = *valp; diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c index 1ecd4e9b0bdf..58f1255295d3 100644 --- a/net/ipv6/ndisc.c +++ b/net/ipv6/ndisc.c @@ -1835,7 +1835,8 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl, } } -int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos) +int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos) { struct net_device *dev = ctl->extra1; struct inet6_dev *idev; diff --git a/net/ipv6/route.c b/net/ipv6/route.c index 310cbddaa533..acdb31e38412 100644 --- a/net/ipv6/route.c +++ b/net/ipv6/route.c @@ -6088,9 +6088,8 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v) #ifdef CONFIG_SYSCTL -static -int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write, + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int delay; diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c index 63b657aa8d29..fac2135aa47b 100644 --- a/net/ipv6/sysctl_net_ipv6.c +++ b/net/ipv6/sysctl_net_ipv6.c @@ -26,8 +26,7 @@ static int auto_flowlabels_min; static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX; static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net; int ret; diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c index 4701edffb1f7..a42e4ed5ab0e 100644 --- a/net/mpls/af_mpls.c +++ b/net/mpls/af_mpls.c @@ -1362,8 +1362,7 @@ done: (&((struct mpls_dev *)0)->field) static int mpls_conf_proc(struct ctl_table *ctl, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int oval = *(int *)ctl->data; int ret = proc_dointvec(ctl, write, buffer, lenp, ppos); @@ -2594,7 +2593,7 @@ nolabels: } static int mpls_platform_labels(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = table->data; int platform_labels = net->mpls.platform_labels; diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c index 8d14a1acbc37..412656c34f20 100644 --- a/net/netfilter/ipvs/ip_vs_ctl.c +++ b/net/netfilter/ipvs/ip_vs_ctl.c @@ -1736,7 +1736,7 @@ static int three = 3; static int proc_do_defense_mode(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct netns_ipvs *ipvs = table->extra2; int *valp = table->data; @@ -1763,7 +1763,7 @@ proc_do_defense_mode(struct ctl_table *table, int write, static int proc_do_sync_threshold(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; int val[2]; @@ -1788,7 +1788,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write, static int proc_do_sync_ports(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int *valp = table->data; int val = *valp; diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c index 9b57330c81f8..31b027b12ff3 100644 --- a/net/netfilter/nf_conntrack_standalone.c +++ b/net/netfilter/nf_conntrack_standalone.c @@ -517,7 +517,7 @@ static unsigned int nf_conntrack_htable_size_user __read_mostly; static int nf_conntrack_hash_sysctl(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c index bb25d4c794c7..6cb9f9474b05 100644 --- a/net/netfilter/nf_log.c +++ b/net/netfilter/nf_log.c @@ -414,7 +414,7 @@ static struct ctl_table nf_log_sysctl_ftable[] = { }; static int nf_log_proc_dostring(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { const struct nf_logger *logger; char buf[NFLOGGER_NAME_LEN]; diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c index 251e750fd9aa..0d0bf41381c2 100644 --- a/net/phonet/sysctl.c +++ b/net/phonet/sysctl.c @@ -49,8 +49,7 @@ void phonet_get_local_port_range(int *min, int *max) } static int proc_local_port_range(struct ctl_table *table, int write, - void __user *buffer, - size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; int range[2] = {local_port_range[0], local_port_range[1]}; diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 66121bc6f34e..46782fac4c16 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -62,8 +62,7 @@ static atomic_t rds_tcp_unloading = ATOMIC_INIT(0); static struct kmem_cache *rds_tcp_conn_slab; static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *fpos); + void *buffer, size_t *lenp, loff_t *fpos); static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; @@ -676,8 +675,7 @@ static void rds_tcp_sysctl_reset(struct net *net) } static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *fpos) + void *buffer, size_t *lenp, loff_t *fpos) { struct net *net = current->nsproxy->net_ns; int err; diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c index 4740aa70e652..c16c80963e55 100644 --- a/net/sctp/sysctl.c +++ b/net/sctp/sysctl.c @@ -43,20 +43,15 @@ static unsigned long max_autoclose_max = ? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); -static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); +static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer, + size_t *lenp, loff_t *ppos); static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static int proc_sctp_do_auth(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos); + void *buffer, size_t *lenp, loff_t *ppos); static struct ctl_table sctp_table[] = { { @@ -343,8 +338,7 @@ static struct ctl_table sctp_net_table[] = { }; static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; @@ -389,8 +383,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write, } static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *) ctl->extra1; @@ -418,8 +411,7 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write, } static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; unsigned int min = *(unsigned int *) ctl->extra1; @@ -447,8 +439,7 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, } static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (write) pr_warn_once("Changing rto_alpha or rto_beta may lead to " @@ -458,8 +449,7 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write, } static int proc_sctp_do_auth(struct ctl_table *ctl, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct net *net = current->nsproxy->net_ns; struct ctl_table tbl; diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c index d75f17b56f0e..999eee1ed61c 100644 --- a/net/sunrpc/sysctl.c +++ b/net/sunrpc/sysctl.c @@ -60,7 +60,7 @@ rpc_unregister_sysctl(void) } static int proc_do_xprt(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { char tmpbuf[256]; size_t len; @@ -70,15 +70,15 @@ static int proc_do_xprt(struct ctl_table *table, int write, return 0; } len = svc_print_xprts(tmpbuf, sizeof(tmpbuf)); - return simple_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); + return memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len); } static int -proc_dodebug(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) +proc_dodebug(struct ctl_table *table, int write, void *buffer, size_t *lenp, + loff_t *ppos) { - char tmpbuf[20], c, *s = NULL; - char __user *p; + char tmpbuf[20], *s = NULL; + char *p; unsigned int value; size_t left, len; @@ -90,18 +90,17 @@ proc_dodebug(struct ctl_table *table, int write, left = *lenp; if (write) { - if (!access_ok(buffer, left)) - return -EFAULT; p = buffer; - while (left && __get_user(c, p) >= 0 && isspace(c)) - left--, p++; + while (left && isspace(*p)) { + left--; + p++; + } if (!left) goto done; if (left > sizeof(tmpbuf) - 1) return -EINVAL; - if (copy_from_user(tmpbuf, p, left)) - return -EFAULT; + memcpy(tmpbuf, p, left); tmpbuf[left] = '\0'; value = simple_strtol(tmpbuf, &s, 0); @@ -121,11 +120,9 @@ proc_dodebug(struct ctl_table *table, int write, len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data); if (len > left) len = left; - if (copy_to_user(buffer, tmpbuf, len)) - return -EFAULT; + memcpy(buffer, tmpbuf, len); if ((left -= len) > 0) { - if (put_user('\n', (char __user *)buffer + len)) - return -EFAULT; + *((char *)buffer + len) = '\n'; left--; } } diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c index 97bca509a391..526da5d4710b 100644 --- a/net/sunrpc/xprtrdma/svc_rdma.c +++ b/net/sunrpc/xprtrdma/svc_rdma.c @@ -80,8 +80,7 @@ atomic_t rdma_stat_sq_prod; * current value. */ static int read_reset_stat(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, - loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { atomic_t *stat = (atomic_t *)table->data; @@ -103,8 +102,8 @@ static int read_reset_stat(struct ctl_table *table, int write, len -= *ppos; if (len > *lenp) len = *lenp; - if (len && copy_to_user(buffer, str_buf, len)) - return -EFAULT; + if (len) + memcpy(buffer, str_buf, len); *lenp = len; *ppos += len; } diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c index b621ad74f54a..27e371b44dad 100644 --- a/security/apparmor/lsm.c +++ b/security/apparmor/lsm.c @@ -1696,7 +1696,7 @@ static int __init alloc_buffers(void) #ifdef CONFIG_SYSCTL static int apparmor_dointvec(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { if (!policy_admin_capable(NULL)) return -EPERM; diff --git a/security/min_addr.c b/security/min_addr.c index 94d2b0cf0e7b..88c9a6a21f47 100644 --- a/security/min_addr.c +++ b/security/min_addr.c @@ -30,7 +30,7 @@ static void update_mmap_min_addr(void) * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly */ int mmap_min_addr_handler(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { int ret; diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c index 94dc346370b1..536c99646f6a 100644 --- a/security/yama/yama_lsm.c +++ b/security/yama/yama_lsm.c @@ -430,7 +430,7 @@ static struct security_hook_list yama_hooks[] __lsm_ro_after_init = { #ifdef CONFIG_SYSCTL static int yama_dointvec_minmax(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos) + void *buffer, size_t *lenp, loff_t *ppos) { struct ctl_table table_copy; -- cgit v1.2.3 From f9d041271cf44ca02eed0cc82e1a6d8c814c53ed Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:05 -0700 Subject: bpf: Refactor bpf_link update handling Make bpf_link update support more generic by making it into another bpf_link_ops methods. This allows generic syscall handling code to be agnostic to various conditionally compiled features (e.g., the case of CONFIG_CGROUP_BPF). This also allows to keep link type-specific code to remain static within respective code base. Refactor existing bpf_cgroup_link code and take advantage of this. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-2-andriin@fb.com --- include/linux/bpf-cgroup.h | 12 ------------ include/linux/bpf.h | 3 ++- kernel/bpf/cgroup.c | 30 ++++++++++++++++++++++++++++-- kernel/bpf/syscall.c | 11 ++++------- kernel/cgroup/cgroup.c | 27 --------------------------- 5 files changed, 34 insertions(+), 49 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 0b41fd5fc96b..a9cb9a5bf8e9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -100,8 +100,6 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, struct bpf_cgroup_link *link, enum bpf_attach_type type); -int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, - struct bpf_prog *new_prog); int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -112,8 +110,6 @@ int cgroup_bpf_attach(struct cgroup *cgrp, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type); -int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, - struct bpf_prog *new_prog); int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, union bpf_attr __user *uattr); @@ -353,7 +349,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr, #else struct bpf_prog; -struct bpf_link; struct cgroup_bpf {}; static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } static inline void cgroup_bpf_offline(struct cgroup *cgrp) {} @@ -377,13 +372,6 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr, return -EINVAL; } -static inline int cgroup_bpf_replace(struct bpf_link *link, - struct bpf_prog *old_prog, - struct bpf_prog *new_prog) -{ - return -EINVAL; -} - static inline int cgroup_bpf_prog_query(const union bpf_attr *attr, union bpf_attr __user *uattr) { diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 10960cfabea4..81c8620cb4c4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1093,7 +1093,8 @@ struct bpf_link { struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); - + int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog); }; void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index bf634959885c..da6e48e802b2 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -557,8 +557,9 @@ found: * * Must be called with cgroup_mutex held. */ -int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, - struct bpf_prog *new_prog) +static int __cgroup_bpf_replace(struct cgroup *cgrp, + struct bpf_cgroup_link *link, + struct bpf_prog *new_prog) { struct list_head *progs = &cgrp->bpf.progs[link->type]; struct bpf_prog *old_prog; @@ -583,6 +584,30 @@ int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link, return 0; } +static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog, + struct bpf_prog *old_prog) +{ + struct bpf_cgroup_link *cg_link; + int ret; + + cg_link = container_of(link, struct bpf_cgroup_link, link); + + mutex_lock(&cgroup_mutex); + /* link might have been auto-released by dying cgroup, so fail */ + if (!cg_link->cgroup) { + ret = -EINVAL; + goto out_unlock; + } + if (old_prog && link->prog != old_prog) { + ret = -EPERM; + goto out_unlock; + } + ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); +out_unlock: + mutex_unlock(&cgroup_mutex); + return ret; +} + static struct bpf_prog_list *find_detach_entry(struct list_head *progs, struct bpf_prog *prog, struct bpf_cgroup_link *link, @@ -811,6 +836,7 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link) const struct bpf_link_ops bpf_cgroup_link_lops = { .release = bpf_cgroup_link_release, .dealloc = bpf_cgroup_link_dealloc, + .update_prog = cgroup_bpf_replace, }; int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 7626b8024471..f5358e1462eb 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3645,13 +3645,10 @@ static int link_update(union bpf_attr *attr) goto out_put_progs; } -#ifdef CONFIG_CGROUP_BPF - if (link->ops == &bpf_cgroup_link_lops) { - ret = cgroup_bpf_replace(link, old_prog, new_prog); - goto out_put_progs; - } -#endif - ret = -EINVAL; + if (link->ops->update_prog) + ret = link->ops->update_prog(link, new_prog, old_prog); + else + ret = EINVAL; out_put_progs: if (old_prog) diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 06b5ea9d899d..557a9b9d2244 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -6508,33 +6508,6 @@ int cgroup_bpf_attach(struct cgroup *cgrp, return ret; } -int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog, - struct bpf_prog *new_prog) -{ - struct bpf_cgroup_link *cg_link; - int ret; - - if (link->ops != &bpf_cgroup_link_lops) - return -EINVAL; - - cg_link = container_of(link, struct bpf_cgroup_link, link); - - mutex_lock(&cgroup_mutex); - /* link might have been auto-released by dying cgroup, so fail */ - if (!cg_link->cgroup) { - ret = -EINVAL; - goto out_unlock; - } - if (old_prog && link->prog != old_prog) { - ret = -EPERM; - goto out_unlock; - } - ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog); -out_unlock: - mutex_unlock(&cgroup_mutex); - return ret; -} - int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type) { -- cgit v1.2.3 From a3b80e1078943dc12553166fb08e258463dec013 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:06 -0700 Subject: bpf: Allocate ID for bpf_link Generate ID for each bpf_link using IDR, similarly to bpf_map and bpf_prog. bpf_link creation, initialization, attachment, and exposing to user-space through FD and ID is a complicated multi-step process, abstract it away through bpf_link_primer and bpf_link_prime(), bpf_link_settle(), and bpf_link_cleanup() internal API. They guarantee that until bpf_link is properly attached, user-space won't be able to access partially-initialized bpf_link either from FD or ID. All this allows to simplify bpf_link attachment and error handling code. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-3-andriin@fb.com --- include/linux/bpf.h | 17 ++++-- include/uapi/linux/bpf.h | 1 + kernel/bpf/cgroup.c | 14 ++--- kernel/bpf/syscall.c | 143 ++++++++++++++++++++++++++++++++--------------- 4 files changed, 118 insertions(+), 57 deletions(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 81c8620cb4c4..875d1f0af803 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1085,11 +1085,19 @@ int bpf_prog_new_fd(struct bpf_prog *prog); struct bpf_link { atomic64_t refcnt; + u32 id; const struct bpf_link_ops *ops; struct bpf_prog *prog; struct work_struct work; }; +struct bpf_link_primer { + struct bpf_link *link; + struct file *file; + int fd; + u32 id; +}; + struct bpf_link_ops { void (*release)(struct bpf_link *link); void (*dealloc)(struct bpf_link *link); @@ -1097,10 +1105,11 @@ struct bpf_link_ops { struct bpf_prog *old_prog); }; -void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, - struct bpf_prog *prog); -void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, - int link_fd); +void bpf_link_init(struct bpf_link *link, + const struct bpf_link_ops *ops, struct bpf_prog *prog); +int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); +int bpf_link_settle(struct bpf_link_primer *primer); +void bpf_link_cleanup(struct bpf_link_primer *primer); void bpf_link_inc(struct bpf_link *link); void bpf_link_put(struct bpf_link *link); int bpf_link_new_fd(struct bpf_link *link); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 4a6c47f3febe..6121aa487465 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -523,6 +523,7 @@ union bpf_attr { __u32 prog_id; __u32 map_id; __u32 btf_id; + __u32 link_id; }; __u32 next_id; __u32 open_flags; diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index da6e48e802b2..1bdf37fca879 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -841,10 +841,10 @@ const struct bpf_link_ops bpf_cgroup_link_lops = { int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) { + struct bpf_link_primer link_primer; struct bpf_cgroup_link *link; - struct file *link_file; struct cgroup *cgrp; - int err, link_fd; + int err; if (attr->link_create.flags) return -EINVAL; @@ -862,22 +862,20 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) link->cgroup = cgrp; link->type = attr->link_create.attach_type; - link_file = bpf_link_new_file(&link->link, &link_fd); - if (IS_ERR(link_file)) { + err = bpf_link_prime(&link->link, &link_primer); + if (err) { kfree(link); - err = PTR_ERR(link_file); goto out_put_cgroup; } err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type, BPF_F_ALLOW_MULTI); if (err) { - bpf_link_cleanup(&link->link, link_file, link_fd); + bpf_link_cleanup(&link_primer); goto out_put_cgroup; } - fd_install(link_fd, link_file); - return link_fd; + return bpf_link_settle(&link_primer); out_put_cgroup: cgroup_put(cgrp); diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index f5358e1462eb..5439e05e3d25 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -42,6 +42,8 @@ static DEFINE_IDR(prog_idr); static DEFINE_SPINLOCK(prog_idr_lock); static DEFINE_IDR(map_idr); static DEFINE_SPINLOCK(map_idr_lock); +static DEFINE_IDR(link_idr); +static DEFINE_SPINLOCK(link_idr_lock); int sysctl_unprivileged_bpf_disabled __read_mostly; @@ -2181,25 +2183,38 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } -void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops, - struct bpf_prog *prog) +void bpf_link_init(struct bpf_link *link, + const struct bpf_link_ops *ops, struct bpf_prog *prog) { atomic64_set(&link->refcnt, 1); + link->id = 0; link->ops = ops; link->prog = prog; } +static void bpf_link_free_id(int id) +{ + if (!id) + return; + + spin_lock_bh(&link_idr_lock); + idr_remove(&link_idr, id); + spin_unlock_bh(&link_idr_lock); +} + /* Clean up bpf_link and corresponding anon_inode file and FD. After * anon_inode is created, bpf_link can't be just kfree()'d due to deferred - * anon_inode's release() call. This helper manages marking bpf_link as - * defunct, releases anon_inode file and puts reserved FD. + * anon_inode's release() call. This helper marksbpf_link as + * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt + * is not decremented, it's the responsibility of a calling code that failed + * to complete bpf_link initialization. */ -void bpf_link_cleanup(struct bpf_link *link, struct file *link_file, - int link_fd) +void bpf_link_cleanup(struct bpf_link_primer *primer) { - link->prog = NULL; - fput(link_file); - put_unused_fd(link_fd); + primer->link->prog = NULL; + bpf_link_free_id(primer->id); + fput(primer->file); + put_unused_fd(primer->fd); } void bpf_link_inc(struct bpf_link *link) @@ -2210,6 +2225,7 @@ void bpf_link_inc(struct bpf_link *link) /* bpf_link_free is guaranteed to be called from process context */ static void bpf_link_free(struct bpf_link *link) { + bpf_link_free_id(link->id); if (link->prog) { /* detach BPF program, clean up used resources */ link->ops->release(link); @@ -2275,9 +2291,11 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, "link_type:\t%s\n" + "link_id:\t%u\n" "prog_tag:\t%s\n" "prog_id:\t%u\n", link_type, + link->id, prog_tag, prog->aux->id); } @@ -2292,36 +2310,76 @@ static const struct file_operations bpf_link_fops = { .write = bpf_dummy_write, }; -int bpf_link_new_fd(struct bpf_link *link) +static int bpf_link_alloc_id(struct bpf_link *link) { - return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); -} + int id; + + idr_preload(GFP_KERNEL); + spin_lock_bh(&link_idr_lock); + id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC); + spin_unlock_bh(&link_idr_lock); + idr_preload_end(); -/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but - * instead of immediately installing fd in fdtable, just reserve it and - * return. Caller then need to either install it with fd_install(fd, file) or - * release with put_unused_fd(fd). - * This is useful for cases when bpf_link attachment/detachment are - * complicated and expensive operations and should be delayed until all the fd - * reservation and anon_inode creation succeeds. + return id; +} + +/* Prepare bpf_link to be exposed to user-space by allocating anon_inode file, + * reserving unused FD and allocating ID from link_idr. This is to be paired + * with bpf_link_settle() to install FD and ID and expose bpf_link to + * user-space, if bpf_link is successfully attached. If not, bpf_link and + * pre-allocated resources are to be freed with bpf_cleanup() call. All the + * transient state is passed around in struct bpf_link_primer. + * This is preferred way to create and initialize bpf_link, especially when + * there are complicated and expensive operations inbetween creating bpf_link + * itself and attaching it to BPF hook. By using bpf_link_prime() and + * bpf_link_settle() kernel code using bpf_link doesn't have to perform + * expensive (and potentially failing) roll back operations in a rare case + * that file, FD, or ID can't be allocated. */ -struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd) +int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer) { struct file *file; - int fd; + int fd, id; fd = get_unused_fd_flags(O_CLOEXEC); if (fd < 0) - return ERR_PTR(fd); + return fd; file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC); if (IS_ERR(file)) { put_unused_fd(fd); - return file; + return PTR_ERR(file); } - *reserved_fd = fd; - return file; + id = bpf_link_alloc_id(link); + if (id < 0) { + put_unused_fd(fd); + fput(file); + return id; + } + + primer->link = link; + primer->file = file; + primer->fd = fd; + primer->id = id; + return 0; +} + +int bpf_link_settle(struct bpf_link_primer *primer) +{ + /* make bpf_link fetchable by ID */ + spin_lock_bh(&link_idr_lock); + primer->link->id = primer->id; + spin_unlock_bh(&link_idr_lock); + /* make bpf_link fetchable by FD */ + fd_install(primer->fd, primer->file); + /* pass through installed FD */ + return primer->fd; +} + +int bpf_link_new_fd(struct bpf_link *link) +{ + return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC); } struct bpf_link *bpf_link_get_from_fd(u32 ufd) @@ -2367,9 +2425,9 @@ static const struct bpf_link_ops bpf_tracing_link_lops = { static int bpf_tracing_prog_attach(struct bpf_prog *prog) { + struct bpf_link_primer link_primer; struct bpf_tracing_link *link; - struct file *link_file; - int link_fd, err; + int err; switch (prog->type) { case BPF_PROG_TYPE_TRACING: @@ -2404,22 +2462,19 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) } bpf_link_init(&link->link, &bpf_tracing_link_lops, prog); - link_file = bpf_link_new_file(&link->link, &link_fd); - if (IS_ERR(link_file)) { + err = bpf_link_prime(&link->link, &link_primer); + if (err) { kfree(link); - err = PTR_ERR(link_file); goto out_put_prog; } err = bpf_trampoline_link_prog(prog); if (err) { - bpf_link_cleanup(&link->link, link_file, link_fd); + bpf_link_cleanup(&link_primer); goto out_put_prog; } - fd_install(link_fd, link_file); - return link_fd; - + return bpf_link_settle(&link_primer); out_put_prog: bpf_prog_put(prog); return err; @@ -2447,7 +2502,7 @@ static void bpf_raw_tp_link_dealloc(struct bpf_link *link) kfree(raw_tp); } -static const struct bpf_link_ops bpf_raw_tp_lops = { +static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, .dealloc = bpf_raw_tp_link_dealloc, }; @@ -2456,13 +2511,13 @@ static const struct bpf_link_ops bpf_raw_tp_lops = { static int bpf_raw_tracepoint_open(const union bpf_attr *attr) { + struct bpf_link_primer link_primer; struct bpf_raw_tp_link *link; struct bpf_raw_event_map *btp; - struct file *link_file; struct bpf_prog *prog; const char *tp_name; char buf[128]; - int link_fd, err; + int err; if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN)) return -EINVAL; @@ -2515,24 +2570,22 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) err = -ENOMEM; goto out_put_btp; } - bpf_link_init(&link->link, &bpf_raw_tp_lops, prog); + bpf_link_init(&link->link, &bpf_raw_tp_link_lops, prog); link->btp = btp; - link_file = bpf_link_new_file(&link->link, &link_fd); - if (IS_ERR(link_file)) { + err = bpf_link_prime(&link->link, &link_primer); + if (err) { kfree(link); - err = PTR_ERR(link_file); goto out_put_btp; } err = bpf_probe_register(link->btp, prog); if (err) { - bpf_link_cleanup(&link->link, link_file, link_fd); + bpf_link_cleanup(&link_primer); goto out_put_btp; } - fd_install(link_fd, link_file); - return link_fd; + return bpf_link_settle(&link_primer); out_put_btp: bpf_put_raw_tracepoint(btp); @@ -3464,7 +3517,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr, if (file->f_op == &bpf_link_fops) { struct bpf_link *link = file->private_data; - if (link->ops == &bpf_raw_tp_lops) { + if (link->ops == &bpf_raw_tp_link_lops) { struct bpf_raw_tp_link *raw_tp = container_of(link, struct bpf_raw_tp_link, link); struct bpf_raw_event_map *btp = raw_tp->btp; -- cgit v1.2.3 From 2d602c8cf40d65d4a7ac34fe18648d8778e6e594 Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:07 -0700 Subject: bpf: Support GET_FD_BY_ID and GET_NEXT_ID for bpf_link Add support to look up bpf_link by ID and iterate over all existing bpf_links in the system. GET_FD_BY_ID code handles not-yet-ready bpf_link by checking that its ID hasn't been set to non-zero value yet. Setting bpf_link's ID is done as the very last step in finalizing bpf_link, together with installing FD. This approach allows users of bpf_link in kernel code to not worry about races between user-space and kernel code that hasn't finished attaching and initializing bpf_link. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-4-andriin@fb.com --- include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 49 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 6121aa487465..7e6541fceade 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -113,6 +113,8 @@ enum bpf_cmd { BPF_MAP_DELETE_BATCH, BPF_LINK_CREATE, BPF_LINK_UPDATE, + BPF_LINK_GET_FD_BY_ID, + BPF_LINK_GET_NEXT_ID, }; enum bpf_map_type { diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 5439e05e3d25..1c213a730502 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3713,6 +3713,48 @@ out_put_link: return ret; } +static int bpf_link_inc_not_zero(struct bpf_link *link) +{ + return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT; +} + +#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id + +static int bpf_link_get_fd_by_id(const union bpf_attr *attr) +{ + struct bpf_link *link; + u32 id = attr->link_id; + int fd, err; + + if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + spin_lock_bh(&link_idr_lock); + link = idr_find(&link_idr, id); + /* before link is "settled", ID is 0, pretend it doesn't exist yet */ + if (link) { + if (link->id) + err = bpf_link_inc_not_zero(link); + else + err = -EAGAIN; + } else { + err = -ENOENT; + } + spin_unlock_bh(&link_idr_lock); + + if (err) + return err; + + fd = bpf_link_new_fd(link); + if (fd < 0) + bpf_link_put(link); + + return fd; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr; @@ -3830,6 +3872,13 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz case BPF_LINK_UPDATE: err = link_update(&attr); break; + case BPF_LINK_GET_FD_BY_ID: + err = bpf_link_get_fd_by_id(&attr); + break; + case BPF_LINK_GET_NEXT_ID: + err = bpf_obj_get_next_id(&attr, uattr, + &link_idr, &link_idr_lock); + break; default: err = -EINVAL; break; -- cgit v1.2.3 From f2e10bff16a0fdd41ba278c84da9813700e356af Mon Sep 17 00:00:00 2001 From: Andrii Nakryiko Date: Tue, 28 Apr 2020 17:16:08 -0700 Subject: bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link Add ability to fetch bpf_link details through BPF_OBJ_GET_INFO_BY_FD command. Also enhance show_fdinfo to potentially include bpf_link type-specific information (similarly to obj_info). Also introduce enum bpf_link_type stored in bpf_link itself and expose it in UAPI. bpf_link_tracing also now will store and return bpf_attach_type. Signed-off-by: Andrii Nakryiko Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200429001614.1544-5-andriin@fb.com --- include/linux/bpf-cgroup.h | 2 - include/linux/bpf.h | 8 ++- include/linux/bpf_types.h | 6 ++ include/uapi/linux/bpf.h | 28 ++++++++ kernel/bpf/btf.c | 2 + kernel/bpf/cgroup.c | 43 +++++++++++- kernel/bpf/syscall.c | 155 ++++++++++++++++++++++++++++++++++++----- kernel/bpf/verifier.c | 2 + tools/include/uapi/linux/bpf.h | 31 +++++++++ 9 files changed, 253 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index a9cb9a5bf8e9..272626cc3fc9 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -57,8 +57,6 @@ struct bpf_cgroup_link { enum bpf_attach_type type; }; -extern const struct bpf_link_ops bpf_cgroup_link_lops; - struct bpf_prog_list { struct list_head node; struct bpf_prog *prog; diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 875d1f0af803..c07b1d2f3824 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1026,9 +1026,11 @@ extern const struct file_operations bpf_prog_fops; extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; +#define BPF_LINK_TYPE(_id, _name) #include #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; @@ -1086,6 +1088,7 @@ int bpf_prog_new_fd(struct bpf_prog *prog); struct bpf_link { atomic64_t refcnt; u32 id; + enum bpf_link_type type; const struct bpf_link_ops *ops; struct bpf_prog *prog; struct work_struct work; @@ -1103,9 +1106,12 @@ struct bpf_link_ops { void (*dealloc)(struct bpf_link *link); int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog, struct bpf_prog *old_prog); + void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq); + int (*fill_link_info)(const struct bpf_link *link, + struct bpf_link_info *info); }; -void bpf_link_init(struct bpf_link *link, +void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog); int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer); int bpf_link_settle(struct bpf_link_primer *primer); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index ba0c2d56f8a3..8345cdf553b8 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -118,3 +118,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops) #if defined(CONFIG_BPF_JIT) BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops) #endif + +BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint) +BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing) +#ifdef CONFIG_CGROUP_BPF +BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup) +#endif diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 7e6541fceade..0eccafae55bb 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -222,6 +222,15 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +enum bpf_link_type { + BPF_LINK_TYPE_UNSPEC = 0, + BPF_LINK_TYPE_RAW_TRACEPOINT = 1, + BPF_LINK_TYPE_TRACING = 2, + BPF_LINK_TYPE_CGROUP = 3, + + MAX_BPF_LINK_TYPE, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -3612,6 +3621,25 @@ struct bpf_btf_info { __u32 id; } __attribute__((aligned(8))); +struct bpf_link_info { + __u32 type; + __u32 id; + __u32 prog_id; + union { + struct { + __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ + __u32 tp_name_len; /* in/out: tp_name buffer len */ + } raw_tracepoint; + struct { + __u32 attach_type; + } tracing; + struct { + __u64 cgroup_id; + __u32 attach_type; + } cgroup; + }; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach attach type). diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d65c6912bdaf..a2cfba89a8e1 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -3482,6 +3482,7 @@ extern char __weak __stop_BTF[]; extern struct btf *btf_vmlinux; #define BPF_MAP_TYPE(_id, _ops) +#define BPF_LINK_TYPE(_id, _name) static union { struct bpf_ctx_convert { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ @@ -3508,6 +3509,7 @@ static u8 bpf_ctx_convert_map[] = { 0, /* avoid empty array */ }; #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE static const struct btf_member * btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf, diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c index 1bdf37fca879..5c0e964105ac 100644 --- a/kernel/bpf/cgroup.c +++ b/kernel/bpf/cgroup.c @@ -833,10 +833,48 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link) kfree(cg_link); } -const struct bpf_link_ops bpf_cgroup_link_lops = { +static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_cgroup_link *cg_link = + container_of(link, struct bpf_cgroup_link, link); + u64 cg_id = 0; + + mutex_lock(&cgroup_mutex); + if (cg_link->cgroup) + cg_id = cgroup_id(cg_link->cgroup); + mutex_unlock(&cgroup_mutex); + + seq_printf(seq, + "cgroup_id:\t%llu\n" + "attach_type:\t%d\n", + cg_id, + cg_link->type); +} + +static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_cgroup_link *cg_link = + container_of(link, struct bpf_cgroup_link, link); + u64 cg_id = 0; + + mutex_lock(&cgroup_mutex); + if (cg_link->cgroup) + cg_id = cgroup_id(cg_link->cgroup); + mutex_unlock(&cgroup_mutex); + + info->cgroup.cgroup_id = cg_id; + info->cgroup.attach_type = cg_link->type; + return 0; +} + +static const struct bpf_link_ops bpf_cgroup_link_lops = { .release = bpf_cgroup_link_release, .dealloc = bpf_cgroup_link_dealloc, .update_prog = cgroup_bpf_replace, + .show_fdinfo = bpf_cgroup_link_show_fdinfo, + .fill_link_info = bpf_cgroup_link_fill_link_info, }; int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) @@ -858,7 +896,8 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog) err = -ENOMEM; goto out_put_cgroup; } - bpf_link_init(&link->link, &bpf_cgroup_link_lops, prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops, + prog); link->cgroup = cgrp; link->type = attr->link_create.attach_type; diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 1c213a730502..d23c04cbe14f 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -51,9 +51,11 @@ static const struct bpf_map_ops * const bpf_map_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) #define BPF_MAP_TYPE(_id, _ops) \ [_id] = &_ops, +#define BPF_LINK_TYPE(_id, _name) #include #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE }; /* @@ -1548,9 +1550,11 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _prog_ops, #define BPF_MAP_TYPE(_id, _ops) +#define BPF_LINK_TYPE(_id, _name) #include #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE }; static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog) @@ -2183,10 +2187,11 @@ static int bpf_obj_get(const union bpf_attr *attr) attr->file_flags); } -void bpf_link_init(struct bpf_link *link, +void bpf_link_init(struct bpf_link *link, enum bpf_link_type type, const struct bpf_link_ops *ops, struct bpf_prog *prog) { atomic64_set(&link->refcnt, 1); + link->type = type; link->id = 0; link->ops = ops; link->prog = prog; @@ -2266,27 +2271,23 @@ static int bpf_link_release(struct inode *inode, struct file *filp) return 0; } -#ifdef CONFIG_PROC_FS -static const struct bpf_link_ops bpf_raw_tp_lops; -static const struct bpf_link_ops bpf_tracing_link_lops; +#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) +#define BPF_MAP_TYPE(_id, _ops) +#define BPF_LINK_TYPE(_id, _name) [_id] = #_name, +static const char *bpf_link_type_strs[] = { + [BPF_LINK_TYPE_UNSPEC] = "", +#include +}; +#undef BPF_PROG_TYPE +#undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE +#ifdef CONFIG_PROC_FS static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) { const struct bpf_link *link = filp->private_data; const struct bpf_prog *prog = link->prog; char prog_tag[sizeof(prog->tag) * 2 + 1] = { }; - const char *link_type; - - if (link->ops == &bpf_raw_tp_lops) - link_type = "raw_tracepoint"; - else if (link->ops == &bpf_tracing_link_lops) - link_type = "tracing"; -#ifdef CONFIG_CGROUP_BPF - else if (link->ops == &bpf_cgroup_link_lops) - link_type = "cgroup"; -#endif - else - link_type = "unknown"; bin2hex(prog_tag, prog->tag, sizeof(prog->tag)); seq_printf(m, @@ -2294,10 +2295,12 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp) "link_id:\t%u\n" "prog_tag:\t%s\n" "prog_id:\t%u\n", - link_type, + bpf_link_type_strs[link->type], link->id, prog_tag, prog->aux->id); + if (link->ops->show_fdinfo) + link->ops->show_fdinfo(link, m); } #endif @@ -2403,6 +2406,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd) struct bpf_tracing_link { struct bpf_link link; + enum bpf_attach_type attach_type; }; static void bpf_tracing_link_release(struct bpf_link *link) @@ -2418,9 +2422,33 @@ static void bpf_tracing_link_dealloc(struct bpf_link *link) kfree(tr_link); } +static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_tracing_link *tr_link = + container_of(link, struct bpf_tracing_link, link); + + seq_printf(seq, + "attach_type:\t%d\n", + tr_link->attach_type); +} + +static int bpf_tracing_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_tracing_link *tr_link = + container_of(link, struct bpf_tracing_link, link); + + info->tracing.attach_type = tr_link->attach_type; + + return 0; +} + static const struct bpf_link_ops bpf_tracing_link_lops = { .release = bpf_tracing_link_release, .dealloc = bpf_tracing_link_dealloc, + .show_fdinfo = bpf_tracing_link_show_fdinfo, + .fill_link_info = bpf_tracing_link_fill_link_info, }; static int bpf_tracing_prog_attach(struct bpf_prog *prog) @@ -2460,7 +2488,9 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog) err = -ENOMEM; goto out_put_prog; } - bpf_link_init(&link->link, &bpf_tracing_link_lops, prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING, + &bpf_tracing_link_lops, prog); + link->attach_type = prog->expected_attach_type; err = bpf_link_prime(&link->link, &link_primer); if (err) { @@ -2502,9 +2532,56 @@ static void bpf_raw_tp_link_dealloc(struct bpf_link *link) kfree(raw_tp); } +static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link, + struct seq_file *seq) +{ + struct bpf_raw_tp_link *raw_tp_link = + container_of(link, struct bpf_raw_tp_link, link); + + seq_printf(seq, + "tp_name:\t%s\n", + raw_tp_link->btp->tp->name); +} + +static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link, + struct bpf_link_info *info) +{ + struct bpf_raw_tp_link *raw_tp_link = + container_of(link, struct bpf_raw_tp_link, link); + char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name); + const char *tp_name = raw_tp_link->btp->tp->name; + u32 ulen = info->raw_tracepoint.tp_name_len; + size_t tp_len = strlen(tp_name); + + if (ulen && !ubuf) + return -EINVAL; + + info->raw_tracepoint.tp_name_len = tp_len + 1; + + if (!ubuf) + return 0; + + if (ulen >= tp_len + 1) { + if (copy_to_user(ubuf, tp_name, tp_len + 1)) + return -EFAULT; + } else { + char zero = '\0'; + + if (copy_to_user(ubuf, tp_name, ulen - 1)) + return -EFAULT; + if (put_user(zero, ubuf + ulen - 1)) + return -EFAULT; + return -ENOSPC; + } + + return 0; +} + static const struct bpf_link_ops bpf_raw_tp_link_lops = { .release = bpf_raw_tp_link_release, .dealloc = bpf_raw_tp_link_dealloc, + .show_fdinfo = bpf_raw_tp_link_show_fdinfo, + .fill_link_info = bpf_raw_tp_link_fill_link_info, }; #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd @@ -2570,7 +2647,8 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr) err = -ENOMEM; goto out_put_btp; } - bpf_link_init(&link->link, &bpf_raw_tp_link_lops, prog); + bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT, + &bpf_raw_tp_link_lops, prog); link->btp = btp; err = bpf_link_prime(&link->link, &link_primer); @@ -3366,6 +3444,42 @@ static int bpf_btf_get_info_by_fd(struct btf *btf, return btf_get_info_by_fd(btf, attr, uattr); } +static int bpf_link_get_info_by_fd(struct bpf_link *link, + const union bpf_attr *attr, + union bpf_attr __user *uattr) +{ + struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info); + struct bpf_link_info info; + u32 info_len = attr->info.info_len; + int err; + + err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len); + if (err) + return err; + info_len = min_t(u32, sizeof(info), info_len); + + memset(&info, 0, sizeof(info)); + if (copy_from_user(&info, uinfo, info_len)) + return -EFAULT; + + info.type = link->type; + info.id = link->id; + info.prog_id = link->prog->aux->id; + + if (link->ops->fill_link_info) { + err = link->ops->fill_link_info(link, &info); + if (err) + return err; + } + + if (copy_to_user(uinfo, &info, info_len) || + put_user(info_len, &uattr->info.info_len)) + return -EFAULT; + + return 0; +} + + #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, @@ -3390,6 +3504,9 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr, uattr); else if (f.file->f_op == &btf_fops) err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr); + else if (f.file->f_op == &bpf_link_fops) + err = bpf_link_get_info_by_fd(f.file->private_data, + attr, uattr); else err = -EINVAL; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index 91728e0f27eb..2b337e32aa94 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -28,9 +28,11 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = { #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \ [_id] = & _name ## _verifier_ops, #define BPF_MAP_TYPE(_id, _ops) +#define BPF_LINK_TYPE(_id, _name) #include #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +#undef BPF_LINK_TYPE }; /* bpf_check() is a static code analyzer that walks eBPF program diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 4a6c47f3febe..0eccafae55bb 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -113,6 +113,8 @@ enum bpf_cmd { BPF_MAP_DELETE_BATCH, BPF_LINK_CREATE, BPF_LINK_UPDATE, + BPF_LINK_GET_FD_BY_ID, + BPF_LINK_GET_NEXT_ID, }; enum bpf_map_type { @@ -220,6 +222,15 @@ enum bpf_attach_type { #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE +enum bpf_link_type { + BPF_LINK_TYPE_UNSPEC = 0, + BPF_LINK_TYPE_RAW_TRACEPOINT = 1, + BPF_LINK_TYPE_TRACING = 2, + BPF_LINK_TYPE_CGROUP = 3, + + MAX_BPF_LINK_TYPE, +}; + /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command * * NONE(default): No further bpf programs allowed in the subtree. @@ -523,6 +534,7 @@ union bpf_attr { __u32 prog_id; __u32 map_id; __u32 btf_id; + __u32 link_id; }; __u32 next_id; __u32 open_flags; @@ -3609,6 +3621,25 @@ struct bpf_btf_info { __u32 id; } __attribute__((aligned(8))); +struct bpf_link_info { + __u32 type; + __u32 id; + __u32 prog_id; + union { + struct { + __aligned_u64 tp_name; /* in/out: tp_name buffer ptr */ + __u32 tp_name_len; /* in/out: tp_name buffer len */ + } raw_tracepoint; + struct { + __u32 attach_type; + } tracing; + struct { + __u64 cgroup_id; + __u32 attach_type; + } cgroup; + }; +} __attribute__((aligned(8))); + /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed * by user and intended to be used by socket (e.g. to bind to, depends on * attach attach type). -- cgit v1.2.3 From d46edd671a147032e22cfeb271a5734703093649 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Thu, 30 Apr 2020 00:15:04 -0700 Subject: bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS Currently, sysctl kernel.bpf_stats_enabled controls BPF runtime stats. Typical userspace tools use kernel.bpf_stats_enabled as follows: 1. Enable kernel.bpf_stats_enabled; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Disable kernel.bpf_stats_enabled. The problem with this approach is that only one userspace tool can toggle this sysctl. If multiple tools toggle the sysctl at the same time, the measurement may be inaccurate. To fix this problem while keep backward compatibility, introduce a new bpf command BPF_ENABLE_STATS. On success, this command enables stats and returns a valid fd. BPF_ENABLE_STATS takes argument "type". Currently, only one type, BPF_STATS_RUN_TIME, is supported. We can extend the command to support other types of stats in the future. With BPF_ENABLE_STATS, user space tool would have the following flow: 1. Get a fd with BPF_ENABLE_STATS, and make sure it is valid; 2. Check program run_time_ns; 3. Sleep for the monitoring period; 4. Check program run_time_ns again, calculate the difference; 5. Close the fd. Signed-off-by: Song Liu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/20200430071506.1408910-2-songliubraving@fb.com --- include/linux/bpf.h | 1 + include/uapi/linux/bpf.h | 11 ++++++++ kernel/bpf/syscall.c | 57 ++++++++++++++++++++++++++++++++++++++++++ kernel/sysctl.c | 36 +++++++++++++++++++++++++- tools/include/uapi/linux/bpf.h | 11 ++++++++ 5 files changed, 115 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c07b1d2f3824..1262ec460ab3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -987,6 +987,7 @@ _out: \ #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern struct mutex bpf_stats_enabled_mutex; /* * Block execution of BPF programs attached to instrumentation (perf, diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 0eccafae55bb..705e4822f997 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -115,6 +115,7 @@ enum bpf_cmd { BPF_LINK_UPDATE, BPF_LINK_GET_FD_BY_ID, BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, }; enum bpf_map_type { @@ -390,6 +391,12 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* type for BPF_ENABLE_STATS */ +enum bpf_stats_type { + /* enabled run_time_ns and run_cnt */ + BPF_STATS_RUN_TIME = 0, +}; + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -601,6 +608,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index c75b2dd2459c..4f34eecec9ce 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr) return fd; } +DEFINE_MUTEX(bpf_stats_enabled_mutex); + +static int bpf_stats_release(struct inode *inode, struct file *file) +{ + mutex_lock(&bpf_stats_enabled_mutex); + static_key_slow_dec(&bpf_stats_enabled_key.key); + mutex_unlock(&bpf_stats_enabled_mutex); + return 0; +} + +static const struct file_operations bpf_stats_fops = { + .release = bpf_stats_release, +}; + +static int bpf_enable_runtime_stats(void) +{ + int fd; + + mutex_lock(&bpf_stats_enabled_mutex); + + /* Set a very high limit to avoid overflow */ + if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) { + mutex_unlock(&bpf_stats_enabled_mutex); + return -EBUSY; + } + + fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC); + if (fd >= 0) + static_key_slow_inc(&bpf_stats_enabled_key.key); + + mutex_unlock(&bpf_stats_enabled_mutex); + return fd; +} + +#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type + +static int bpf_enable_stats(union bpf_attr *attr) +{ + + if (CHECK_ATTR(BPF_ENABLE_STATS)) + return -EINVAL; + + if (!capable(CAP_SYS_ADMIN)) + return -EPERM; + + switch (attr->enable_stats.type) { + case BPF_STATS_RUN_TIME: + return bpf_enable_runtime_stats(); + default: + break; + } + return -EINVAL; +} + SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size) { union bpf_attr attr; @@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz err = bpf_obj_get_next_id(&attr, uattr, &link_idr, &link_idr_lock); break; + case BPF_ENABLE_STATS: + err = bpf_enable_stats(&attr); + break; default: err = -EINVAL; break; diff --git a/kernel/sysctl.c b/kernel/sysctl.c index e961286d0e14..7adfe5dbce9d 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000; #endif /* CONFIG_SYSCTL */ +#ifdef CONFIG_BPF_SYSCALL +static int bpf_stats_handler(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos) +{ + struct static_key *key = (struct static_key *)table->data; + static int saved_val; + int val, ret; + struct ctl_table tmp = { + .data = &val, + .maxlen = sizeof(val), + .mode = table->mode, + .extra1 = SYSCTL_ZERO, + .extra2 = SYSCTL_ONE, + }; + + if (write && !capable(CAP_SYS_ADMIN)) + return -EPERM; + + mutex_lock(&bpf_stats_enabled_mutex); + val = saved_val; + ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos); + if (write && !ret && val != saved_val) { + if (val) + static_key_slow_inc(key); + else + static_key_slow_dec(key); + saved_val = val; + } + mutex_unlock(&bpf_stats_enabled_mutex); + return ret; +} +#endif + /* * /proc/sys support */ @@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = { .data = &bpf_stats_enabled_key.key, .maxlen = sizeof(bpf_stats_enabled_key), .mode = 0644, - .proc_handler = proc_do_static_key, + .proc_handler = bpf_stats_handler, }, #endif #if defined(CONFIG_TREE_RCU) diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 0eccafae55bb..705e4822f997 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -115,6 +115,7 @@ enum bpf_cmd { BPF_LINK_UPDATE, BPF_LINK_GET_FD_BY_ID, BPF_LINK_GET_NEXT_ID, + BPF_ENABLE_STATS, }; enum bpf_map_type { @@ -390,6 +391,12 @@ enum { */ #define BPF_F_QUERY_EFFECTIVE (1U << 0) +/* type for BPF_ENABLE_STATS */ +enum bpf_stats_type { + /* enabled run_time_ns and run_cnt */ + BPF_STATS_RUN_TIME = 0, +}; + enum bpf_stack_build_id_status { /* user space need an empty entry to identify end of a trace */ BPF_STACK_BUILD_ID_EMPTY = 0, @@ -601,6 +608,10 @@ union bpf_attr { __u32 old_prog_fd; } link_update; + struct { /* struct used by BPF_ENABLE_STATS command */ + __u32 type; + } enable_stats; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From beecf11bc2188067824591612151c4dc6ec383c7 Mon Sep 17 00:00:00 2001 From: Stanislav Fomichev Date: Thu, 30 Apr 2020 16:31:52 -0700 Subject: bpf: Bpf_{g,s}etsockopt for struct bpf_sock_addr Currently, bpf_getsockopt and bpf_setsockopt helpers operate on the 'struct bpf_sock_ops' context in BPF_PROG_TYPE_SOCK_OPS program. Let's generalize them and make them available for 'struct bpf_sock_addr'. That way, in the future, we can allow those helpers in more places. As an example, let's expose those 'struct bpf_sock_addr' based helpers to BPF_CGROUP_INET{4,6}_CONNECT hooks. That way we can override CC before the connection is made. v3: * Expose custom helpers for bpf_sock_addr context instead of doing generic bpf_sock argument (as suggested by Daniel). Even with try_socket_lock that doesn't sleep we have a problem where context sk is already locked and socket lock is non-nestable. v2: * s/BPF_PROG_TYPE_CGROUP_SOCKOPT/BPF_PROG_TYPE_SOCK_OPS/ Signed-off-by: Stanislav Fomichev Signed-off-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Acked-by: John Fastabend Link: https://lore.kernel.org/bpf/20200430233152.199403-1-sdf@google.com --- include/uapi/linux/bpf.h | 14 ++- net/core/filter.c | 118 +++++++++++++++++----- tools/include/uapi/linux/bpf.h | 14 ++- tools/testing/selftests/bpf/config | 1 + tools/testing/selftests/bpf/progs/connect4_prog.c | 46 +++++++++ 5 files changed, 166 insertions(+), 27 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 705e4822f997..b3643e27e264 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1587,7 +1587,7 @@ union bpf_attr { * Return * 0 * - * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1595,6 +1595,11 @@ union bpf_attr { * must be specified, see **setsockopt(2)** for more information. * The option value of length *optlen* is pointed by *optval*. * + * *bpf_socket* should be one of the following: + * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** + * and **BPF_CGROUP_INET6_CONNECT**. + * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: * @@ -1789,7 +1794,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1798,6 +1803,11 @@ union bpf_attr { * The retrieved value is stored in the structure pointed by * *opval* and of length *optlen*. * + * *bpf_socket* should be one of the following: + * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** + * and **BPF_CGROUP_INET6_CONNECT**. + * * This helper actually implements a subset of **getsockopt()**. * It supports the following *level*\ s: * diff --git a/net/core/filter.c b/net/core/filter.c index 70b32723e6be..dfaf5df13722 100644 --- a/net/core/filter.c +++ b/net/core/filter.c @@ -4194,16 +4194,19 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = { .arg1_type = ARG_PTR_TO_CTX, }; -BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, - int, level, int, optname, char *, optval, int, optlen) +#define SOCKOPT_CC_REINIT (1 << 0) + +static int _bpf_setsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen, u32 flags) { - struct sock *sk = bpf_sock->sk; int ret = 0; int val; if (!sk_fullsock(sk)) return -EINVAL; + sock_owned_by_me(sk); + if (level == SOL_SOCKET) { if (optlen != sizeof(int)) return -EINVAL; @@ -4298,7 +4301,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, sk->sk_prot->setsockopt == tcp_setsockopt) { if (optname == TCP_CONGESTION) { char name[TCP_CA_NAME_MAX]; - bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN; + bool reinit = flags & SOCKOPT_CC_REINIT; strncpy(name, optval, min_t(long, optlen, TCP_CA_NAME_MAX-1)); @@ -4345,24 +4348,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, return ret; } -static const struct bpf_func_proto bpf_setsockopt_proto = { - .func = bpf_setsockopt, - .gpl_only = false, - .ret_type = RET_INTEGER, - .arg1_type = ARG_PTR_TO_CTX, - .arg2_type = ARG_ANYTHING, - .arg3_type = ARG_ANYTHING, - .arg4_type = ARG_PTR_TO_MEM, - .arg5_type = ARG_CONST_SIZE, -}; - -BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, - int, level, int, optname, char *, optval, int, optlen) +static int _bpf_getsockopt(struct sock *sk, int level, int optname, + char *optval, int optlen) { - struct sock *sk = bpf_sock->sk; - if (!sk_fullsock(sk)) goto err_clear; + + sock_owned_by_me(sk); + #ifdef CONFIG_INET if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) { struct inet_connection_sock *icsk; @@ -4428,8 +4421,71 @@ err_clear: return -EINVAL; } -static const struct bpf_func_proto bpf_getsockopt_proto = { - .func = bpf_getsockopt, +BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx, + int, level, int, optname, char *, optval, int, optlen) +{ + u32 flags = 0; + return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen, + flags); +} + +static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = { + .func = bpf_sock_addr_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx, + int, level, int, optname, char *, optval, int, optlen) +{ + return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen); +} + +static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = { + .func = bpf_sock_addr_getsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_UNINIT_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + u32 flags = 0; + if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN) + flags |= SOCKOPT_CC_REINIT; + return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen, + flags); +} + +static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = { + .func = bpf_sock_ops_setsockopt, + .gpl_only = false, + .ret_type = RET_INTEGER, + .arg1_type = ARG_PTR_TO_CTX, + .arg2_type = ARG_ANYTHING, + .arg3_type = ARG_ANYTHING, + .arg4_type = ARG_PTR_TO_MEM, + .arg5_type = ARG_CONST_SIZE, +}; + +BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock, + int, level, int, optname, char *, optval, int, optlen) +{ + return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen); +} + +static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = { + .func = bpf_sock_ops_getsockopt, .gpl_only = false, .ret_type = RET_INTEGER, .arg1_type = ARG_PTR_TO_CTX, @@ -6043,6 +6099,22 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) return &bpf_sk_storage_get_proto; case BPF_FUNC_sk_storage_delete: return &bpf_sk_storage_delete_proto; + case BPF_FUNC_setsockopt: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + return &bpf_sock_addr_setsockopt_proto; + default: + return NULL; + } + case BPF_FUNC_getsockopt: + switch (prog->expected_attach_type) { + case BPF_CGROUP_INET4_CONNECT: + case BPF_CGROUP_INET6_CONNECT: + return &bpf_sock_addr_getsockopt_proto; + default: + return NULL; + } default: return bpf_base_func_proto(func_id); } @@ -6261,9 +6333,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog) { switch (func_id) { case BPF_FUNC_setsockopt: - return &bpf_setsockopt_proto; + return &bpf_sock_ops_setsockopt_proto; case BPF_FUNC_getsockopt: - return &bpf_getsockopt_proto; + return &bpf_sock_ops_getsockopt_proto; case BPF_FUNC_sock_ops_cb_flags_set: return &bpf_sock_ops_cb_flags_set_proto; case BPF_FUNC_sock_map_update: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 705e4822f997..b3643e27e264 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1587,7 +1587,7 @@ union bpf_attr { * Return * 0 * - * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **setsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1595,6 +1595,11 @@ union bpf_attr { * must be specified, see **setsockopt(2)** for more information. * The option value of length *optlen* is pointed by *optval*. * + * *bpf_socket* should be one of the following: + * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** + * and **BPF_CGROUP_INET6_CONNECT**. + * * This helper actually implements a subset of **setsockopt()**. * It supports the following *level*\ s: * @@ -1789,7 +1794,7 @@ union bpf_attr { * Return * 0 on success, or a negative error in case of failure. * - * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen) + * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen) * Description * Emulate a call to **getsockopt()** on the socket associated to * *bpf_socket*, which must be a full socket. The *level* at @@ -1798,6 +1803,11 @@ union bpf_attr { * The retrieved value is stored in the structure pointed by * *opval* and of length *optlen*. * + * *bpf_socket* should be one of the following: + * * **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**. + * * **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT** + * and **BPF_CGROUP_INET6_CONNECT**. + * * This helper actually implements a subset of **getsockopt()**. * It supports the following *level*\ s: * diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 60e3ae5d4e48..6e5b94c036ca 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -37,3 +37,4 @@ CONFIG_IPV6_SIT=m CONFIG_BPF_JIT=y CONFIG_BPF_LSM=y CONFIG_SECURITY=y +CONFIG_TCP_CONG_DCTCP=y diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c index ad3c498a8150..972918cd2d7f 100644 --- a/tools/testing/selftests/bpf/progs/connect4_prog.c +++ b/tools/testing/selftests/bpf/progs/connect4_prog.c @@ -8,6 +8,7 @@ #include #include #include +#include #include #include @@ -16,6 +17,10 @@ #define DST_REWRITE_IP4 0x7f000001U #define DST_REWRITE_PORT4 4444 +#ifndef TCP_CA_NAME_MAX +#define TCP_CA_NAME_MAX 16 +#endif + int _version SEC("version") = 1; __attribute__ ((noinline)) @@ -33,6 +38,43 @@ int do_bind(struct bpf_sock_addr *ctx) return 1; } +static __inline int verify_cc(struct bpf_sock_addr *ctx, + char expected[TCP_CA_NAME_MAX]) +{ + char buf[TCP_CA_NAME_MAX]; + int i; + + if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf))) + return 1; + + for (i = 0; i < TCP_CA_NAME_MAX; i++) { + if (buf[i] != expected[i]) + return 1; + if (buf[i] == 0) + break; + } + + return 0; +} + +static __inline int set_cc(struct bpf_sock_addr *ctx) +{ + char dctcp[TCP_CA_NAME_MAX] = "dctcp"; + char cubic[TCP_CA_NAME_MAX] = "cubic"; + + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &dctcp, sizeof(dctcp))) + return 1; + if (verify_cc(ctx, dctcp)) + return 1; + + if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic))) + return 1; + if (verify_cc(ctx, cubic)) + return 1; + + return 0; +} + SEC("cgroup/connect4") int connect_v4_prog(struct bpf_sock_addr *ctx) { @@ -66,6 +108,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx) bpf_sk_release(sk); + /* Rewrite congestion control. */ + if (ctx->type == SOCK_STREAM && set_cc(ctx)) + return 0; + /* Rewrite destination. */ ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4); ctx->user_port = bpf_htons(DST_REWRITE_PORT4); -- cgit v1.2.3